def save_source_info(csm, h5):
    data = {}  # src_id -> row
    wkts = []
    lens = []
    for sg in csm.src_groups:
        for src in sg:
            lens.append(len(src.trt_smrs))
            row = [src.source_id, src.grp_id, src.code, 0, 0, 0,
                   csm.full_lt.trti[src.tectonic_region_type], 0]
            wkts.append(src._wkt)
            data[src.id] = row
    logging.info('There are %d groups and %d sources with len(trt_smrs)=%.2f',
                 len(csm.src_groups), sum(len(sg) for sg in csm.src_groups),
                 numpy.mean(lens))
    csm.source_info = data  # src_id -> row
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['trt_smrs'] = csm.get_trt_smrs()
        h5['toms'] = numpy.array(
            [get_tom_name(sg) for sg in csm.src_groups], hdf5.vstr)
def __init__(self, task_func, task_args=(), monitor=None, distribute=None,
             progress=logging.info):
    self.__class__.init(distribute=distribute or OQ_DISTRIBUTE)
    self.task_func = task_func
    self.monitor = monitor or Monitor(task_func.__name__)
    self.calc_id = getattr(self.monitor, 'calc_id', None)
    self.name = self.monitor.operation or task_func.__name__
    self.task_args = task_args
    self.distribute = distribute or oq_distribute(task_func)
    self.progress = progress
    try:
        self.num_tasks = len(self.task_args)
    except TypeError:  # generators have no len
        self.num_tasks = None
    # a task can be a function, a class or an instance with a __call__
    if inspect.isfunction(task_func):
        self.argnames = inspect.getfullargspec(task_func).args
    elif inspect.isclass(task_func):
        self.argnames = inspect.getfullargspec(task_func.__init__).args[1:]
    else:  # instance with a __call__ method
        self.argnames = inspect.getfullargspec(task_func.__call__).args[1:]
    self.receiver = 'tcp://%s:%s' % (
        config.dbserver.listen, config.dbserver.receiver_ports)
    self.sent = numpy.zeros(len(self.argnames) - 1)
    self.monitor.backurl = None  # overridden later
    self.tasks = []  # populated by .submit
    h5 = self.monitor.hdf5
    task_info = 'task_info/' + self.name
    if h5 and task_info not in h5:  # first time
        # task_info and performance_data should be generated in advance
        hdf5.create(h5, task_info, task_info_dt)
    if h5 and 'performance_data' not in h5:
        hdf5.create(h5, 'performance_data', perf_dt)
def _genargs(self, backurl=None, pickle=True):
    """
    Add .task_no and .weight to the monitor and yield back
    the arguments by pickling them.
    """
    task_info = 'task_info/' + self.name
    for task_no, args in enumerate(self.task_args, 1):
        mon = args[-1]
        assert isinstance(mon, Monitor), mon
        if mon.hdf5 and task_no == 1:
            self.hdf5 = mon.hdf5
            if task_info not in self.hdf5:  # first time
                # task_info and performance_data should be generated in advance
                hdf5.create(mon.hdf5, task_info, task_data_dt)
            if 'performance_data' not in self.hdf5:
                hdf5.create(mon.hdf5, 'performance_data', perf_dt)
        # add incremental task number and task weight
        mon.task_no = task_no
        mon.weight = getattr(args[0], 'weight', 1.)
        mon.backurl = backurl
        self.calc_id = getattr(mon, 'calc_id', None)
        if pickle:
            args = pickle_sequence(args)
            self.sent += numpy.array([len(p) for p in args])
        yield args
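# Usage sketch (hedged): per the assert in _genargs, the last element of each
# argument tuple must be a Monitor; `double` is an illustrative task and the
# iteration protocol of Starmap is assumed from context, not from this snippet.
from openquake.baselib.parallel import Starmap
from openquake.baselib.performance import Monitor


def double(numbers, monitor):
    return [n * 2 for n in numbers]


results = list(Starmap(double, [([1, 2], Monitor()), ([3, 4], Monitor())]))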
def get_composite_source_model(oqparam, full_lt=None, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param full_lt:
        a :class:`openquake.commonlib.logictree.FullLogicTree` or None
    :param h5:
        an open hdf5.File where to store the source info
    """
    if full_lt is None:
        full_lt = get_full_lt(oqparam)
    csm = get_csm(oqparam, full_lt, h5)
    grp_ids = csm.get_grp_ids()
    gidx = {tuple(arr): i for i, arr in enumerate(grp_ids)}
    if oqparam.is_event_based():
        csm.init_serials(oqparam.ses_seed)
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    ns = 0
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            ns += 1
            if src.source_id in data:
                num_sources = data[src.source_id][3] + 1
            else:
                num_sources = 1
            row = [src.source_id, gidx[tuple(src.grp_ids)], src.code,
                   num_sources, 0, 0, 0, src.checksum, src.serial]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = ['%.2f' % item[0]
                           for item in src.get_annual_occurrence_rates()]
            mags[sg.trt].update(srcmags)
    logging.info('There are %d sources with %d unique IDs', ns, len(data))
    if h5:
        hdf5.create(h5, 'source_info', source_info_dt)  # avoid hdf5 damned bug
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        for trt in mags:
            h5['source_mags/' + trt] = numpy.array(sorted(mags[trt]))
        h5['grp_ids'] = grp_ids
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File where to store the source info
    """
    full_lt = get_full_lt(oqparam)
    if oqparam.csm_cache and not oqparam.is_ucerf():
        csm = _get_csm_cached(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    grp_ids = csm.get_grp_ids()
    gidx = {tuple(arr): i for i, arr in enumerate(grp_ids)}
    if oqparam.is_event_based():
        csm.init_serials(oqparam.ses_seed)
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    ns = -1
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            if src.source_id in data:
                multiplicity = data[src.source_id][MULTIPLICITY] + 1
            else:
                multiplicity = 1
            ns += 1
            src.gidx = gidx[tuple(src.grp_ids)]
            row = [src.source_id, src.gidx, src.code, multiplicity,
                   0, 0, 0, src.checksum, src.serial or ns,
                   full_lt.trti[src.tectonic_region_type]]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = ['%.2f' % item[0]
                           for item in src.get_annual_occurrence_rates()]
            mags[sg.trt].update(srcmags)
    logging.info('There are %d sources', ns + 1)
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        for trt in mags:
            h5['source_mags/' + trt] = numpy.array(sorted(mags[trt]))
        h5['grp_ids'] = grp_ids
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
def flush(self):
    """
    Save the measurements on the performance file (or on stdout)
    """
    for child in self.children:
        child.flush()
    data = self.get_data()
    if len(data) == 0:  # no information
        return []
    # reset monitor
    self.duration = 0
    self.mem = 0
    self.counts = 0
    if self.hdf5path:
        h5 = h5py.File(self.hdf5path, 'a')  # explicit mode for recent h5py
        try:
            pdata = h5['performance_data']
        except KeyError:
            pdata = hdf5.create(h5, 'performance_data', perf_dt)
        hdf5.extend(pdata, data)
        h5.close()
    # else print(data[0]) on stdout
    return data
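# Usage sketch (hedged): the Monitor constructor changed across engine
# versions, so the `hdf5path` keyword is an assumption here; flush() only
# relies on self.hdf5path and self.get_data() being available.
from openquake.baselib.performance import Monitor

mon = Monitor('my-operation', hdf5path='/tmp/calc_1.hdf5')  # hypothetical path
with mon:  # measure the duration (and possibly the memory) of the block
    sum(x * x for x in range(10 ** 6))
rows = mon.flush()  # appended to 'performance_data'; the counters are reset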
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File where to store the source info
    """
    ucerf = oqparam.calculation_mode.startswith('ucerf')
    source_model_lt = get_source_model_lt(oqparam, validate=not ucerf)
    trts = source_model_lt.tectonic_region_types
    trts_lower = {trt.lower() for trt in trts}
    reqv = oqparam.inputs.get('reqv', {})
    for trt in reqv:
        if trt.lower() not in trts_lower:
            raise ValueError('Unknown TRT=%s in %s [reqv]' %
                             (trt, oqparam.inputs['job_ini']))
    gsim_lt = get_gsim_lt(oqparam, trts or ['*'])
    p = source_model_lt.num_paths * gsim_lt.get_num_paths()
    if oqparam.number_of_logic_tree_samples:
        logging.info('Considering {:_d} logic tree paths out of {:_d}'.format(
            oqparam.number_of_logic_tree_samples, p))
    else:  # full enumeration
        if (oqparam.is_event_based() and
                (oqparam.ground_motion_fields or
                 oqparam.hazard_curves_from_gmfs) and
                p > oqparam.max_potential_paths):
            raise ValueError(
                'There are too many potential logic tree paths (%d): '
                'use sampling instead of full enumeration or reduce the '
                'source model with oq reduce_sm' % p)
        logging.info('Potential number of logic tree paths = {:_d}'.format(p))
    if source_model_lt.on_each_source:
        logging.info('There is a logic tree on each source')
    ltmodels = get_ltmodels(oqparam, gsim_lt, source_model_lt, h5)
    csm = source.CompositeSourceModel(gsim_lt, source_model_lt, ltmodels)
    key = operator.attrgetter('source_id', 'checksum')
    srcidx = 0
    if h5:
        info = hdf5.create(h5, 'source_info', source_info_dt)
    data = []
    for k, srcs in groupby(csm.get_sources(), key).items():
        for src in srcs:
            src.id = srcidx
            data.append((0, src.src_group_ids[0], src.source_id, src.code,
                         src.num_ruptures, 0, 0, 0, src.checksum, src._wkt))
        srcidx += 1
    if h5:
        hdf5.extend(info, numpy.array(data, source_info_dt))
    if oqparam.is_event_based():
        # initialize the rupture rup_id numbers before splitting/filtering;
        # in this way the serials are independent from the site collection
        csm.init_serials(oqparam.ses_seed)
    if oqparam.disagg_by_src:
        csm = csm.grp_by_src()  # one group per source
    csm.info.gsim_lt.check_imts(oqparam.imtls)
    return csm
def create_dframe(self, key, nametypes, compression=None, **kw):
    """
    Create a HDF5 datagroup readable as a pandas DataFrame

    :param key: name of the dataset
    :param nametypes: list of pairs (name, dtype) or (name, array)
    :param compression: the kind of HDF5 compression to use
    :param kw: attributes to add
    """
    names = []
    for name, value in nametypes:
        is_array = isinstance(value, numpy.ndarray)
        if is_array:
            dt = value.dtype
        else:
            dt = value
        dset = hdf5.create(self.hdf5, f'{key}/{name}', dt, (None,),
                           compression)
        if is_array:
            hdf5.extend(dset, value)
        names.append(name)
    attrs = self.hdf5[key].attrs
    attrs['__pdcolumns__'] = ' '.join(names)
    for k, v in kw.items():
        attrs[k] = v
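# Usage sketch (hedged): `dstore` stands for any object exposing the
# create_dframe method above with an open .hdf5 file; the column names,
# dtypes and attribute are illustrative only.
import numpy


def populate(dstore):
    weights = numpy.array([0.5, 0.3, 0.2])
    dstore.create_dframe(
        'loss_table',
        [('event_id', numpy.uint32),   # empty, extendable column
         ('loss', numpy.float64),      # empty, extendable column
         ('weight', weights)],         # column pre-filled from the array
        investigation_time=50.)        # stored as a group attribute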
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File where to store the source info
    """
    logging.info('Reading the CompositeSourceModel')
    full_lt = get_full_lt(oqparam)
    if oqparam.cachedir and not oqparam.is_ucerf():
        csm = _get_cachedir(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    et_ids = csm.get_et_ids()
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    data = {}  # src_id -> row
    mags_by_trt = csm.get_mags_by_trt()
    wkts = []
    lens = []
    for sg in csm.src_groups:
        for src in sg:
            lens.append(len(src.et_ids))
            row = [src.source_id, src.grp_id, src.code, 0, 0, 0,
                   full_lt.trti[src.tectonic_region_type], 0]
            wkts.append(src._wkt)
            data[src.id] = row
    logging.info('There are %d groups and %d sources with len(et_ids)=%.2f',
                 len(csm.src_groups), sum(len(sg) for sg in csm.src_groups),
                 numpy.mean(lens))
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['et_ids'] = et_ids
        for trt in mags_by_trt:
            h5['source_mags/' + trt] = numpy.array(mags_by_trt[trt])
    oqparam.maximum_distance.interp(mags_by_trt)
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
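# Driving sketch (hedged): assuming the function lives in
# openquake.commonlib.readinput, as the OqParam reference suggests; the job
# file and the output path are hypothetical.
from openquake.baselib import hdf5
from openquake.commonlib import readinput

oq = readinput.get_oqparam('job.ini')
with hdf5.File('/tmp/calc_1.hdf5', 'w') as h5:
    csm = get_composite_source_model(oq, h5)  # fills source_info, source_wkt
print('%d source groups' % len(csm.src_groups))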
def get_ltmodels(oq, gsim_lt, source_model_lt, h5=None):
    """
    Build source models from the logic tree and store them inside the
    `source_info` dataset.
    """
    if oq.pointsource_distance['default'] == {}:
        spinning_off = False
    else:
        spinning_off = sum(oq.pointsource_distance.values()) == 0
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    # NB: the source model files are often NOT in the shared directory
    # (for instance in oq-engine/demos) so the processpool must be used
    dist = ('no' if os.environ.get('OQ_DISTRIBUTE') == 'no'
            else 'processpool')
    smlt_dir = os.path.dirname(source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oq.investigation_time, oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
        oq.area_source_discretization, oq.minimum_magnitude,
        not spinning_off, oq.source_id)
    if h5:
        sources = hdf5.create(h5, 'source_info', source_info_dt)
    lt_models = list(source_model_lt.gen_source_models(gsim_lt))
    if oq.calculation_mode.startswith('ucerf'):
        idx = 0
        [grp] = nrml.to_python(oq.inputs["source_model"], converter)
        for grp_id, ltm in enumerate(lt_models):
            sg = copy.copy(grp)
            sg.id = grp_id
            ltm.src_groups = [sg]
            src = sg[0].new(ltm.ordinal, ltm.names)  # one source
            src.src_group_id = grp_id
            src.id = idx
            idx += 1
            if oq.number_of_logic_tree_samples:
                src.samples = ltm.samples
            sg.sources = [src]
            data = [(grp_id, grp_id, src.source_id, src.code, 0, 0, -1,
                     src.num_ruptures, 0, '')]
            hdf5.extend(sources, numpy.array(data, source_info_dt))
        return lt_models
    logging.info('Reading the source model(s) in parallel')
    allargs = []
    fileno = 0
    for ltm in lt_models:
        apply_unc = functools.partial(
            source_model_lt.apply_uncertainties, ltm.path)
        for name in ltm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            allargs.append((ltm, apply_unc, fname, fileno))
            fileno += 1
    smap = parallel.Starmap(
        SourceReader(converter, smlt_dir, h5), allargs, distribute=dist,
        h5=h5 if h5 else None)  # NB: h5 is None in logictree_test.py
    return _store_results(smap, lt_models, source_model_lt, gsim_lt, oq, h5)
def init_performance(hdf5file, swmr=False):
    """
    :param hdf5file: file name or hdf5.File instance
    """
    fname = isinstance(hdf5file, str)
    h5 = hdf5.File(hdf5file, 'a') if fname else hdf5file
    if 'performance_data' not in h5:
        hdf5.create(h5, 'performance_data', perf_dt)
    if 'task_info' not in h5:
        hdf5.create(h5, 'task_info', task_info_dt)
    if 'task_sent' not in h5:
        h5['task_sent'] = '{}'
    if swmr:
        try:
            h5.swmr_mode = True
        except ValueError as exc:
            raise ValueError('%s: %s' % (hdf5file, exc))
    if fname:
        h5.close()
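# Since init_performance accepts either a file name or an open file, the two
# calls below are equivalent (sketch with a hypothetical path):
init_performance('/tmp/calc_1.hdf5')           # opened in 'a' mode, then closed
with hdf5.File('/tmp/calc_1.hdf5', 'a') as h5:
    init_performance(h5, swmr=True)            # the caller keeps ownership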
def dump(temppath, perspath):
    """
    Dump the performance info into a persistent file, then remove
    the temporary file.

    :param temppath: the temporary file
    :param perspath: the persistent file
    """
    with hdf5.File(temppath, 'r') as h, hdf5.File(perspath, 'r+') as h5:
        if 'performance_data' not in h5:
            hdf5.create(h5, 'performance_data', perf_dt)
        if 'task_info' not in h5:
            hdf5.create(h5, 'task_info', task_info_dt)
        hdf5.extend(h5['performance_data'], h['performance_data'][()])
        hdf5.extend(h5['task_info'], h['task_info'][()])
        for k, v in h['task_info'].attrs.items():
            h5['task_info'].attrs[k] = v
    os.remove(temppath)
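# Usage sketch for dump (hypothetical paths): a worker flushes its
# measurements into a temporary file, then they are merged into the
# persistent calculation file and the temporary file is removed.
dump('/tmp/tmp_ab12.hdf5', '/home/user/oqdata/calc_1.hdf5')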
def flush(self, hdf5path):
    """
    Save the measurements on the performance file
    """
    if not self.children:
        data = self.get_data()
    else:
        lst = [self.get_data()]
        for child in self.children:
            lst.append(child.get_data())
            child.reset()
        data = numpy.concatenate(lst)
    if len(data) == 0:  # no information
        return
    elif not os.path.exists(hdf5path):
        with hdf5.File(hdf5path, 'w') as h5:
            hdf5.create(h5, 'performance_data', perf_dt)
            hdf5.create(h5, 'task_info', task_info_dt)
    hdf5.extend3(hdf5path, 'performance_data', data)
    self.reset()
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File where to store the source info
    """
    # first read the logic tree
    full_lt = get_full_lt(oqparam)
    # then read the composite source model from the cache if possible
    if oqparam.cachedir and not os.path.exists(oqparam.cachedir):
        os.makedirs(oqparam.cachedir)
    if oqparam.cachedir and not oqparam.is_ucerf():
        # for UCERF pickling the csm is slower
        checksum = get_checksum32(oqparam, h5)
        fname = os.path.join(oqparam.cachedir, 'csm_%s.pik' % checksum)
        if os.path.exists(fname):
            logging.info('Reading %s', fname)
            with open(fname, 'rb') as f:
                csm = pickle.load(f)
            csm.full_lt = full_lt
            if h5:  # avoid errors with --reuse_hazard
                h5['et_ids'] = csm.get_et_ids()
                hdf5.create(h5, 'source_info', source_info_dt)
            _check_csm(csm, oqparam, h5)
            return csm
    # read and process the composite source model from the input files
    csm = get_csm(oqparam, full_lt, h5)
    save_source_info(csm, h5)
    if oqparam.cachedir and not oqparam.is_ucerf():
        logging.info('Saving %s', fname)
        with open(fname, 'wb') as f:
            pickle.dump(csm, f)
    _check_csm(csm, oqparam, h5)
    return csm
def create_dset(self, key, dtype, shape=(None,), compression=None,
                fillvalue=0, attrs=None):
    """
    Create a one-dimensional HDF5 dataset.

    :param key: name of the dataset
    :param dtype: dtype of the dataset (usually composite)
    :param shape: shape of the dataset, possibly extendable
    :param compression: the kind of HDF5 compression to use
    :param attrs: dictionary of attributes of the dataset
    :returns: a HDF5 dataset
    """
    return hdf5.create(
        self.hdf5, key, dtype, shape, compression, fillvalue, attrs)
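# Usage sketch (hedged): `dstore` is any object exposing the create_dset
# method above; the composite dtype is illustrative.
import numpy
from openquake.baselib import hdf5


def store_ruptures(dstore):
    rup_dt = numpy.dtype([('id', numpy.uint32), ('mag', numpy.float32)])
    dset = dstore.create_dset('ruptures', rup_dt)  # shape (None,): extendable
    hdf5.extend(dset, numpy.array([(0, 5.5), (1, 6.0)], rup_dt))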
def extend(self, key, array):
    """
    Extend the dataset associated to the given key; create it if needed

    :param key: name of the dataset
    :param array: array to store
    """
    try:
        dset = self.hdf5[key]
    except KeyError:
        dset = hdf5.create(self.hdf5, key, array.dtype,
                           shape=(None,) + array.shape[1:])
    hdf5.extend(dset, array)
    return dset
def extend(self, key, array, **attrs):
    """
    Extend the dataset associated to the given key; create it if needed

    :param key: name of the dataset
    :param array: array to store
    :param attrs: a dictionary of attributes
    """
    try:
        dset = self.hdf5[key]
    except KeyError:
        dset = hdf5.create(self.hdf5, key, array.dtype,
                           shape=(None,) + array.shape[1:])
    hdf5.extend(dset, array)
    for k, v in attrs.items():
        dset.attrs[k] = v
    return dset
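# Usage sketch (hedged): repeated calls grow the dataset along the first
# axis, and the attributes are (re)written at each call; `dstore` and the
# dataset name are illustrative.
import numpy


def store_chunks(dstore):
    for chunk in [numpy.zeros((10, 2)), numpy.ones((5, 2))]:
        dset = dstore.extend('gmvs', chunk, units='g')
    # now dset.shape == (15, 2) and dset.attrs['units'] == 'g'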
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File where to store the source info
    """
    full_lt = get_full_lt(oqparam)
    if oqparam.cachedir and not oqparam.is_ucerf():
        csm = _get_cachedir(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    et_ids = csm.get_et_ids()
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    grp_id = {tuple(arr): i for i, arr in enumerate(et_ids)}
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    lens = []
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            lens.append(len(src.et_ids))
            src.grp_id = grp_id[tuple(src.et_ids)]
            row = [src.source_id, src.grp_id, src.code, 0, 0, 0, src.id,
                   full_lt.trti[src.tectonic_region_type]]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = ['%.2f' % item[0]
                           for item in src.get_annual_occurrence_rates()]
            mags[sg.trt].update(srcmags)
    logging.info('There are %d groups and %d sources with len(et_ids)=%.1f',
                 len(csm.src_groups), sum(len(sg) for sg in csm.src_groups),
                 numpy.mean(lens))
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['et_ids'] = et_ids
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = arr = numpy.array(sorted(mags[trt]))
            h5['source_mags/' + trt] = arr
        oqparam.maximum_distance.interp(mags_by_trt)
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
def get_source_models(oqparam, gsim_lt, source_model_lt, monitor,
                      in_memory=True):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param monitor:
        a `openquake.baselib.performance.Monitor` instance
    :param in_memory:
        if True, keep the sources in memory, else just collect the TRTs
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.LtSourceModel`
        tuples
    """
    make_sm = SourceModelFactory()
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time, oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing, oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization, oqparam.source_id)
    if oqparam.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oqparam.inputs["source_model"], converter)
    elif in_memory:
        logging.info('Reading the source model(s)')
        dic = logictree.parallel_read_source_models(
            gsim_lt, source_model_lt, converter, monitor)
    # consider only the effective realizations
    smlt_dir = os.path.dirname(source_model_lt.filename)
    idx = 0
    grp_id = 0
    if monitor.hdf5:
        sources = hdf5.create(monitor.hdf5, 'source_info', source_info_dt)
        hdf5.create(monitor.hdf5, 'source_geom', point3d)
    for sm in source_model_lt.gen_source_models(gsim_lt):
        src_groups = []
        for name in sm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            if oqparam.calculation_mode.startswith('ucerf'):
                sg = copy.copy(grp)
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                src.id = idx
                sg.sources = [src]
                src_groups.append(sg)
                idx += 1
                grp_id += 1
                data = [(sg.id, src.source_id, src.code, 0, 0,
                         src.num_ruptures, 0, 0, 0, 0, 0)]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            elif in_memory:
                apply_unc = source_model_lt.make_apply_uncertainties(sm.path)
                newsm = make_sm(fname, dic[fname], apply_unc,
                                oqparam.investigation_time)
                for sg in newsm:
                    for src in sg:
                        src.src_group_id = grp_id
                        src.id = idx
                        idx += 1
                    sg.id = grp_id
                    grp_id += 1
                if monitor.hdf5:
                    store_sm(newsm, monitor.hdf5)
                src_groups.extend(newsm.src_groups)
            else:  # just collect the TRT models
                src_groups.extend(logictree.read_source_groups(fname))
            if grp_id >= TWO16:
                # the limit is really needed only for event based calculations
                raise ValueError('There is a limit of %d src groups!' % TWO16)
        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d potential gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)
        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r" % (sm, src_group.trt, gsim_file))
        yield sm
    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in make_sm.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not make_sm.changed_sources:
                dupl += hits
    if (dupl and not oqparam.optimize_same_id_sources and
            'event_based' not in oqparam.calculation_mode):
        logging.warning(
            'You are doing redundant calculations: please make sure '
            'that different sources have different IDs and set '
            'optimize_same_id_sources=true in your .ini file')
    if make_sm.changed_sources:
        logging.info('Modified %d sources in the composite source model',
                     make_sm.changed_sources)
def get_models(self):
    """
    :yields: :class:`openquake.commonlib.logictree.LtSourceModel` tuples
    """
    oq = self.oqparam
    spinning_off = self.oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = ('no' if os.environ.get('OQ_DISTRIBUTE') == 'no'
            else 'processpool')
    smlt_dir = os.path.dirname(self.source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oq.investigation_time, oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
        oq.area_source_discretization, oq.minimum_magnitude,
        not spinning_off, oq.source_id)
    if oq.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oq.inputs["source_model"], converter)
        dic = {'ucerf': grp}
    elif self.in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(
            nrml.read_source_models, distribute=dist,
            hdf5path=self.hdf5.filename if self.hdf5 else None)
        for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}
    else:
        dic = {}
    # consider only the effective realizations
    idx = 0
    if self.hdf5:
        sources = hdf5.create(self.hdf5, 'source_info', source_info_dt)
        hdf5.create(self.hdf5, 'source_geom', point3d)
        hdf5.create(self.hdf5, 'source_mfds', hdf5.vstr)
    grp_id = 0
    for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
        if 'ucerf' in dic:
            sg = copy.copy(dic['ucerf'])
            sm.src_groups = [sg]
            sg.id = grp_id
            src = sg[0].new(sm.ordinal, sm.names)  # one source
            src.src_group_id = grp_id
            src.id = idx
            if oq.number_of_logic_tree_samples:
                src.samples = sm.samples
            sg.sources = [src]
            idx += 1
            grp_id += 1
            data = [(sg.id, src.source_id, src.code, 0, 0, -1,
                     src.num_ruptures, 0, 0, 0, idx)]
            hdf5.extend(sources, numpy.array(data, source_info_dt))
        else:
            self.apply_uncertainties(sm, idx, dic)
        yield sm
    if self.hdf5:
        hdf5.extend(self.hdf5['source_mfds'],
                    numpy.array(list(self.mfds), hdf5.vstr))
    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in self.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not self.changes:
                dupl += hits
    if self.changes:
        logging.info('Applied %d changes to the composite source model',
                     self.changes)
def get_source_models(oqparam, gsim_lt, source_model_lt, monitor,
                      in_memory=True, srcfilter=None):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param monitor:
        a `openquake.baselib.performance.Monitor` instance
    :param in_memory:
        if True, keep the sources in memory, else just collect the TRTs
    :param srcfilter:
        a SourceFilter instance with a .filename pointing to the cache file
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.LtSourceModel`
        tuples
    """
    make_sm = SourceModelFactory()
    spinning_off = oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = 'no' if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool'
    smlt_dir = os.path.dirname(source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time, oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing, oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization, oqparam.minimum_magnitude,
        not spinning_off, oqparam.source_id)
    if oqparam.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oqparam.inputs["source_model"], converter)
    elif in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(
            nrml.read_source_models, monitor=monitor, distribute=dist)
        for sm in source_model_lt.gen_source_models(gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}
    # consider only the effective realizations
    nr = 0
    idx = 0
    grp_id = 0
    if monitor.hdf5:
        sources = hdf5.create(monitor.hdf5, 'source_info', source_info_dt)
        hdf5.create(monitor.hdf5, 'source_geom', point3d)
        filename = None
    source_ids = set()
    for sm in source_model_lt.gen_source_models(gsim_lt):
        apply_unc = functools.partial(
            source_model_lt.apply_uncertainties, sm.path)
        src_groups = []
        for name in sm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            if oqparam.calculation_mode.startswith('ucerf'):
                sg = copy.copy(grp)
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                source_ids.add(src.source_id)
                src.src_group_id = grp_id
                src.id = idx
                if oqparam.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                src_groups.append(sg)
                idx += 1
                grp_id += 1
                data = [(sg.id, src.source_id, src.code, 0, 0,
                         src.num_ruptures, 0, 0, 0)]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            elif in_memory:
                newsm = make_sm(fname, dic[fname], apply_unc,
                                oqparam.investigation_time)
                for sg in newsm:
                    nr += sum(src.num_ruptures for src in sg)
                    # sample a source for each group
                    if os.environ.get('OQ_SAMPLE_SOURCES'):
                        sg.sources = random_filtered_sources(
                            sg.sources, srcfilter, sg.id + oqparam.random_seed)
                    for src in sg:
                        source_ids.add(src.source_id)
                        src.src_group_id = grp_id
                        src.id = idx
                        idx += 1
                    sg.id = grp_id
                    grp_id += 1
                    src_groups.append(sg)
                if monitor.hdf5:
                    store_sm(newsm, filename, monitor)
            else:  # just collect the TRT models
                groups = logictree.read_source_groups(fname)
                for group in groups:
                    source_ids.update(src['id'] for src in group)
                src_groups.extend(groups)
            if grp_id >= TWO16:
                # the limit is really needed only for event based calculations
                raise ValueError('There is a limit of %d src groups!' % TWO16)
        for brid, srcids in source_model_lt.info.applytosources.items():
            for srcid in srcids:
                if srcid not in source_ids:
                    raise ValueError(
                        'The source %s is not in the source model, please fix '
                        'applyToSources in %s or the source model'
                        % (srcid, source_model_lt.filename))
        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)
        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r" % (sm, src_group.trt, gsim_file))
        yield sm
    logging.info('The composite source model has {:,d} ruptures'.format(nr))
    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in make_sm.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not make_sm.changes:
                dupl += hits
    if (dupl and not oqparam.optimize_same_id_sources and
            not oqparam.is_event_based()):
        logging.warning(
            'You are doing redundant calculations: please make sure '
            'that different sources have different IDs and set '
            'optimize_same_id_sources=true in your .ini file')
    if make_sm.changes:
        logging.info('Applied %d changes to the composite source model',
                     make_sm.changes)
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File where to store the source info
    """
    source_model_lt = get_source_model_lt(oqparam)
    trts = source_model_lt.tectonic_region_types
    trts_lower = {trt.lower() for trt in trts}
    reqv = oqparam.inputs.get('reqv', {})
    for trt in reqv:
        if trt in oqparam.discard_trts:
            continue
        elif trt.lower() not in trts_lower:
            raise ValueError('Unknown TRT=%s in %s [reqv]' %
                             (trt, oqparam.inputs['job_ini']))
    gsim_lt = get_gsim_lt(oqparam, trts or ['*'])
    p = source_model_lt.num_paths * gsim_lt.get_num_paths()
    if oqparam.number_of_logic_tree_samples:
        logging.info('Considering {:_d} logic tree paths out of {:_d}'.format(
            oqparam.number_of_logic_tree_samples, p))
    else:  # full enumeration
        if (oqparam.is_event_based() and
                (oqparam.ground_motion_fields or
                 oqparam.hazard_curves_from_gmfs) and
                p > oqparam.max_potential_paths):
            raise ValueError(
                'There are too many potential logic tree paths (%d): '
                'use sampling instead of full enumeration or reduce the '
                'source model with oq reduce_sm' % p)
        logging.info('Potential number of logic tree paths = {:_d}'.format(p))
    if source_model_lt.on_each_source:
        logging.info('There is a logic tree on each source')
    csm = get_csm(oqparam, source_model_lt, gsim_lt, h5)
    if oqparam.is_event_based():
        csm.init_serials(oqparam.ses_seed)
    if h5:
        info = hdf5.create(h5, 'source_info', source_info_dt)
    data = []
    mags = set()
    n = len(csm.full_lt.sm_rlzs)
    for sg in csm.src_groups:
        for src in sg:
            eri = src.grp_ids[0] % n
            data.append((eri, src.grp_ids[0], src.source_id, src.code,
                         src.num_ruptures, 0, 0, 0, src.checksum, src._wkt))
            if hasattr(src, 'mags'):  # UCERF
                srcmags = ['%.2f' % mag for mag in src.mags]
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = ['%.2f' % item[0]
                           for item in src.get_annual_occurrence_rates()]
            mags.update(srcmags)
    if h5:
        hdf5.extend(info, numpy.array(data, source_info_dt))
        h5['source_mags'] = numpy.array(sorted(mags))
    csm.gsim_lt.check_imts(oqparam.imtls)
    return csm