def store_sm(smodel, filename, monitor):
    """
    :param smodel: a :class:`openquake.hazardlib.nrml.SourceModel` instance
    :param filename: path to an hdf5 file (cache_XXX.hdf5)
    :param monitor: a Monitor instance with an .hdf5 attribute
    """
    h5 = monitor.hdf5
    with monitor('store source model'):
        sources = h5['source_info']
        source_geom = h5['source_geom']
        gid = len(source_geom)
        for sg in smodel:
            if filename:
                with hdf5.File(filename, 'r+') as hdf5cache:
                    hdf5cache['grp-%02d' % sg.id] = sg
            srcs = []
            geoms = []
            for src in sg:
                srcgeom = src.geom()
                n = len(srcgeom)
                geom = numpy.zeros(n, point3d)
                geom['lon'], geom['lat'], geom['depth'] = srcgeom.T
                srcs.append((sg.id, src.source_id, src.code, gid, gid + n,
                             src.num_ruptures, 0, 0, 0))
                geoms.append(geom)
                gid += n
            if geoms:
                hdf5.extend(source_geom, numpy.concatenate(geoms))
            if srcs:  # was `if sources:`, which tested the dataset, not the new rows
                hdf5.extend(sources, numpy.array(srcs, source_info_dt))
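# Illustrative sketch (not engine code) of the structured-array pattern used
# in store_sm above; it assumes point3d is a numpy dtype with lon/lat/depth
# fields, consistent with how it is used there. The dtype name and data are
# made up for the example.
import numpy

point3d_example = numpy.dtype(
    [('lon', float), ('lat', float), ('depth', float)])
srcgeom = numpy.array([[10.0, 45.0, 5.0],   # one (lon, lat, depth) per row
                       [10.5, 45.2, 8.0]])
geom = numpy.zeros(len(srcgeom), point3d_example)
# srcgeom.T has shape (3, n): unpacking assigns one column per field
geom['lon'], geom['lat'], geom['depth'] = srcgeom.T
print(geom)  # [(10., 45., 5.) (10.5, 45.2, 8.)]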
def flush(self):
    """
    Save the measurements on the performance file (or on stdout)
    """
    for child in self.children:
        child.flush()
    data = self.get_data()
    if len(data) == 0:  # no information
        return []
    # reset monitor
    self.duration = 0
    self.mem = 0
    self.counts = 0
    if self.hdf5path:
        # 'a' was the implicit default mode in old h5py versions;
        # recent versions require it to be spelled out
        h5 = h5py.File(self.hdf5path, 'a')
        try:
            pdata = h5['performance_data']
        except KeyError:
            pdata = hdf5.create(h5, 'performance_data', perf_dt)
        hdf5.extend(pdata, data)
        h5.close()
    # else print(data[0]) on stdout
    return data
def extend(self, key, array):
    """
    Extend the dataset associated to the given key; create it if needed

    :param key: name of the dataset
    :param array: array to store
    """
    try:
        dset = self.hdf5[key]
    except KeyError:
        dset = hdf5.create(self.hdf5, key, array.dtype,
                           shape=(None,) + array.shape[1:])
    hdf5.extend(dset, array)
    return dset
def extend(self, key, array, **attrs):
    """
    Extend the dataset associated to the given key; create it if needed

    :param key: name of the dataset
    :param array: array to store
    :param attrs: a dictionary of attributes
    """
    try:
        dset = self.hdf5[key]
    except KeyError:
        dset = hdf5.create(self.hdf5, key, array.dtype,
                           shape=(None,) + array.shape[1:])
    hdf5.extend(dset, array)
    for k, v in attrs.items():
        dset.attrs[k] = v
    return dset
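# A minimal sketch of what hdf5.create/hdf5.extend are expected to do, based
# on their usage in the two extend() methods above (the real implementations
# live in openquake.baselib.hdf5): create a dataset with a resizable first
# axis, then grow it on each append. Names and file path are illustrative.
import h5py
import numpy

def create(h5, key, dtype, shape=(None,)):
    # a None in maxshape marks that axis as resizable
    return h5.create_dataset(key, shape=(0,) + shape[1:], dtype=dtype,
                             maxshape=shape, chunks=True)

def extend(dset, array):
    n = len(dset)
    dset.resize((n + len(array),) + dset.shape[1:])
    dset[n:] = array

with h5py.File('/tmp/extend_example.hdf5', 'w') as h5:
    dset = create(h5, 'data', numpy.float64)
    extend(dset, numpy.array([1., 2., 3.]))
    extend(dset, numpy.array([4., 5.]))
    print(dset[:])  # [1. 2. 3. 4. 5.]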
def agg(self, acc, result):
    """
    Aggregate losses and store them in the datastore.

    :param acc: accumulator dictionary
    :param result: dictionary coming from event_based_risk
    """
    self.gmfbytes += result.pop('gmfbytes')
    with self.monitor('saving event loss tables', autoflush=True):
        if self.oqparam.asset_loss_table:
            for lr, array in sorted(result.pop('ASSLOSS').items()):
                hdf5.extend(self.ass_loss_table[lr], array)
                self.ass_bytes += array.nbytes
        for lr, array in sorted(result.pop('AGGLOSS').items()):
            hdf5.extend(self.agg_loss_table[lr], array)
            self.agg_bytes += array.nbytes
        self.datastore.hdf5.flush()
    return acc + result
def flush(self):
    """
    Save the measurements on the performance file (or on stdout)
    """
    if not self._flush:
        raise RuntimeError(
            'Monitor(%r).flush() must not be called in a worker'
            % self.operation)
    for child in self.children:
        child.hdf5 = self.hdf5
        child.flush()
    data = self.get_data()
    if len(data) == 0:  # no information
        return []
    elif self.hdf5:
        hdf5.extend(self.hdf5['performance_data'], data)
    # reset monitor
    self.duration = 0
    self.mem = 0
    self.counts = 0
    return data
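# A stripped-down sketch of the Monitor protocol implied by the two flush()
# variants above: a context manager that accumulates measurements and hands
# them over (resetting itself) on flush(). Purely illustrative, not the
# implementation in openquake.baselib.performance.
import time

class MiniMonitor:
    def __init__(self, operation):
        self.operation = operation
        self.duration = 0.0
        self.counts = 0

    def __enter__(self):
        self._start = time.time()
        return self

    def __exit__(self, etype, exc, tb):
        self.duration += time.time() - self._start
        self.counts += 1

    def flush(self):
        data = [(self.operation, self.duration, self.counts)]
        self.duration = 0.0  # reset, mirroring the real flush()
        self.counts = 0
        return data

mon = MiniMonitor('compute')
with mon:
    sum(x * x for x in range(10 ** 5))
print(mon.flush())  # e.g. [('compute', 0.0071, 1)]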
def combine_curves_and_save_gmfs(self, acc, res):
    """
    Combine the hazard curves (if any) and save the gmfs (if any)
    sequentially; notice that the gmfs may come from different tasks
    in any order.

    :param acc: an accumulator for the hazard curves
    :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
    :returns: a new accumulator
    """
    sav_mon = self.monitor('saving gmfs')
    agg_mon = self.monitor('aggregating hcurves')
    for rlzi in res:
        gmfa, curves = res[rlzi]
        if gmfa is not None:
            with sav_mon:
                hdf5.extend(self.datastore['gmf_data/%04d' % rlzi], gmfa)
        if curves is not None:  # aggregate hcurves
            with agg_mon:
                self.agg_dicts(acc, {rlzi: curves})
    sav_mon.flush()
    agg_mon.flush()
    self.datastore.flush()
    return acc
def get_source_models(oqparam, gsim_lt, source_model_lt, monitor,
                      in_memory=True, srcfilter=None):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param monitor:
        an `openquake.baselib.performance.Monitor` instance
    :param in_memory:
        if True, keep in memory the sources, else just collect the TRTs
    :param srcfilter:
        a SourceFilter instance with a .filename pointing to the cache file
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.LtSourceModel`
        tuples
    """
    make_sm = SourceModelFactory()
    spinning_off = oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = 'no' if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool'
    smlt_dir = os.path.dirname(source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time,
        oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing,
        oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization,
        oqparam.minimum_magnitude,
        not spinning_off,
        oqparam.source_id)
    if oqparam.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oqparam.inputs["source_model"], converter)
    elif in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(
            nrml.read_source_models, monitor=monitor, distribute=dist)
        for sm in source_model_lt.gen_source_models(gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}

    # consider only the effective realizations
    nr = 0
    idx = 0
    grp_id = 0
    if monitor.hdf5:
        sources = hdf5.create(monitor.hdf5, 'source_info', source_info_dt)
        hdf5.create(monitor.hdf5, 'source_geom', point3d)
    filename = None
    source_ids = set()
    for sm in source_model_lt.gen_source_models(gsim_lt):
        apply_unc = functools.partial(
            source_model_lt.apply_uncertainties, sm.path)
        src_groups = []
        for name in sm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            if oqparam.calculation_mode.startswith('ucerf'):
                sg = copy.copy(grp)
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                source_ids.add(src.source_id)
                src.src_group_id = grp_id
                src.id = idx
                if oqparam.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                src_groups.append(sg)
                idx += 1
                grp_id += 1
                data = [(sg.id, src.source_id, src.code, 0, 0,
                         src.num_ruptures, 0, 0, 0)]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            elif in_memory:
                newsm = make_sm(fname, dic[fname], apply_unc,
                                oqparam.investigation_time)
                for sg in newsm:
                    nr += sum(src.num_ruptures for src in sg)
                    # sample a source for each group
                    if os.environ.get('OQ_SAMPLE_SOURCES'):
                        sg.sources = random_filtered_sources(
                            sg.sources, srcfilter,
                            sg.id + oqparam.random_seed)
                    for src in sg:
                        source_ids.add(src.source_id)
                        src.src_group_id = grp_id
                        src.id = idx
                        idx += 1
                    sg.id = grp_id
                    grp_id += 1
                    src_groups.append(sg)
                if monitor.hdf5:
                    store_sm(newsm, filename, monitor)
            else:  # just collect the TRT models
                groups = logictree.read_source_groups(fname)
                for group in groups:
                    source_ids.update(src['id'] for src in group)
                src_groups.extend(groups)

        if grp_id >= TWO16:
            # the limit is really needed only for event based calculations
            raise ValueError('There is a limit of %d src groups!' % TWO16)

        for brid, srcids in source_model_lt.info.applytosources.items():
            for srcid in srcids:
                if srcid not in source_ids:
                    raise ValueError(
                        'The source %s is not in the source model, please '
                        'fix applyToSources in %s or the source model'
                        % (srcid, source_model_lt.filename))

        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)

        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r"
                        % (sm, src_group.trt, gsim_file))
        yield sm

    logging.info('The composite source model has {:,d} ruptures'.format(nr))

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in make_sm.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not make_sm.changes:
                dupl += hits
    if (dupl and not oqparam.optimize_same_id_sources
            and not oqparam.is_event_based()):
        logging.warning(
            'You are doing redundant calculations: please make sure '
            'that different sources have different IDs and set '
            'optimize_same_id_sources=true in your .ini file')
    if make_sm.changes:
        logging.info('Applied %d changes to the composite source model',
                     make_sm.changes)
def build_events_from_sources(self):
    """
    Prefilter the composite source model and store the source_info
    """
    gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
    sources = self.csm.get_sources()
    # weighting the heavy sources
    nrups = parallel.Starmap(
        count_ruptures,
        [(src,) for src in sources if src.code in b'AMC'],
        h5=self.datastore.hdf5).reduce()
    for src in sources:
        src.nsites = 1  # avoid 0 weight
        try:
            src.num_ruptures = nrups[src.source_id]
        except KeyError:
            src.num_ruptures = src.count_ruptures()
    maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
        self.oqparam.concurrent_tasks or 1)
    eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
    allargs = []
    if self.oqparam.is_ucerf():
        # manage the filtering in a special way
        for sg in self.csm.src_groups:
            for src in sg:
                src.src_filter = self.srcfilter
        srcfilter = nofilter  # otherwise it would be ultra-slow
    else:
        srcfilter = self.srcfilter
    logging.info('Building ruptures')
    for sg in self.csm.src_groups:
        if not sg.sources:
            continue
        logging.info('Sending %s', sg)
        par = self.param.copy()
        par['gsims'] = gsims_by_trt[sg.trt]
        for src_group in sg.split(maxweight):
            allargs.append((src_group, srcfilter, par))
    smap = parallel.Starmap(
        sample_ruptures, allargs, h5=self.datastore.hdf5)
    mon = self.monitor('saving ruptures')
    self.nruptures = 0
    for dic in smap:
        # NB: dic should be a dictionary, but when the calculation dies
        # for an OOM it can become None, thus giving a very confusing error
        if dic is None:
            raise MemoryError('You ran out of memory!')
        rup_array = dic['rup_array']
        if len(rup_array) == 0:
            continue
        if dic['calc_times']:
            calc_times += dic['calc_times']
        if dic['eff_ruptures']:
            eff_ruptures += dic['eff_ruptures']
        with mon:
            n = len(rup_array)
            rup_array['id'] = numpy.arange(
                self.nruptures, self.nruptures + n)
            self.nruptures += n
            hdf5.extend(self.datastore['ruptures'], rup_array)
            hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
    if len(self.datastore['ruptures']) == 0:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')

    # must be called before storing the events
    self.store_rlz_info(eff_ruptures)  # store full_lt
    self.store_source_info(calc_times)
    imp = calc.RuptureImporter(self.datastore)
    with self.monitor('saving ruptures and events'):
        imp.import_rups(self.datastore.getitem('ruptures')[()])
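# Sketch of the accumulator pattern used in build_events_from_sources above:
# AccumDict(accum=0) (from openquake.baselib.general) behaves like a dict
# whose values add up under +=. A plain-Python equivalent, with made-up data,
# for illustration:
from collections import defaultdict

eff_ruptures = defaultdict(int)  # trt -> number of ruptures
for partial in [{'Active Shallow Crust': 10},
                {'Active Shallow Crust': 5},
                {'Stable Continental': 3}]:
    for trt, n in partial.items():
        eff_ruptures[trt] += n
print(dict(eff_ruptures))
# {'Active Shallow Crust': 15, 'Stable Continental': 3}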
def get_models(self):
    """
    :yields: :class:`openquake.commonlib.logictree.LtSourceModel` tuples
    """
    oq = self.oqparam
    spinning_off = self.oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = ('no' if os.environ.get('OQ_DISTRIBUTE') == 'no'
            else 'processpool')
    smlt_dir = os.path.dirname(self.source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oq.investigation_time,
        oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing,
        oq.width_of_mfd_bin,
        oq.area_source_discretization,
        oq.minimum_magnitude,
        not spinning_off,
        oq.source_id)
    if oq.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oq.inputs["source_model"], converter)
        dic = {'ucerf': grp}
    elif self.in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(
            nrml.read_source_models, distribute=dist,
            hdf5path=self.hdf5.filename if self.hdf5 else None)
        # NB: hdf5path is None in logictree_test.py
        for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}
    else:
        dic = {}

    # consider only the effective realizations
    idx = 0
    if self.hdf5:
        sources = hdf5.create(self.hdf5, 'source_info', source_info_dt)
        hdf5.create(self.hdf5, 'source_geom', point3d)
        hdf5.create(self.hdf5, 'source_mfds', hdf5.vstr)
    grp_id = 0
    for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
        if 'ucerf' in dic:
            sg = copy.copy(dic['ucerf'])
            sm.src_groups = [sg]
            sg.id = grp_id
            src = sg[0].new(sm.ordinal, sm.names)  # one source
            src.src_group_id = grp_id
            src.id = idx
            if oq.number_of_logic_tree_samples:
                src.samples = sm.samples
            sg.sources = [src]
            idx += 1
            grp_id += 1
            data = [(sg.id, src.source_id, src.code, 0, 0, -1,
                     src.num_ruptures, 0, 0, 0, idx)]
            hdf5.extend(sources, numpy.array(data, source_info_dt))
        else:
            self.apply_uncertainties(sm, idx, dic)
        yield sm
    if self.hdf5:
        hdf5.extend(self.hdf5['source_mfds'],
                    numpy.array(list(self.mfds), hdf5.vstr))
        self.hdf5['source_mags'] = sorted(self.mags)

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in self.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not self.changes:
                dupl += hits
    if self.changes:
        logging.info('Applied %d changes to the composite source model',
                     self.changes)
def _store_results(smap, lt_models, source_model_lt, gsim_lt, oq, h5):
    """
    Collect the source groups coming from the parallel `smap`, attach
    them to the corresponding logic tree models and store the source
    information in `h5` (if given); return the updated `lt_models`.
    """
    mags = set()
    changes = 0
    fname_hits = collections.Counter()
    groups = [[] for _ in lt_models]  # (fileno, src_groups)
    for dic in smap:
        ltm = lt_models[dic['ordinal']]
        groups[ltm.ordinal].append((dic['fileno'], dic['src_groups']))
        fname_hits += dic['fname_hits']
        changes += dic['changes']
        mags.update(dic['mags'])
        gsim_file = oq.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in dic['src_groups']:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r "
                        "inconsistent with the ones in %r"
                        % (ltm, src_group.trt, gsim_file))
    # global checks
    idx = 0
    grp_id = 0
    for ltm in lt_models:
        for fileno, grps in sorted(groups[ltm.ordinal]):
            for grp in grps:
                grp.id = grp_id
                for src in grp:
                    src.src_group_id = grp_id
                    src.id = idx
                    idx += 1
                ltm.src_groups.append(grp)
                grp_id += 1
                if grp_id >= TWO16:
                    # the limit is only for event based calculations
                    raise ValueError('There is a limit of %d src groups!'
                                     % TWO16)
        # check applyToSources
        source_ids = set(src.source_id for grp in ltm.src_groups
                         for src in grp)
        for brid, srcids in source_model_lt.info.applytosources.items():
            if brid in ltm.path:
                for srcid in srcids:
                    if srcid not in source_ids:
                        raise ValueError(
                            "The source %s is not in the source model, "
                            "please fix applyToSources in %s or the "
                            "source model"
                            % (srcid, source_model_lt.filename))
        if h5:
            sources = h5['source_info']
            for sg in ltm.src_groups:
                sg.info['grp_id'] = sg.id
                hdf5.extend(sources, sg.info)
    if h5:
        h5['source_mags'] = numpy.array(sorted(mags))

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not changes:
                dupl += hits
    if changes:
        logging.info('Applied %d changes to the composite source model',
                     changes)
    return lt_models
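# Sketch of the reassembly pattern used in _store_results: parallel results
# arrive in arbitrary order, so each is keyed by its fileno and the pairs are
# sorted to recover a deterministic ordering. Data below is made up.
results = [(2, ['grpC']), (0, ['grpA']), (1, ['grpB'])]  # out of order
ordered = [grps for fileno, grps in sorted(results)]
print(ordered)  # [['grpA'], ['grpB'], ['grpC']]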