def build_events_from_sources(self, srcfilter):
    """
    Prefilter the composite source model and store the source_info
    """
    oq = self.oqparam
    gsims_by_trt = self.csm.info.get_gsims_by_trt()
    logging.info('Building ruptures')
    eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
    ses_idx = 0
    allargs = []
    for sm_id, sm in enumerate(self.csm.source_models):
        logging.info('Sending %s', sm)
        for sg in sm.src_groups:
            if not sg.sources:
                continue
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            if sg.atomic:  # do not split the group
                allargs.append((sg, srcfilter, par))
            else:  # traditional groups
                for block in self.block_splitter(sg.sources, key=by_grp):
                    if 'ucerf' in oq.calculation_mode:
                        for i in range(oq.ses_per_logic_tree_path):
                            par = par.copy()  # avoid mutating the dict
                            par['ses_seeds'] = [
                                (ses_idx, oq.ses_seed + i + 1)]
                            allargs.append((block, srcfilter, par))
                            ses_idx += 1
                    else:
                        allargs.append((block, srcfilter, par))
    smap = parallel.Starmap(self.build_ruptures.__func__, allargs,
                            h5=self.datastore.hdf5)
    mon = self.monitor('saving ruptures')
    for dic in smap:
        if dic['calc_times']:
            calc_times += dic['calc_times']
        if dic['eff_ruptures']:
            eff_ruptures += dic['eff_ruptures']
        if dic['rup_array']:
            with mon:
                self.rupser.save(dic['rup_array'])
    self.rupser.close()
    if not self.rupser.nruptures:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')
    # logic tree reduction, must be called before storing the events
    self.store_rlz_info(eff_ruptures)
    self.init_logic_tree(self.csm.info)
    with self.monitor('store source_info'):
        self.store_source_info(calc_times)
    logging.info('Reordering the ruptures and storing the events')
    attrs = self.datastore.getitem('ruptures').attrs
    sorted_ruptures = self.datastore.getitem('ruptures')[()]
    # order the ruptures by rup_id
    sorted_ruptures.sort(order='serial')
    ngroups = len(self.csm.info.trt_by_grp)
    grp_indices = numpy.zeros((ngroups, 2), U32)
    grp_ids = sorted_ruptures['grp_id']
    for grp_id, [startstop] in get_indices(grp_ids).items():
        grp_indices[grp_id] = startstop
    self.datastore['ruptures'] = sorted_ruptures
    self.datastore['ruptures']['id'] = numpy.arange(len(sorted_ruptures))
    self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
    with self.monitor('saving events'):
        self.save_events(sorted_ruptures)
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}
    assert oq.max_sites_per_tile > oq.max_sites_disagg, (
        oq.max_sites_per_tile, oq.max_sites_disagg)
    psd = self.set_psd()  # must go before to set the pointsource_distance
    run_preclassical(self.csm, oq, self.datastore)
    # exit early if we want to perform only a preclassical
    if oq.calculation_mode == 'preclassical':
        recs = [tuple(row) for row in self.csm.source_info.values()]
        self.datastore['source_info'] = numpy.array(
            recs, readinput.source_info_dt)
        self.datastore['full_lt'] = self.csm.full_lt
        self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
        return
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    smap = parallel.Starmap(classical, self.get_args(acc0),
                            h5=self.datastore.hdf5)
    smap.monitor.save('srcfilter', self.src_filter())
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        source_ids = self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(source_ids[s] for s in srcids)
            self.by_task.clear()
    if self.calc_times:  # can be empty in case of errors
        self.numctxs = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Total number of contexts: {:_d}'.format(
            int(self.numctxs)))
        logging.info('Average number of sites per context: %d',
                     numsites / self.numctxs)
    if psd:
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning('The pointsource_distance of %d km is too '
                            'small compared to a maxradius of %d km',
                            psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def acc0(self):
    """
    Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)
    """
    zd = AccumDict()
    num_levels = len(self.oqparam.imtls.array)
    rparams = {'grp_id', 'occurrence_rate', 'weight', 'probs_occur',
               'clon_', 'clat_', 'rrup_'}
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    n = len(self.full_lt.sm_rlzs)
    trts = list(self.full_lt.gsim_lt.values)
    for sm in self.full_lt.sm_rlzs:
        for grp_id in self.full_lt.grp_ids(sm.ordinal):
            trt = trts[grp_id // n]
            gsims = gsims_by_trt[trt]
            cm = ContextMaker(trt, gsims)
            rparams.update(cm.REQUIRES_RUPTURE_PARAMETERS)
            for dparam in cm.REQUIRES_DISTANCES:
                rparams.add(dparam + '_')
    zd.eff_ruptures = AccumDict(accum=0)  # trt -> eff_ruptures
    if self.few_sites:
        self.rparams = sorted(rparams)
        for k in self.rparams:  # variable length arrays
            if k == 'grp_id':
                self.datastore.create_dset('rup/' + k, U16)
            elif k == 'probs_occur':  # vlen
                self.datastore.create_dset('rup/' + k, hdf5.vfloat64)
            elif k.endswith('_'):  # array of shape (U, N)
                self.datastore.create_dset(
                    'rup/' + k, F32, shape=(None, self.N),
                    compression='gzip')
            else:
                self.datastore.create_dset('rup/' + k, F32)
    else:
        self.rparams = {}
    self.by_task = {}  # task_no => src_ids
    self.totrups = 0  # total number of ruptures before collapsing
    self.maxradius = 0
    self.gidx = {
        tuple(grp_ids): i
        for i, grp_ids in enumerate(self.datastore['grp_ids'])}
    # estimate max memory per core
    max_num_gsims = max(len(gsims) for gsims in gsims_by_trt.values())
    max_num_grp_ids = max(len(grp_ids) for grp_ids in self.gidx)
    pmapbytes = self.N * num_levels * max_num_gsims * max_num_grp_ids * 8
    if pmapbytes > TWO32:
        logging.warning(TOOBIG % (self.N, num_levels, max_num_gsims,
                                  max_num_grp_ids, humansize(pmapbytes)))
    logging.info(MAXMEMORY % (self.N, num_levels, max_num_gsims,
                              max_num_grp_ids, humansize(pmapbytes)))
    self.Ns = len(self.csm.source_info)
    if self.oqparam.disagg_by_src:
        sources = self.get_source_ids()
        self.datastore.create_dset(
            'disagg_by_src', F32,
            (self.N, self.R, self.M, self.L1, self.Ns))
        self.datastore.set_shape_attrs(
            'disagg_by_src', site_id=self.N, rlz_id=self.R,
            imt=list(self.oqparam.imtls), lvl=self.L1, src_id=sources)
    return zd
def get_fragility_functions(fname, continuous_fragility_discretization,
                            steps_per_interval=None):
    """
    :param fname:
        path of the fragility file
    :param continuous_fragility_discretization:
        continuous_fragility_discretization parameter
    :param steps_per_interval:
        steps_per_interval parameter
    :returns:
        damage_states list and dictionary taxonomy -> functions
    """
    [fmodel] = read_nodes(
        fname, lambda el: el.tag.endswith('fragilityModel'),
        nodefactory['fragilityModel'])
    # ~fmodel.description is ignored
    limit_states = ~fmodel.limitStates
    tag = 'ffc' if fmodel['format'] == 'continuous' else 'ffd'
    fragility_functions = AccumDict()  # taxonomy -> functions
    for ffs in fmodel.getnodes('ffs'):
        add_zero_value = False
        # NB: the noDamageLimit is only defined for discrete fragility
        # functions. It is a way to set the starting point of the functions:
        # if noDamageLimit is at the left of each IMLs, it means that the
        # function starts at zero at the given point, so we need to add
        # noDamageLimit to the list of IMLs and zero to the list of poes
        nodamage = ffs.attrib.get('noDamageLimit')
        taxonomy = ~ffs.taxonomy
        imt_str, imls, min_iml, max_iml, imlUnit = ~ffs.IML
        if fmodel['format'] == 'discrete':
            if nodamage is not None and nodamage < imls[0]:
                # discrete fragility
                imls = [nodamage] + imls
                add_zero_value = True
            if steps_per_interval:
                gen_imls = scientific.fine_graining(imls, steps_per_interval)
            else:
                gen_imls = imls
        else:  # continuous
            if min_iml is None:
                raise InvalidFile(
                    'Missing attribute minIML, line %d' % ffs.IML.lineno)
            elif max_iml is None:
                raise InvalidFile(
                    'Missing attribute maxIML, line %d' % ffs.IML.lineno)
            gen_imls = numpy.linspace(min_iml, max_iml,
                                      continuous_fragility_discretization)
        fragility_functions[taxonomy] = scientific.FragilityFunctionList(
            [], imt=imt_str, imls=list(gen_imls),
            no_damage_limit=nodamage,
            continuous_fragility_discretization=
            continuous_fragility_discretization,
            steps_per_interval=steps_per_interval)
        lstates = []
        for ff in ffs.getnodes(tag):
            ls = ff['ls']  # limit state
            lstates.append(ls)
            if tag == 'ffc':
                with context(fname, ff):
                    mean_stddev = ~ff.params
                fragility_functions[taxonomy].append(
                    scientific.FragilityFunctionContinuous(
                        ls, *mean_stddev))
            else:  # discrete
                with context(fname, ff):
                    poes = ~ff.poEs
                if add_zero_value:
                    poes = [0.] + poes
                fragility_functions[taxonomy].append(
                    scientific.FragilityFunctionDiscrete(
                        ls, imls, poes, nodamage))
        if lstates != limit_states:
            raise InvalidFile("Expected limit states %s, got %s in %s" %
                              (limit_states, lstates, fname))
    fragility_functions.damage_states = ['no_damage'] + limit_states
    return fragility_functions
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File where to store the source info
    """
    logging.info('Reading the CompositeSourceModel')
    full_lt = get_full_lt(oqparam)
    if oqparam.cachedir and not oqparam.is_ucerf():
        csm = _get_cachedir(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    et_ids = csm.get_et_ids()
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    grp_id = {tuple(arr): i for i, arr in enumerate(et_ids)}
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    lens = []
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            lens.append(len(src.et_ids))
            src.grp_id = grp_id[tuple(src.et_ids)]
            row = [src.source_id, src.grp_id, src.code, 0, 0, 0,
                   src.id, full_lt.trti[src.tectonic_region_type]]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = ['%.2f' % item[0]
                           for item in src.get_annual_occurrence_rates()]
            mags[sg.trt].update(srcmags)
    logging.info('There are %d groups and %d sources with len(et_ids)=%.1f',
                 len(csm.src_groups),
                 sum(len(sg) for sg in csm.src_groups), numpy.mean(lens))
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['et_ids'] = et_ids
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = arr = numpy.array(sorted(mags[trt]))
            h5['source_mags/' + trt] = arr
        oqparam.maximum_distance.interp(mags_by_trt)
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
def compute_ruptures(sources, sitecol, siteidx, rlzs_assoc, monitor):
    """
    :param sources:
        List of commonlib.source.Source tuples
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param siteidx:
        always equal to 0
    :param rlzs_assoc:
        a :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param monitor:
        monitor instance
    :returns:
        a dictionary trt_model_id -> [Rupture instances]
    """
    assert siteidx == 0, (
        'siteidx can be nonzero only for the classical_tiling calculations: '
        'tiling with the EventBasedRuptureCalculator is an error')
    # NB: by construction each block is a non-empty list with
    # sources of the same trt_model_id
    trt_model_id = sources[0].trt_model_id
    oq = monitor.oqparam
    trt = sources[0].tectonic_region_type
    try:
        max_dist = oq.maximum_distance[trt]
    except KeyError:
        max_dist = oq.maximum_distance['default']
    cmaker = ContextMaker(rlzs_assoc.gsims_by_trt_id[trt_model_id])
    params = cmaker.REQUIRES_RUPTURE_PARAMETERS
    rup_data_dt = numpy.dtype(
        [('rupserial', U32), ('multiplicity', U16), ('numsites', U32)] +
        [(param, F32) for param in params])
    eb_ruptures = []
    rup_data = []
    calc_times = []
    rup_mon = monitor('filtering ruptures', measuremem=False)
    # Compute and save stochastic event sets
    for src in sources:
        t0 = time.time()
        s_sites = src.filter_sites_by_distance_to_source(max_dist, sitecol)
        if s_sites is None:
            continue
        rupture_filter = RuptureFilter(
            s_sites, max_dist, oq.imtls, cmaker.gsims,
            oq.truncation_level, oq.minimum_intensity)
        num_occ_by_rup = sample_ruptures(
            src, oq.ses_per_logic_tree_path, rlzs_assoc.csm_info)
        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_ruptures *before* the filtering
        for ebr in build_eb_ruptures(
                src, num_occ_by_rup, rupture_filter, oq.random_seed,
                rup_mon):
            nsites = len(ebr.indices)
            rc = cmaker.make_rupture_context(ebr.rupture)
            ruptparams = tuple(getattr(rc, param) for param in params)
            rup_data.append((ebr.serial, len(ebr.etags), nsites) +
                            ruptparams)
            eb_ruptures.append(ebr)
        dt = time.time() - t0
        calc_times.append((src.id, dt))
    res = AccumDict({trt_model_id: eb_ruptures})
    res.calc_times = calc_times
    res.rup_data = numpy.array(rup_data, rup_data_dt)
    res.trt = trt
    return res
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance,
                              use_rtree=False)
    csm = self.csm.filter(src_filter)  # fine filtering
    self.datastore['csm_info'] = csm.info
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    self.bin_edges = {}

    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts

    # build mag_edges
    min_mag = min(sg.min_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()

    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(oq.concurrent_tasks)
    mon = self.monitor('disaggregation')
    R = len(self.rlzs_assoc.realizations)
    iml4 = disagg.make_iml4(R, oq.imtls, oq.iml_disagg,
                            oq.poes_disagg or (None,), curves)
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(oq.poes_disagg or [None]):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]
    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(rlzs_by_gsim,
                                  src_filter.integration_distance)
            for block in csm.split_in_blocks(maxweight, sources):
                all_args.append((src_filter, block, cmaker, iml4, trti,
                                 self.bin_edges, oq, mon))
    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result, AccumDict(accum={}))
    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def build_events_from_sources(self):
    """
    Prefilter the composite source model and store the source_info
    """
    gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
    sources = self.csm.get_sources()
    # weighting the heavy sources
    nrups = parallel.Starmap(
        count_ruptures,
        [(src,) for src in sources if src.code in b'AMC'],
        h5=self.datastore.hdf5).reduce()
    for src in sources:
        src.nsites = 1  # avoid 0 weight
        try:
            src.num_ruptures = nrups[src.source_id]
        except KeyError:
            src.num_ruptures = src.count_ruptures()
    maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
        self.oqparam.concurrent_tasks or 1)
    eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
    allargs = []
    if self.oqparam.is_ucerf():
        # manage the filtering in a special way
        for sg in self.csm.src_groups:
            for src in sg:
                src.src_filter = self.srcfilter
        srcfilter = nofilter  # otherwise it would be ultra-slow
    else:
        srcfilter = self.srcfilter
    logging.info('Building ruptures')
    for sg in self.csm.src_groups:
        if not sg.sources:
            continue
        logging.info('Sending %s', sg)
        par = self.param.copy()
        par['gsims'] = gsims_by_trt[sg.trt]
        for src_group in sg.split(maxweight):
            allargs.append((src_group, srcfilter, par))
    smap = parallel.Starmap(sample_ruptures, allargs,
                            h5=self.datastore.hdf5)
    mon = self.monitor('saving ruptures')
    self.nruptures = 0
    for dic in smap:
        # NB: dic should be a dictionary, but when the calculation dies
        # for an OOM it can become None, thus giving a very confusing error
        if dic is None:
            raise MemoryError('You ran out of memory!')
        rup_array = dic['rup_array']
        if len(rup_array) == 0:
            continue
        if dic['calc_times']:
            calc_times += dic['calc_times']
        if dic['eff_ruptures']:
            eff_ruptures += dic['eff_ruptures']
        with mon:
            n = len(rup_array)
            rup_array['id'] = numpy.arange(self.nruptures,
                                           self.nruptures + n)
            self.nruptures += n
            hdf5.extend(self.datastore['ruptures'], rup_array)
            hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
    if len(self.datastore['ruptures']) == 0:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')
    # must be called before storing the events
    self.store_rlz_info(eff_ruptures)  # store full_lt
    self.store_source_info(calc_times)
    imp = calc.RuptureImporter(self.datastore)
    with self.monitor('saving ruptures and events'):
        imp.import_rups(self.datastore.getitem('ruptures')[()])
def build_events_from_sources(self):
    """
    Prefilter the composite source model and store the source_info
    """
    oq = self.oqparam
    gsims_by_trt = self.csm.gsim_lt.values

    def weight_src(src):
        return src.num_ruptures

    logging.info('Building ruptures')
    smap = parallel.Starmap(self.build_ruptures.__func__,
                            monitor=self.monitor())
    eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
    calc_times = AccumDict(accum=numpy.zeros(3, F32))
    ses_idx = 0
    for sm_id, sm in enumerate(self.csm.source_models):
        logging.info('Sending %s', sm)
        for sg in sm.src_groups:
            if not sg.sources:
                continue
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for block in self.block_splitter(
                    sg.sources, weight_src, by_grp):
                if 'ucerf' in oq.calculation_mode:
                    for i in range(oq.ses_per_logic_tree_path):
                        par['ses_seeds'] = [(ses_idx, oq.ses_seed + i + 1)]
                        smap.submit(block, self.src_filter, par)
                        ses_idx += 1
                else:
                    smap.submit(block, self.src_filter, par)
    mon = self.monitor('saving ruptures')
    for dic in smap:
        if dic['calc_times']:
            calc_times += dic['calc_times']
        if dic['eff_ruptures']:
            eff_ruptures += dic['eff_ruptures']
        if dic['rup_array']:
            with mon:
                self.rupser.save(dic['rup_array'])
    self.rupser.close()
    if not self.rupser.nruptures:
        raise RuntimeError('No ruptures were generated, perhaps the '
                           'investigation time is too short')
    # logic tree reduction, must be called before storing the events
    self.store_rlz_info(eff_ruptures)
    store_rlzs_by_grp(self.datastore)
    self.init_logic_tree(self.csm.info)
    with self.monitor('store source_info', autoflush=True):
        self.store_source_info(calc_times)
    logging.info('Reordering the ruptures and storing the events')
    attrs = self.datastore.getitem('ruptures').attrs
    sorted_ruptures = self.datastore.getitem('ruptures').value
    # order the ruptures by serial
    sorted_ruptures.sort(order='serial')
    ngroups = len(self.csm.info.trt_by_grp)
    grp_indices = numpy.zeros((ngroups, 2), U32)
    grp_ids = sorted_ruptures['grp_id']
    for grp_id, [startstop] in get_indices(grp_ids).items():
        grp_indices[grp_id] = startstop
    self.datastore['ruptures'] = sorted_ruptures
    self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
    self.save_events(sorted_ruptures)
def scenario_damage(riskinputs, param, monitor):
    """
    Core function for a damage computation.

    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :param param:
        dictionary of extra parameters
    :returns:
        a dictionary {'d_asset': [(l, r, a, mean-stddev), ...],
        'd_event': dict eid -> array of shape (L, D) + optional consequences}

    `d_asset` and `d_tag` are related to the damage distributions.
    """
    crmodel = monitor.read('crmodel')
    L = len(crmodel.loss_types)
    D = len(crmodel.damage_states)
    consequences = crmodel.get_consequences()
    # algorithm used to compute the discrete damage distributions
    approx_ddd = param['approx_ddd']
    z = numpy.zeros((L, D - 1), F32 if approx_ddd else U32)
    d_event = AccumDict(accum=z)
    res = {'d_event': d_event, 'd_asset': []}
    for name in consequences:
        res['avg_' + name] = []
        res[name + '_by_event'] = AccumDict(accum=numpy.zeros(L, F64))
        # using F64 here is necessary: with F32 the non-associativity
        # of addition would hurt too much with multiple tasks
    seed = param['master_seed']
    num_events = param['num_events']  # per realization
    for ri in riskinputs:
        # here instead F32 floats are ok
        acc = []  # (aid, eid, lid, ds...)
        ri.hazard_getter.init()
        for out in ri.gen_outputs(crmodel, monitor):
            r = out.rlzi
            ne = num_events[r]  # total number of events
            for l, loss_type in enumerate(crmodel.loss_types):
                for asset, fractions in zip(ri.assets, out[loss_type]):
                    aid = asset['ordinal']
                    if approx_ddd:
                        ddds = fractions * asset['number']
                    else:
                        ddds = bin_ddd(fractions, asset['number'],
                                       seed + aid)
                    # ddds has shape E', D with E' == len(out.eids)
                    for e, ddd in enumerate(ddds):
                        dmg = ddd[1:]
                        if dmg.sum():
                            eid = out.eids[e]  # (aid, eid, l) is unique
                            acc.append((aid, eid, l) + tuple(dmg))
                            d_event[eid][l] += ddd[1:]
                    tot = ddds.sum(axis=0)  # shape D
                    nodamage = asset['number'] * (ne - len(ddds))
                    tot[0] += nodamage
                    res['d_asset'].append((l, r, aid, tot))
                    # TODO: use the ddd, not the fractions in compute_csq
                    csq = crmodel.compute_csq(asset, fractions, loss_type)
                    for name, values in csq.items():
                        res['avg_%s' % name].append(
                            (l, r, asset['ordinal'], values.sum(axis=0)))
                        by_event = res[name + '_by_event']
                        for eid, value in zip(out.eids, values):
                            by_event[eid][l] += value
    res['aed'] = numpy.array(acc, param['asset_damage_dt'])
    return res
def scenario_damage(riskinputs, crmodel, param, monitor):
    """
    Core function for a damage computation.

    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param crmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :param param:
        dictionary of extra parameters
    :returns:
        a dictionary {'d_asset': [(l, r, a, mean-stddev), ...],
        'd_event': dict eid -> array of shape (L, D) + optional consequences}

    `d_asset` and `d_tag` are related to the damage distributions.
    """
    L = len(crmodel.loss_types)
    D = len(crmodel.damage_states)
    consequences = crmodel.get_consequences()
    haz_mon = monitor('getting hazard', measuremem=False)
    rsk_mon = monitor('aggregating risk', measuremem=False)
    d_event = AccumDict(accum=numpy.zeros((L, D - 1), U32))
    res = {'d_event': d_event}
    for name in consequences:
        res[name + '_by_event'] = AccumDict(accum=numpy.zeros(L, F64))
        # using F64 here is necessary: with F32 the non-associativity
        # of addition would hurt too much with multiple tasks
    seed = param['master_seed']
    # algorithm used to compute the discrete damage distributions
    make_ddd = approx_ddd if param['approx_ddd'] else bin_ddd
    for ri in riskinputs:
        # otherwise test 4b will randomly break with last digit changes
        # in dmg_by_event :-(
        result = dict(d_asset=[])
        for name in consequences:
            result[name + '_by_asset'] = []
        ddic = AccumDict(accum=numpy.zeros((L, D - 1), F32))  # aid,eid->dd
        with haz_mon:
            ri.hazard_getter.init()
        for out in ri.gen_outputs(crmodel, monitor):
            with rsk_mon:
                r = out.rlzi
                for l, loss_type in enumerate(crmodel.loss_types):
                    for asset, fractions in zip(ri.assets, out[loss_type]):
                        aid = asset['ordinal']
                        ddds = make_ddd(fractions, asset['number'],
                                        seed + aid)
                        for e, ddd in enumerate(ddds):
                            eid = out.eids[e]
                            ddic[aid, eid][l] = ddd[1:]
                            d_event[eid][l] += ddd[1:]
                        if make_ddd is approx_ddd:
                            ms = mean_std(fractions * asset['number'])
                        else:
                            ms = mean_std(ddds)
                        result['d_asset'].append(
                            (l, r, asset['ordinal'], ms))
                        # TODO: use the ddd, not the fractions
                        # in compute_csq
                        csq = crmodel.compute_csq(asset, fractions,
                                                  loss_type)
                        for name, values in csq.items():
                            result[name + '_by_asset'].append(
                                (l, r, asset['ordinal'], mean_std(values)))
                            by_event = res[name + '_by_event']
                            for eid, value in zip(out.eids, values):
                                by_event[eid][l] += value
        with rsk_mon:
            result['aed'] = aed = numpy.zeros(len(ddic), param['aed_dt'])
            for i, ((aid, eid), dd) in enumerate(sorted(ddic.items())):
                aed[i] = (aid, eid, dd)
        yield result
    yield res
def run_preclassical(csm, oqparam, h5):
    """
    :param csm: a CompositeSourceModel with attribute .srcfilter
    :param oqparam: the parameters in job.ini file
    :param h5: a DataStore instance
    """
    # do nothing for atomic sources except counting the ruptures
    for src in csm.get_sources(atomic=True):
        src.num_ruptures = src.count_ruptures()
        src.nsites = len(csm.sitecol) if csm.sitecol else 1
    # run preclassical for non-atomic sources
    sources_by_grp = groupby(
        csm.get_sources(atomic=False),
        lambda src: (src.grp_id, msr_name(src)))
    param = dict(maximum_distance=oqparam.maximum_distance,
                 pointsource_distance=oqparam.pointsource_distance,
                 ps_grid_spacing=oqparam.ps_grid_spacing,
                 split_sources=oqparam.split_sources)
    srcfilter = SourceFilter(
        csm.sitecol.reduce(10000) if csm.sitecol else None,
        oqparam.maximum_distance)
    if csm.sitecol:
        logging.info('Sending %s', srcfilter.sitecol)
    if oqparam.ps_grid_spacing:
        # produce a preclassical task for each group
        allargs = ((srcs, srcfilter, param)
                   for srcs in sources_by_grp.values())
    else:
        # produce many preclassical tasks
        maxw = sum(len(srcs) for srcs in sources_by_grp.values()) / (
            oqparam.concurrent_tasks or 1)
        allargs = ((blk, srcfilter, param)
                   for srcs in sources_by_grp.values()
                   for blk in block_splitter(srcs, maxw))
    res = parallel.Starmap(
        preclassical, allargs, h5=h5,
        distribute=None if len(sources_by_grp) > 1 else 'no').reduce()
    if res and res['before'] != res['after']:
        logging.info(
            'Reduced the number of sources from {:_d} -> {:_d}'.format(
                res['before'], res['after']))
    if res and h5:
        csm.update_source_info(res['calc_times'], nsites=True)
    acc = AccumDict(accum=0)
    code2cls = get_code2cls()
    for grp_id, srcs in res.items():
        # srcs can be empty if the minimum_magnitude filter is on
        if srcs and not isinstance(grp_id, str):
            newsg = SourceGroup(srcs[0].tectonic_region_type)
            newsg.sources = srcs
            csm.src_groups[grp_id] = newsg
            for src in srcs:
                acc[src.code] += int(src.num_ruptures)
    for val, key in sorted((val, key) for key, val in acc.items()):
        cls = code2cls[key].__name__
        logging.info('{} ruptures: {:_d}'.format(cls, val))

    # sanity check
    for sg in csm.src_groups:
        for src in sg:
            assert src.num_ruptures
            assert src.nsites

    # store ps_grid data, if any
    for key, sources in res.items():
        if isinstance(key, str) and key.startswith('ps_grid/'):
            arrays = []
            for ps in sources:
                if hasattr(ps, 'location'):
                    lonlats = [ps.location.x, ps.location.y]
                    for src in getattr(ps, 'pointsources', []):
                        lonlats.extend([src.location.x, src.location.y])
                    arrays.append(F32(lonlats))
            h5[key] = arrays
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with datastore.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.store_stats()  # post-processing
        return {}
    assert oq.max_sites_per_tile > oq.max_sites_disagg, (
        oq.max_sites_per_tile, oq.max_sites_disagg)
    psd = self.set_psd()  # must go before to set the pointsource_distance
    run_preclassical(self.csm, oq, self.datastore)
    # exit early if we want to perform only a preclassical
    if oq.calculation_mode == 'preclassical':
        recs = [tuple(row) for row in self.csm.source_info.values()]
        self.datastore['source_info'] = numpy.array(
            recs, readinput.source_info_dt)
        self.datastore['full_lt'] = self.csm.full_lt
        self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
        return
    self.create_dsets()  # create the rup/ datasets BEFORE swmr_on()
    grp_ids = numpy.arange(len(self.csm.src_groups))
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    weights = [rlz.weight for rlz in self.realizations]
    pgetter = getters.PmapGetter(self.datastore, weights,
                                 self.sitecol.sids, oq.imtls)
    srcidx = {rec[0]: i
              for i, rec in enumerate(self.csm.source_info.values())}
    self.haz = Hazard(self.datastore, self.full_lt, pgetter, srcidx,
                      self.monitor('storing _poes', measuremem=True))
    args = self.get_args(grp_ids, self.haz.cmakers)
    self.counts = collections.Counter(arg[0][0].grp_id for arg in args)
    logging.info('grp_id->ntasks: %s', list(self.counts.values()))
    h5 = self.datastore.hdf5
    if self.N > oq.max_sites_per_tile:
        smap = parallel.Starmap(classical_tile, args, h5=h5)
    else:
        smap = parallel.Starmap(classical, args, h5=h5)
    smap.monitor.save('sitecol', self.sitecol)
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    acc = {}
    for grp_id, num_tasks in self.counts.items():
        if num_tasks > 1:
            self.haz.init(acc, grp_id)
    logging.info('Sending %d tasks', len(args))
    smap.reduce(self.agg_dicts, acc)
    logging.debug("busy time: %s", smap.busytime)
    self.haz.store_disagg(acc)
    if not oq.hazard_calculation_id:
        self.haz.store_disagg()
    self.store_info(psd)
    logging.info('Saving _poes')
    for grp_id in list(acc):
        if isinstance(grp_id, int):
            self.haz.store_poes(grp_id, acc.pop(grp_id))
    return True
def scenario_damage(riskinputs, param, monitor):
    """
    Core function for a damage computation.

    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :param param:
        dictionary of extra parameters
    :returns:
        a dictionary of arrays
    """
    crmodel = monitor.read('crmodel')
    L = len(crmodel.loss_types)
    D = len(crmodel.damage_states)
    consequences = crmodel.get_consequences()
    # algorithm used to compute the discrete damage distributions
    float_dmg_dist = param['float_dmg_dist']
    z = numpy.zeros((L, D - 1), F32 if float_dmg_dist else U32)
    d_event = AccumDict(accum=z)
    res = {'d_event': d_event, 'd_asset': []}
    for name in consequences:
        res['avg_' + name] = []
        res[name + '_by_event'] = AccumDict(accum=numpy.zeros(L, F64))
        # using F64 here is necessary: with F32 the non-associativity
        # of addition would hurt too much with multiple tasks
    seed = param['master_seed']
    num_events = param['num_events']  # per realization
    acc = []  # (aid, eid, lid, ds...)
    sec_sims = param['secondary_simulations'].items()
    for ri in riskinputs:
        # here instead F32 floats are ok
        R = ri.hazard_getter.num_rlzs
        for out in ri.gen_outputs(crmodel, monitor):
            for r in range(R):
                ne = num_events[r]  # total number of events
                ok = out['haz'].rlz.to_numpy() == r  # events belonging to rlz r
                if ok.sum() == 0:
                    continue
                eids = out['eids'][ok]
                for lti, loss_type in enumerate(crmodel.loss_types):
                    for asset, fractions in zip(out['assets'],
                                                out[loss_type][:, ok]):
                        aid = asset['ordinal']
                        if float_dmg_dist:
                            damages = fractions * asset['number']
                            if sec_sims:
                                run_sec_sims(damages, out['haz'][ok],
                                             sec_sims, seed + aid)
                        else:
                            damages = bin_ddd(fractions, asset['number'],
                                              seed + aid)
                        # damages has shape E', D with E' == len(eids)
                        for e, ddd in enumerate(damages):
                            dmg = ddd[1:]
                            if dmg.sum():
                                eid = eids[e]  # (aid, eid, l) is unique
                                acc.append((aid, eid, lti) + tuple(dmg))
                                d_event[eid][lti] += ddd[1:]
                        tot = damages.sum(axis=0)  # (E', D) -> D
                        nodamage = asset['number'] * (ne - len(damages))
                        tot[0] += nodamage
                        res['d_asset'].append((lti, r, aid, tot))
                        # TODO: use the ddd, not the fractions
                        # in compute_csq
                        csq = crmodel.compute_csq(asset, fractions,
                                                  loss_type)
                        for name, values in csq.items():
                            res['avg_%s' % name].append(
                                (lti, r, asset['ordinal'],
                                 values.sum(axis=0)))
                            by_event = res[name + '_by_event']
                            for eid, value in zip(eids, values):
                                by_event[eid][lti] += value
    res['aed'] = numpy.array(acc, param['asset_damage_dt'])
    return res
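# Illustrative sketch (not the engine's bin_ddd) of the two ways the damage
# distribution per asset is built in the scenario_damage variants above:
# the "float" distribution simply scales the mean damage fractions by the
# number of buildings, while the discrete one draws integer counts, here
# approximated with a seeded multinomial draw; the real bin_ddd may differ
# in its details, and all numbers below are made up.
import numpy

fractions = numpy.array([[0.7, 0.2, 0.1],   # one row per event:
                         [0.5, 0.3, 0.2]])  # P(no damage), P(moderate), P(collapse)
number = 10  # buildings in the asset

float_ddd = fractions * number  # expected counts per event, shape (E, D)

rng = numpy.random.default_rng(seed=42)  # seed plays the role of seed + aid
discrete_ddd = numpy.array(
    [rng.multinomial(number, probs) for probs in fractions])  # integer counts
print(float_ddd, discrete_ddd, sep='\n')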
def compute_disagg(dstore, rctx, cmaker, hmap4, trti, bin_edges, oq,
                   monitor):
    # see https://bugs.launchpad.net/oq-engine/+bug/1279247 for an
    # explanation of the algorithm used
    """
    :param dstore:
        a DataStore instance
    :param rctx:
        an array of rupture parameters
    :param cmaker:
        a :class:`openquake.hazardlib.gsim.base.ContextMaker` instance
    :param hmap4:
        an ArrayWrapper of shape (N, M, P, Z)
    :param trti:
        tectonic region type index
    :param bin_edges:
        a quintet (mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
    :param oq:
        the job parameters (an OqParam instance)
    :param monitor:
        monitor of the currently running job
    :returns:
        a dictionary sid, imti -> 6D-array
    """
    RuptureContext.temporal_occurrence_model = PoissonTOM(
        oq.investigation_time)
    with monitor('reading contexts', measuremem=True):
        dstore.open('r')
        ctxs, close_ctxs = read_ctxs(
            dstore, rctx,
            req_site_params=cmaker.REQUIRES_SITES_PARAMETERS)
    magi = numpy.searchsorted(bin_edges[0], rctx[0]['mag']) - 1
    if magi == -1:  # when the magnitude is on the edge
        magi = 0
    dis_mon = monitor('disaggregate', measuremem=False)
    ms_mon = monitor('disagg mean_std', measuremem=True)
    N, M, P, Z = hmap4.shape
    g_by_z = AccumDict(accum={})  # dict s -> z -> g
    for g, rlzs in enumerate(cmaker.gsims.values()):
        for (s, z), r in numpy.ndenumerate(hmap4.rlzs):
            if r in rlzs:
                g_by_z[s][z] = g
    eps3 = disagg._eps3(cmaker.trunclevel, oq.num_epsilon_bins)
    res = {'trti': trti, 'magi': magi}
    imts = [from_string(im) for im in oq.imtls]
    with ms_mon:
        # compute mean and std for a single IMT to save memory
        # the size is N * U * G * 16 bytes
        disagg.set_mean_std(ctxs, imts, cmaker.gsims)
    # disaggregate by site, IMT
    for s, iml3 in enumerate(hmap4):
        if not g_by_z[s] or not close_ctxs[s]:
            # g_by_z[s] is empty in test case_7
            continue
        # dist_bins, lon_bins, lat_bins, eps_bins
        bins = (bin_edges[1], bin_edges[2][s], bin_edges[3][s],
                bin_edges[4])
        iml2 = dict(zip(imts, iml3))
        with dis_mon:
            # 7D-matrix #distbins, #lonbins, #latbins, #epsbins, M, P, Z
            matrix = disagg.disaggregate(
                close_ctxs[s], g_by_z[s], iml2, eps3, s, bins)  # 7D-matrix
            for m in range(M):
                mat6 = matrix[..., m, :, :]
                if mat6.any():
                    res[s, m] = output(mat6)
    return res
def execute(self):
    oq = self.oqparam
    self.set_param()
    self.offset = 0
    srcfilter = self.src_filter(self.datastore.tempname)
    self.indices = AccumDict(accum=[])  # sid, idx -> indices
    if oq.hazard_calculation_id:  # from ruptures
        self.datastore.parent = util.read(oq.hazard_calculation_id)
        self.init_logic_tree(self.datastore.parent['full_lt'])
    else:  # from sources
        self.build_events_from_sources(srcfilter)
        if (oq.ground_motion_fields is False and
                oq.hazard_curves_from_gmfs is False):
            return {}
    if not oq.imtls:
        raise InvalidFile('There are no intensity measure types in %s' %
                          oq.inputs['job_ini'])
    N = len(self.sitecol.complete)
    if oq.ground_motion_fields:
        nrups = len(self.datastore['ruptures'])
        self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
        self.datastore.create_dset('gmf_data/sigma_epsilon',
                                   sig_eps_dt(oq.imtls))
        self.datastore.create_dset('gmf_data/indices', hdf5.vuint32,
                                   shape=(N, 2), fillvalue=None)
        self.datastore.create_dset('gmf_data/events_by_sid', U32, (N,))
        self.datastore.create_dset('gmf_data/time_by_rup', time_dt,
                                   (nrups,), fillvalue=None)
    if oq.hazard_curves_from_gmfs:
        self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

    # compute_gmfs in parallel
    self.datastore.swmr_on()
    logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
    iterargs = ((rgetter, srcfilter, self.param)
                for rgetter in gen_rupture_getters(self.datastore,
                                                   srcfilter))
    acc = parallel.Starmap(
        self.core_task.__func__, iterargs, h5=self.datastore.hdf5,
        num_cores=oq.num_cores).reduce(self.agg_dicts, self.acc0())

    if self.indices:
        dset = self.datastore['gmf_data/indices']
        num_evs = self.datastore['gmf_data/events_by_sid']
        logging.info('Saving gmf_data/indices')
        with self.monitor('saving gmf_data/indices', measuremem=True):
            self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
            for sid in self.sitecol.complete.sids:
                start = numpy.array(self.indices[sid, 0])
                stop = numpy.array(self.indices[sid, 1])
                dset[sid, 0] = start
                dset[sid, 1] = stop
                num_evs[sid] = (stop - start).sum()
        avg_events_by_sid = num_evs[()].sum() / N
        logging.info('Found ~%d GMVs per site', avg_events_by_sid)
    elif oq.ground_motion_fields:
        raise RuntimeError('No GMFs were generated, perhaps they were '
                           'all below the minimum_intensity threshold')
    return acc
def get_risk_models(oqparam, kind='vulnerability fragility consequence '
                    'vulnerability_retrofitted'):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        a space-separated string with the kinds of risk models to read
    :returns:
        a dictionary riskid -> loss_type, kind -> function
    """
    kinds = kind.split()
    rmodels = AccumDict()
    for kind in kinds:
        for key in sorted(oqparam.inputs):
            mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind),
                          key)
            if mo:
                loss_type = mo.group(1)  # the cost_type in the key
                # can be occupants, structural, nonstructural, ...
                rmodel = nrml.to_python(oqparam.inputs[key])
                if len(rmodel) == 0:
                    raise InvalidFile('%s is empty!' % oqparam.inputs[key])
                rmodels[loss_type, kind] = rmodel
                if rmodel.lossCategory is None:  # NRML 0.4
                    continue
                cost_type = str(rmodel.lossCategory)
                rmodel_kind = rmodel.__class__.__name__
                kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
                if not rmodel_kind.lower().startswith(kind_):
                    raise ValueError(
                        'Error in the file "%s_file=%s": is '
                        'of kind %s, expected %s' % (
                            key, oqparam.inputs[key], rmodel_kind,
                            kind.capitalize() + 'Model'))
                if cost_type != loss_type:
                    raise ValueError(
                        'Error in the file "%s_file=%s": lossCategory is '
                        'of type "%s", expected "%s"' %
                        (key, oqparam.inputs[key], rmodel.lossCategory,
                         loss_type))
    rdict = AccumDict(accum={})
    rdict.limit_states = []
    for (loss_type, kind), rm in sorted(rmodels.items()):
        if kind == 'fragility':
            # build a copy of the FragilityModel with different IM levels
            newfm = rm.build(oqparam.continuous_fragility_discretization,
                             oqparam.steps_per_interval)
            for (imt, riskid), ffl in sorted(newfm.items()):
                if not rdict.limit_states:
                    rdict.limit_states.extend(rm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert rdict.limit_states == rm.limitStates, (
                    rdict.limit_states, rm.limitStates)
                rdict[riskid][loss_type, kind] = ffl
                # TODO: see if it is possible to remove the attribute
                # below, used in classical_damage
                ffl.steps_per_interval = oqparam.steps_per_interval
        elif kind == 'consequence':
            for riskid, cf in sorted(rm.items()):
                rdict[riskid][loss_type, kind] = cf
        else:  # vulnerability, vulnerability_retrofitted
            cl_risk = oqparam.calculation_mode in ('classical',
                                                   'classical_risk')
            # only for classical_risk reduce the loss_ratios
            # to make sure they are strictly increasing
            for (imt, riskid), rf in sorted(rm.items()):
                rdict[riskid][loss_type, kind] = (
                    rf.strictly_increasing() if cl_risk else rf)
    return rdict
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    if getattr(group, 'src_interdep', None) == 'mutex':
        mutex_weight = {src.source_id: weight
                        for src, weight in zip(group.sources,
                                               group.srcs_weights)}
    else:
        mutex_weight = None
    grp_ids = set()
    for src in group:
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    imtls = param['imtls']
    trunclevel = param.get('truncation_level')
    cmaker = ContextMaker(gsims, maxdist, param['filter_distance'], monitor)
    pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array), len(gsims))
                      for grp_id in grp_ids})
    # AccumDict of arrays with 4 elements weight, nsites, calc_time, split
    pmap.calc_times = AccumDict(accum=numpy.zeros(4))
    pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
    for src, s_sites in src_filter(group):  # filter now
        t0 = time.time()
        indep = group.rup_interdep == 'indep' if mutex_weight else True
        try:
            poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, indep)
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = '%s (source id=%s)' % (str(err), src.source_id)
            raise etype(msg).with_traceback(tb)
        if mutex_weight:  # mutex sources
            weight = mutex_weight[src.source_id]
            for sid in poemap:
                pcurve = pmap[group.id].setdefault(sid, 0)
                pcurve += poemap[sid] * weight
        elif poemap:
            for grp_id in src.src_group_ids:
                pmap[grp_id] |= poemap
        src_id = src.source_id.split(':', 1)[0]
        pmap.calc_times[src_id] += numpy.array(
            [src.weight, len(s_sites), time.time() - t0, 1])
        # storing the number of contributing ruptures too
        pmap.eff_ruptures += {grp_id: getattr(poemap, 'eff_ruptures', 0)
                              for grp_id in src.src_group_ids}
    if mutex_weight and group.grp_probability is not None:
        pmap[group.id] *= group.grp_probability
    return pmap
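# A small numerical sketch (outside the engine) of the mutex branch in the
# classical() task above: for mutually exclusive sources the per-site curves
# are combined as a weighted sum, pcurve += poemap[sid] * weight, whereas
# independent sources are merged with the ProbabilityMap union operator (|=).
# Plain numpy arrays stand in for the per-site probability curves here.
import numpy

curves = {'srcA': numpy.array([0.30, 0.10, 0.01]),
          'srcB': numpy.array([0.20, 0.05, 0.00])}
mutex_weight = {'srcA': 0.6, 'srcB': 0.4}

pcurve = 0
for src_id, poes in curves.items():
    pcurve = pcurve + poes * mutex_weight[src_id]
print(pcurve)  # [0.26  0.08  0.006]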
def event_based_risk(riskinputs, crmodel, param, monitor):
    """
    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param crmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param param:
        a dictionary of parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        a dictionary of numpy arrays of shape (L, R)
    """
    L = len(crmodel.lti)
    tempname = param['tempname']
    for ri in riskinputs:
        with monitor('getting hazard'):
            ri.hazard_getter.init()
            hazard = ri.hazard_getter.get_hazard()
        mon = monitor('build risk curves', measuremem=False)
        A = len(ri.aids)
        R = ri.hazard_getter.num_rlzs
        try:
            avg = numpy.zeros((A, R, L), F32)
        except MemoryError:
            raise MemoryError('Building array avg of shape (%d, %d, %d)' %
                              (A, R, L))
        result = dict(aids=ri.aids, avglosses=avg)
        acc = AccumDict()  # accumulator eidx -> agglosses
        aid2idx = {aid: idx for idx, aid in enumerate(ri.aids)}
        if 'builder' in param:
            builder = param['builder']
            P = len(builder.return_periods)
            all_curves = numpy.zeros((A, R, P), builder.loss_dt)
        # update the result dictionary and the agg array with each output
        for out in ri.gen_outputs(crmodel, monitor, tempname, hazard):
            if len(out.eids) == 0:  # this happens for sites with no events
                continue
            r = out.rlzi
            agglosses = numpy.zeros((len(out.eids), L), F32)
            for l, loss_type in enumerate(crmodel.loss_types):
                loss_ratios = out[loss_type]
                if loss_ratios is None:
                    # for GMFs below the minimum_intensity
                    continue
                avalues = riskmodels.get_values(loss_type, ri.assets)
                for a, asset in enumerate(ri.assets):
                    aval = avalues[a]
                    aid = asset['ordinal']
                    idx = aid2idx[aid]
                    ratios = loss_ratios[a]  # length E
                    # average losses
                    avg[idx, r, l] = (ratios.sum(axis=0) *
                                      param['ses_ratio'] * aval)
                    # agglosses
                    agglosses[:, l] += ratios * aval
                    if 'builder' in param:
                        with mon:  # this is the heaviest part
                            all_curves[idx, r][loss_type] = (
                                builder.build_curve(aval, ratios, r))
            # NB: I could yield the agglosses per output, but then I would
            # have millions of small outputs with big data transfer and
            # slow saving time
            acc += dict(zip(out.eids, agglosses))
        if 'builder' in param:
            clp = param['conditional_loss_poes']
            result['curves-rlzs'], result['curves-stats'] = builder.pair(
                all_curves, param['stats'])
            if R > 1 and param['individual_curves'] is False:
                del result['curves-rlzs']
            if clp:
                result['loss_maps-rlzs'], result['loss_maps-stats'] = (
                    builder.build_maps(all_curves, clp, param['stats']))
                if R > 1 and param['individual_curves'] is False:
                    del result['loss_maps-rlzs']
        # store info about the GMFs, must be done at the end
        result['agglosses'] = (numpy.array(list(acc)),
                               numpy.array(list(acc.values())))
        yield result
def reduce(self, agg=operator.add, acc=None):
    if acc is None:
        acc = AccumDict()
    for result in self:
        acc = agg(acc, result)
    return acc
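# A minimal usage sketch (not part of the engine) of what the reduce() above
# does: fold an iterable of task results with operator.add. AccumDict is
# assumed to come from openquake.baselib.general; adding a plain dict to it
# merges the keys and sums the values, which is why it works as the default
# accumulator for dictionary-shaped task outputs.
import operator
from openquake.baselib.general import AccumDict

def fold(results, agg=operator.add, acc=None):
    # same loop as the method above, written as a free function
    if acc is None:
        acc = AccumDict()
    for result in results:
        acc = agg(acc, result)
    return acc

# two fake task results -> {'eff_ruptures': 15, 'calc_time': 3.5}
print(fold([{'eff_ruptures': 10, 'calc_time': 1.5},
            {'eff_ruptures': 5, 'calc_time': 2.0}]))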
def full_disaggregation(self):
    """
    Run the disaggregation phase.
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = self.src_filter()
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, self.ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
    else:
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src(rlzs)

    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build trt_edges
    trts = tuple(self.full_lt.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts

    # build mag_edges
    mags = [float(mag) for mag in self.datastore['source_mags']]
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min(mags) / oq.mag_bin_width)),
        int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max(mags))
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    shapedic = self.save_bin_edges()
    del shapedic['trt']
    shapedic['N'] = self.N
    shapedic['M'] = len(oq.imtls)
    shapedic['P'] = len(oq.poes_disagg)
    shapedic['Z'] = Z
    shapedic['concurrent_tasks'] = oq.concurrent_tasks
    nbytes, msg = get_array_nbytes(shapedic)
    if nbytes > oq.max_data_transfer:
        raise ValueError('Estimated data transfer too big\n%s' % msg)
    logging.info('Estimated data transfer: %s', msg)

    self.imldict = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[s]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

    # submit #groups disaggregation tasks
    dstore = (self.datastore.parent
              if self.datastore.parent else self.datastore)
    indices = get_indices(dstore, oq.concurrent_tasks or 1)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    for grp_id, trt in self.full_lt.trt_by_grp.items():
        logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
        trti = trt_num[trt]
        cmaker = ContextMaker(
            trt, self.full_lt.get_rlzs_by_gsim(grp_id),
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for idxs in indices[grp_id]:
            smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                         self.bin_edges))
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti -> 8D array
def get_risk_models(oqparam, kind=None):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        vulnerability|vulnerability_retrofitted|fragility|consequence;
        if None it is extracted from the oqparam.file_type attribute
    :returns:
        a dictionary taxonomy -> loss_type -> function
    """
    kind = kind or oqparam.file_type
    rmodels = AccumDict()
    rmodels.limit_states = []
    for key in sorted(oqparam.inputs):
        mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
        if mo:
            key_type = mo.group(1)  # the cost_type in the key
            # can be occupants, structural, nonstructural, ...
            rmodel = nrml.to_python(oqparam.inputs[key])
            rmodels[key_type] = rmodel
            if rmodel.lossCategory is None:  # NRML 0.4
                continue
            cost_type = str(rmodel.lossCategory)
            rmodel_kind = rmodel.__class__.__name__
            kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
            if not rmodel_kind.lower().startswith(kind_):
                raise ValueError(
                    'Error in the file "%s_file=%s": is '
                    'of kind %s, expected %s' % (
                        key, oqparam.inputs[key], rmodel_kind,
                        kind.capitalize() + 'Model'))
            if cost_type != key_type:
                raise ValueError(
                    'Error in the file "%s_file=%s": lossCategory is of '
                    'type "%s", expected "%s"' %
                    (key, oqparam.inputs[key], rmodel.lossCategory,
                     key_type))
    rdict = AccumDict(accum={})
    rdict.limit_states = []
    if kind == 'fragility':
        limit_states = []
        for loss_type, fm in sorted(rmodels.items()):
            # build a copy of the FragilityModel with different IM levels
            newfm = fm.build(oqparam.continuous_fragility_discretization,
                             oqparam.steps_per_interval)
            for (imt, taxo), ffl in newfm.items():
                if not limit_states:
                    limit_states.extend(fm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert limit_states == fm.limitStates, (
                    limit_states, fm.limitStates)
                rdict[taxo][loss_type] = ffl
                # TODO: see if it is possible to remove the attribute
                # below, used in classical_damage
                ffl.steps_per_interval = oqparam.steps_per_interval
        rdict.limit_states = [str(ls) for ls in limit_states]
    elif kind == 'consequence':
        rdict = rmodels
    else:  # vulnerability
        cl_risk = oqparam.calculation_mode in ('classical',
                                               'classical_risk')
        # only for classical_risk reduce the loss_ratios
        # to make sure they are strictly increasing
        for loss_type, rm in rmodels.items():
            for (imt, taxo), rf in rm.items():
                rdict[taxo][loss_type] = (rf.strictly_increasing()
                                          if cl_risk else rf)
    return rdict
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    if getattr(group, 'src_interdep', None) == 'mutex':
        mutex_weight = {src.source_id: weight
                        for src, weight in zip(group.sources,
                                               group.srcs_weights)}
        srcs = group.sources
    else:
        mutex_weight = None
        srcs = sum([split_source(src) for src in group], [])
    grp_ids = set()
    for src in group:
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    with GroundShakingIntensityModel.forbid_instantiation():
        imtls = param['imtls']
        trunclevel = param.get('truncation_level')
        cmaker = ContextMaker(gsims, maxdist)
        ctx_mon = monitor('make_contexts', measuremem=False)
        poe_mon = monitor('get_poes', measuremem=False)
        pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array),
                                                 len(gsims))
                          for grp_id in grp_ids})
        # AccumDict of arrays with 4 elements weight, nsites, calc_time, split
        pmap.calc_times = AccumDict(accum=numpy.zeros(4))
        pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
        for src, s_sites in src_filter(srcs):  # filter now
            t0 = time.time()
            indep = group.rup_interdep == 'indep' if mutex_weight else True
            poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel,
                                    ctx_mon, poe_mon, indep)
            if mutex_weight:  # mutex sources
                weight = mutex_weight[src.source_id]
                for sid in poemap:
                    pcurve = pmap[group.id].setdefault(sid, 0)
                    pcurve += poemap[sid] * weight
            elif poemap:
                for grp_id in src.src_group_ids:
                    pmap[grp_id] |= poemap
            src_id = src.source_id.split(':', 1)[0]
            pmap.calc_times[src_id] += numpy.array(
                [src.weight, len(s_sites), time.time() - t0, 1])
            # storing the number of contributing ruptures too
            pmap.eff_ruptures += {
                grp_id: getattr(poemap, 'eff_ruptures', 0)
                for grp_id in src.src_group_ids}
        if mutex_weight and group.grp_probability is not None:
            pmap[group.id] *= group.grp_probability
    return pmap
def ucerf_risk(riskinput, riskmodel, param, monitor):
    """
    :param riskinput:
        a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param param:
        a dictionary of parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        a dictionary of numpy arrays of shape (L, R)
    """
    with monitor('%s.init' % riskinput.hazard_getter.__class__.__name__):
        riskinput.hazard_getter.init()
    eids = riskinput.hazard_getter.eids
    A = len(riskinput.aids)
    E = len(eids)
    assert not param['insured_losses']
    L = len(riskmodel.lti)
    R = riskinput.hazard_getter.num_rlzs
    param['lrs_dt'] = numpy.dtype([('rlzi', U16), ('ratios', (F32, L))])
    agg = numpy.zeros((E, R, L), F32)
    avg = AccumDict(
        accum={} if riskinput.by_site or not param['avg_losses']
        else numpy.zeros(A, F64))
    result = dict(aids=riskinput.aids, avglosses=avg)

    # update the result dictionary and the agg array with each output
    for out in riskmodel.gen_outputs(riskinput, monitor):
        if len(out.eids) == 0:  # this happens for sites with no events
            continue
        r = out.rlzi
        idx = riskinput.hazard_getter.eid2idx
        for l, loss_ratios in enumerate(out):
            if loss_ratios is None:  # for GMFs below the minimum_intensity
                continue
            loss_type = riskmodel.loss_types[l]
            indices = numpy.array([idx[eid] for eid in out.eids])
            for a, asset in enumerate(out.assets):
                ratios = loss_ratios[a]  # shape (E, I)
                aid = asset.ordinal
                losses = ratios * asset.value(loss_type)
                # average losses
                if param['avg_losses']:
                    rat = ratios.sum(axis=0) * param['ses_ratio']
                    lba = avg[l, r]
                    try:
                        lba[aid] += rat
                    except KeyError:
                        lba[aid] = rat
                # this is the critical loop: it is important to keep it
                # vectorized in terms of the event indices
                agg[indices, r, l] += losses[:, 0]  # 0 == no insured
    it = ((eid, r, losses)
          for eid, all_losses in zip(eids, agg)
          for r, losses in enumerate(all_losses) if losses.sum())
    result['agglosses'] = numpy.fromiter(it, param['elt_dt'])
    # store info about the GMFs, must be done at the end
    result['gmdata'] = riskinput.gmdata
    return result
def get_risk_functions(oqparam, kind='vulnerability fragility consequence '
                       'vulnerability_retrofitted'):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        a space-separated string with the kinds of risk models to read
    :returns:
        a list of risk functions
    """
    kinds = kind.split()
    rmodels = AccumDict()
    for kind in kinds:
        for key in sorted(oqparam.inputs):
            mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind),
                          key)
            if mo:
                loss_type = mo.group(1)  # the cost_type in the key
                # can be occupants, structural, nonstructural, ...
                rmodel = nrml.to_python(oqparam.inputs[key])
                if len(rmodel) == 0:
                    raise InvalidFile('%s is empty!' % oqparam.inputs[key])
                rmodels[loss_type, kind] = rmodel
                if rmodel.lossCategory is None:  # NRML 0.4
                    continue
                cost_type = str(rmodel.lossCategory)
                rmodel_kind = rmodel.__class__.__name__
                kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
                if not rmodel_kind.lower().startswith(kind_):
                    raise ValueError(
                        'Error in the file "%s_file=%s": is '
                        'of kind %s, expected %s' % (
                            key, oqparam.inputs[key], rmodel_kind,
                            kind.capitalize() + 'Model'))
                if cost_type != loss_type:
                    raise ValueError(
                        'Error in the file "%s_file=%s": lossCategory is '
                        'of type "%s", expected "%s"' %
                        (key, oqparam.inputs[key], rmodel.lossCategory,
                         loss_type))
    cl_risk = oqparam.calculation_mode in ('classical', 'classical_risk')
    rlist = RiskFuncList()
    rlist.limit_states = []
    for (loss_type, kind), rm in sorted(rmodels.items()):
        if kind == 'fragility':
            for (imt, riskid), ffl in sorted(rm.items()):
                if not rlist.limit_states:
                    rlist.limit_states.extend(rm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert rlist.limit_states == rm.limitStates, (
                    rlist.limit_states, rm.limitStates)
                ffl.loss_type = loss_type
                ffl.kind = kind
                rlist.append(ffl)
        elif kind == 'consequence':
            for riskid, cf in sorted(rm.items()):
                rf = hdf5.ArrayWrapper(
                    cf, dict(id=riskid, loss_type=loss_type, kind=kind))
                rlist.append(rf)
        else:  # vulnerability, vulnerability_retrofitted
            # only for classical_risk reduce the loss_ratios
            # to make sure they are strictly increasing
            for (imt, riskid), rf in sorted(rm.items()):
                rf = rf.strictly_increasing() if cl_risk else rf
                rf.loss_type = loss_type
                rf.kind = kind
                rlist.append(rf)
    return rlist
def sample_cluster(sources, srcfilter, num_ses, param):
    """
    Generates the ruptures produced by a cluster of sources.

    :param sources:
        A sequence of sources of the same group
    :param srcfilter:
        A source filter
    :param num_ses:
        Number of stochastic event sets
    :param param:
        a dictionary of additional parameters including
        ses_per_logic_tree_path
    :returns:
        a pair (eb_ruptures, calc_times)
    """
    eb_ruptures = []
    ses_seed = param['ses_seed']
    numpy.random.seed(sources[0].serial(ses_seed))
    [et_id] = set(src.et_id for src in sources)
    # AccumDict of arrays with 3 elements nsites, nruptures, calc_time
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    # Set the parameters required to compute the number of occurrences
    # of the group of sources
    #  assert param['oqparam'].number_of_logic_tree_samples > 0
    samples = getattr(sources[0], 'samples', 1)
    tom = getattr(sources, 'temporal_occurrence_model')
    rate = tom.occurrence_rate
    time_span = tom.time_span
    # Note that using a single time interval corresponding to the product
    # of the investigation time and the number of realisations as we do
    # here is admitted only in the case of a time-independent model
    grp_num_occ = numpy.random.poisson(
        rate * time_span * samples * num_ses)
    # Now we process the sources included in the group. Possible cases:
    # * The group is a cluster. In this case we choose one rupture per each
    #   source; uncertainty in the ruptures can be handled in this case
    #   using mutually exclusive ruptures (note that this is admitted
    #   only for non-parametric sources).
    # * The group contains mutually exclusive sources. In this case we
    #   choose one source and then one rupture from this source.
    rup_counter = {}
    rup_data = {}
    for rlz_num in range(grp_num_occ):
        if sources.cluster:
            for src, _ in srcfilter.filter(sources):
                # Track calculation time
                t0 = time.time()
                rup = src.get_one_rupture(ses_seed)
                # The problem here is that we do not know a-priori the
                # number of occurrences of a given rupture.
                if src.id not in rup_counter:
                    rup_counter[src.id] = {}
                    rup_data[src.id] = {}
                if rup.idx not in rup_counter[src.id]:
                    rup_counter[src.id][rup.idx] = 1
                    rup_data[src.id][rup.idx] = [rup, src.id, et_id]
                else:
                    rup_counter[src.id][rup.idx] += 1
                # Store info
                dt = time.time() - t0
                calc_times[src.id] += numpy.array(
                    [len(rup_data[src.id]), src.nsites, dt])
        elif param['src_interdep'] == 'mutex':
            raise NotImplementedError('src_interdep == mutex')
    # Create event based ruptures
    for src_key in rup_data:
        for rup_key in rup_data[src_key]:
            rup, source_id, et_id = rup_data[src_key][rup_key]
            cnt = rup_counter[src_key][rup_key]
            ebr = EBRupture(rup, source_id, et_id, cnt)
            eb_ruptures.append(ebr)
    return eb_ruptures, calc_times
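# A toy sketch (not engine code) of the occurrence sampling in sample_cluster
# above: the number of times the cluster occurs over all stochastic event
# sets is drawn from a Poisson distribution whose mean is the occurrence
# rate times the time span, scaled by the number of samples and of SES.
# The rate, time span and SES count below are illustrative only.
import numpy

rate = 0.002          # annual occurrence rate of the cluster (hypothetical)
time_span = 50.       # investigation time in years (hypothetical)
samples = 1           # logic tree samples for the source model
num_ses = 100         # ses_per_logic_tree_path
numpy.random.seed(42)
grp_num_occ = numpy.random.poisson(rate * time_span * samples * num_ses)
print('cluster occurrences over all SES:', grp_num_occ)  # mean is 10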
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}

    srcfilter = self.src_filter()
    srcs = self.csm.get_sources()
    if oq.is_ucerf():
        logging.info('Prefiltering UCERFSources')
        for src in srcs:
            if hasattr(src, 'start'):
                src.src_filter = srcfilter  # hack for .iter_ruptures
                src.all_ridx = src.get_ridx()
    calc_times = parallel.Starmap.apply(
        preclassical, (srcs, srcfilter),
        concurrent_tasks=oq.concurrent_tasks or 1,
        num_cores=oq.num_cores, h5=self.datastore.hdf5).reduce()

    if oq.calculation_mode == 'preclassical':
        self.store_source_info(calc_times, nsites=True)
        self.datastore['full_lt'] = self.csm.full_lt
        self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
        return

    self.update_source_info(calc_times, nsites=True)
    # if OQ_SAMPLE_SOURCES is set extract one source for group
    ss = os.environ.get('OQ_SAMPLE_SOURCES')
    if ss:
        for sg in self.csm.src_groups:
            if not sg.atomic:
                srcs = [src for src in sg if src.nsites]
                sg.sources = [srcs[0]]

    mags = self.datastore['source_mags']  # by TRT
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    mags_by_trt = {}
    for trt in mags:
        mags_by_trt[trt] = mags[trt][()]
    psd = oq.pointsource_distance
    if psd is not None:
        psd.interp(mags_by_trt)
        for trt, dic in psd.ddic.items():
            # the sum is zero for {'default': [(1, 0), (10, 0)]}
            if sum(dic.values()):
                it = list(dic.items())
                md = '%s->%d ... %s->%d' % (it[0] + it[-1])
                logging.info('ps_dist %s: %s', trt, md)
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if (imts_ok and psd and psd.suggested()) or (
            imts_ok and oq.minimum_intensity):
        aw = get_effect(mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
        if psd:
            dic = {trt: [(float(mag), int(dst))
                         for mag, dst in psd.ddic[trt].items()]
                   for trt in psd.ddic if trt != 'default'}
            logging.info('pointsource_distance=\n%s', pprint.pformat(dic))
        if len(vars(aw)) > 1:  # more than _extra
            self.datastore['effect_by_mag_dst'] = aw

    smap = parallel.Starmap(classical, h5=self.datastore.hdf5,
                            num_cores=oq.num_cores)
    smap.monitor.save('srcfilter', self.src_filter())
    rlzs_by_gsim_list = self.submit_tasks(smap)
    rlzs_by_g = []
    for rlzs_by_gsim in rlzs_by_gsim_list:
        for rlzs in rlzs_by_gsim.values():
            rlzs_by_g.append(rlzs)
    self.datastore['rlzs_by_g'] = [U32(rlzs) for rlzs in rlzs_by_g]
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    poes_shape = (self.N, len(oq.imtls.array), len(rlzs_by_g))  # NLG
    size = numpy.prod(poes_shape) * 8
    logging.info('Requiring %s for ProbabilityMap of shape %s',
                 humansize(size), poes_shape)
    self.datastore.create_dset('_poes', F64, poes_shape)
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(srcids)
            self.by_task.clear()
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
        int(self.numrups), self.totrups))
    logging.info('Effective number of sites per rupture: %d',
                 numsites / self.numrups)
    if psd:
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning('The pointsource_distance of %d km is too '
                            'small compared to a maxradius of %d km',
                            psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
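# A stand-in sketch (not engine code) of the calc_times accumulation pattern
# used in execute() above: each task returns, per source id, a 3-element
# array (num_ruptures, num_sites, elapsed_time) and the master sums them.
# Here a defaultdict mimics the engine's AccumDict(accum=numpy.zeros(3, F32))
# and the task results are made up for illustration.
import collections
import numpy

calc_times = collections.defaultdict(lambda: numpy.zeros(3, numpy.float32))
task_results = [  # hypothetical per-task dictionaries {source_id: array}
    {'src_A': numpy.array([10, 200, 0.8], numpy.float32)},
    {'src_A': numpy.array([5, 100, 0.3], numpy.float32),
     'src_B': numpy.array([7, 50, 0.2], numpy.float32)},
]
for dic in task_results:
    for src_id, arr in dic.items():
        calc_times[src_id] += arr
numrups = sum(arr[0] for arr in calc_times.values())
numsites = sum(arr[1] for arr in calc_times.values())
print('effective ruptures: %d, sites per rupture: %d'
      % (numrups, numsites / numrups))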
def compute(self):
    """
    Submit disaggregation tasks and return the results
    """
    logging.info('Reading ruptures')
    oq = self.oqparam
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    mags = set()
    for trt, dset in self.datastore['source_mags'].items():
        mags.update(dset[:])
    mags = sorted(mags)
    allargs = []
    totweight = sum(d['rctx']['nsites'].sum() for n, d in dstore.items()
                    if n.startswith('mag_') and len(d['rctx']))
    et_ids = dstore['et_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids)
    G = max(len(rbg) for rbg in rlzs_by_gsim)
    maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
    maxweight = min(
        numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)
    task_inputs = []
    U = 0
    totrups = 0
    for mag in mags:
        rctx = dstore['mag_%s/rctx' % mag][:]
        totrups += len(rctx)
        for grp_id, gids in enumerate(et_ids):
            idxs, = numpy.where(rctx['grp_id'] == grp_id)
            if len(idxs) == 0:
                continue
            trti = gids[0] // num_eff_rlzs
            trt = self.trts[trti]
            cmaker = ContextMaker(
                trt, rlzs_by_gsim[grp_id],
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': oq.maximum_distance,
                 'collapse_level': oq.collapse_level,
                 'imtls': oq.imtls})
            for blk in block_splitter(rctx[idxs], maxweight, nsites):
                nr = len(blk)
                U = max(U, blk.weight)
                allargs.append((dstore, numpy.array(blk), cmaker,
                                self.hmap4, trti, self.bin_edges, oq))
                task_inputs.append((trti, mag, nr))
    logging.info('Found {:_d} ruptures'.format(totrups))
    nbytes, msg = get_array_nbytes(dict(M=self.M, G=G, U=U, F=2))
    logging.info('Maximum mean_std per task:\n%s', msg)
    s = self.shapedic
    sd = dict(N=s['N'], M=s['M'], P=s['P'], Z=s['Z'],
              D=s['dist'], E=s['eps'], Lo=s['lon'], La=s['lat'])
    sd['tasks'] = numpy.ceil(len(allargs))
    nbytes, msg = get_array_nbytes(sd)
    if nbytes > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (msg, humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', msg)
    sd.pop('tasks')
    sd['mags_trt'] = sum(
        len(mags) for mags in self.datastore['source_mags'].values())
    nbytes, msg = get_array_nbytes(sd)
    logging.info('Estimated memory on the master:\n%s', msg)
    dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5)
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # imti, sid -> trti, magi -> 6D array
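# A simplified sketch (not the engine's block_splitter) of the weight-based
# task sizing used in compute() above: rupture context records are grouped
# into blocks so that the total weight (here, the number of sites) per block
# stays below maxweight, which bounds the memory and runtime of each task.
# The record values are made up for illustration.
import numpy

def split_by_weight(records, maxweight, weight):
    block, tot = [], 0
    for rec in records:
        w = weight(rec)
        if block and tot + w > maxweight:
            yield block
            block, tot = [], 0
        block.append(rec)
        tot += w
    if block:
        yield block

rctx = numpy.array([(1, 30), (2, 70), (3, 20), (4, 90)],
                   dtype=[('rup_id', numpy.uint32), ('nsites', numpy.uint32)])
for blk in split_by_weight(rctx, maxweight=100, weight=lambda r: r['nsites']):
    print([int(r['rup_id']) for r in blk])  # [1, 2] then [3] then [4]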
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}

    mags = self.datastore['source_mags']  # by TRT
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    if oq.pointsource_distance is not None:
        for trt in gsims_by_trt:
            oq.pointsource_distance[trt] = getdefault(
                oq.pointsource_distance, trt)
    mags_by_trt = {}
    for trt in mags:
        mags_by_trt[trt] = mags[trt][()]
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if (imts_ok and oq.pointsource_distance and
            oq.pointsource_distance.suggested()) or (
            imts_ok and oq.minimum_intensity):
        aw, self.psd = get_effect(
            mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
        if len(vars(aw)) > 1:  # more than _extra
            self.datastore['effect_by_mag_dst'] = aw
    elif oq.pointsource_distance:
        self.psd = oq.pointsource_distance.interp(mags_by_trt)
    else:
        self.psd = {}
    smap = parallel.Starmap(classical, h5=self.datastore.hdf5,
                            num_cores=oq.num_cores)
    self.submit_tasks(smap)
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(srcids)
            self.by_task.clear()
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
        int(self.numrups), self.totrups))
    logging.info('Effective number of sites per rupture: %d',
                 numsites / self.numrups)
    if self.psd:
        psdist = max(max(self.psd[trt].values()) for trt in self.psd)
        if psdist != -1 and self.maxradius >= psdist / 2:
            logging.warning('The pointsource_distance of %d km is too '
                            'small compared to a maxradius of %d km',
                            psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
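# A small illustrative sketch (not engine code) of the IMT filtering used in
# execute() above: the magnitude-distance "effect" shortcut is only attempted
# when every IMT in the job has a spectral period, i.e. it is PGA or SA(T).
# The imtls dictionary below is made up for illustration.
imtls = {'PGA': [], 'SA(0.2)': [], 'SA(1.0)': [], 'PGV': []}
imts_with_period = [imt for imt in imtls
                    if imt == 'PGA' or imt.startswith('SA')]
imts_ok = len(imts_with_period) == len(imtls)
print(imts_with_period, imts_ok)  # PGV blocks the shortcut -> False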
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}

    assert oq.max_sites_per_tile > oq.max_sites_disagg, (
        oq.max_sites_per_tile, oq.max_sites_disagg)
    psd = self.set_psd()
    srcfilter = self.src_filter()
    performance.Monitor.save(self.datastore, 'srcfilter', srcfilter)
    srcs = self.csm.get_sources(atomic=False)
    if srcs:
        res = parallel.Starmap.apply(
            preclassical, (srcs, self.params),
            concurrent_tasks=oq.concurrent_tasks or 1,
            h5=self.datastore.hdf5).reduce()

        if oq.calculation_mode == 'preclassical':
            self.store_source_info(res['calc_times'], nsites=True)
            self.datastore['full_lt'] = self.csm.full_lt
            self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
            return

        self.update_source_info(res['calc_times'], nsites=True)
        sources_by_grp = groupby(
            res['sources'], operator.attrgetter('grp_id'))
    else:
        for src in self.csm.get_sources(atomic=True):
            src.num_ruptures = src.count_ruptures()
            src.nsites = self.N
        sources_by_grp = {}
    self.csm.src_groups = [sg for sg in self.csm.src_groups if sg.atomic]
    if oq.ps_grid_spacing:
        smap = parallel.Starmap(
            grid_point_sources, h5=self.datastore.hdf5,
            distribute=None if len(sources_by_grp) > 1 else 'no')
        for grp_id, sources in sources_by_grp.items():
            smap.submit((sources, oq.ps_grid_spacing))
        dic = smap.reduce()
        before, after = 0, 0
        for grp_id, sources in sources_by_grp.items():
            before += len(sources)
            after += len(dic[grp_id])
            sg = SourceGroup(sources[0].tectonic_region_type)
            sg.sources = dic[grp_id]
            self.csm.src_groups.append(sg)
        logging.info('Reduced point sources %d->%d', before, after)
    else:
        for grp_id, sources in sources_by_grp.items():
            sg = SourceGroup(sources[0].tectonic_region_type)
            sg.sources = sources
            self.csm.src_groups.append(sg)
    smap = parallel.Starmap(classical, h5=self.datastore.hdf5)
    self.submit_tasks(smap)
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        source_ids = self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(source_ids[s] for s in srcids)
            self.by_task.clear()
    if self.calc_times:  # can be empty in case of errors
        self.numrups = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
            int(self.numrups), self.totrups))
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
    if psd:
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning(
                'The pointsource_distance of %d km is too '
                'small compared to a maxradius of %d km',
                psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
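# A minimal sketch (not engine code) of the grouping step in execute() above:
# the sources returned by the preclassical tasks are regrouped by their
# grp_id before being wrapped into SourceGroup objects. Here itertools stands
# in for the engine's groupby helper and FakeSource is a hypothetical
# stand-in for a hazardlib source.
import itertools
import operator

class FakeSource:
    def __init__(self, source_id, grp_id):
        self.source_id, self.grp_id = source_id, grp_id

sources = [FakeSource('a', 1), FakeSource('b', 0), FakeSource('c', 1)]
key = operator.attrgetter('grp_id')
sources_by_grp = {
    grp_id: list(grp)
    for grp_id, grp in itertools.groupby(sorted(sources, key=key), key)}
print({g: [s.source_id for s in srcs]
       for g, srcs in sources_by_grp.items()})  # {0: ['b'], 1: ['a', 'c']}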