def get_effect(mags, sitecol, gsims_by_trt, oq):
    """
    :returns: an ArrayWrapper effect_by_mag_dst_trt

    Also updates oq.maximum_distance.magdist and oq.pointsource_distance
    """
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    # computing the effect makes sense only if all IMTs have the same
    # unit of measure; for simplicity we will consider only PGA and SA
    effect = {}
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    aw = hdf5.ArrayWrapper((), dist_bins)
    if sitecol is None:
        return aw
    if len(sitecol) >= oq.max_sites_disagg and imts_ok:
        logging.info('Computing effect of the ruptures')
        mon = performance.Monitor('rupture effect')
        eff_by_mag = parallel.Starmap.apply(
            get_effect_by_mag, (mags, sitecol.one(), gsims_by_trt,
                                oq.maximum_distance, oq.imtls, mon)).reduce()
        aw.array = eff_by_mag
        effect.update({
            trt: Effect({mag: eff_by_mag[mag][:, t] for mag in eff_by_mag},
                        dist_bins[trt])
            for t, trt in enumerate(gsims_by_trt)})
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if oq.pointsource_distance['default']:
                oq.pointsource_distance[trt] = eff.dist_by_mag(
                    eff.collapse_value(oq.pointsource_distance['default']))
    elif oq.pointsource_distance['default']:
        # replace pointsource_distance with a dict trt -> mag -> dst
        for trt in gsims_by_trt:
            try:
                dst = getdefault(oq.pointsource_distance, trt)
            except TypeError:  # 'NoneType' object is not subscriptable
                dst = getdefault(oq.maximum_distance, trt)
            oq.pointsource_distance[trt] = {mag: dst for mag in mags}
    return aw
def make_figure_dist_by_mag(extractors, what): """ $ oq plot "dist_by_mag?" """ # NB: matplotlib is imported inside since it is a costly import import matplotlib.pyplot as plt [ex] = extractors effect = ex.get('effect') mags = ['%.2f' % mag for mag in effect.mags] fig, ax = plt.subplots() trti = 0 for trt, dists in effect.dist_bins.items(): dic = dict(zip(mags, effect[:, :, trti])) if ex.oqparam.pointsource_distance: pdist = getdefault(ex.oqparam.pointsource_distance, trt) else: pdist = None eff = Effect(dic, dists, pdist) dist_by_mag = eff.dist_by_mag() ax.plot(effect.mags, list(dist_by_mag.values()), label=trt, color='red') if pdist: dist_by_mag = eff.dist_by_mag(eff.collapse_value) ax.plot(effect.mags, list(dist_by_mag.values()), label=trt, color='green') ax.set_xlabel('Mag') ax.set_ylabel('Dist') ax.set_title('Integration Distance at intensity=%s' % eff.zero_value) trti += 1 ax.legend() return plt
def pre_execute(self):
    oq = self.oqparam
    oq.ground_motion_fields = False
    super().pre_execute()
    self.param['lba'] = lba = (
        LossesByAsset(self.assetcol, oq.loss_names,
                      self.policy_name, self.policy_dict))
    self.param['ses_ratio'] = oq.ses_ratio
    self.param['aggregate_by'] = oq.aggregate_by
    self.param['highest_losses'] = oq.highest_losses
    self.param['minimum_loss'] = [getdefault(oq.minimum_asset_loss, ln)
                                  for ln in oq.loss_names]
    self.param['ael_dt'] = ael_dt(oq.loss_names, rlz=True)
    self.A = A = len(self.assetcol)
    dt = ael_dt(oq.loss_names)
    for r in range(self.R):
        self.datastore.create_dset('asset_loss_table/rlz-%d' % r, dt)
    self.param.pop('oqparam', None)  # unneeded
    self.L = L = len(lba.loss_names)
    self.datastore.create_dset('avg_losses-stats', F32, (A, 1, L))  # mean
    shp = self.assetcol.tagcol.agg_shape((L,), oq.aggregate_by)
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, shp))]
    elt_nbytes = 4 * self.E * numpy.prod(shp)
    logging.info('Approx size of the event loss table: %s',
                 general.humansize(elt_nbytes))
    if elt_nbytes / (oq.concurrent_tasks or 1) > TWO32:
        raise RuntimeError('The event loss table is too big to be transfer'
                           'red with %d tasks' % oq.concurrent_tasks)
    self.datastore.create_dset('losses_by_event', elt_dt)
    self.zerolosses = numpy.zeros(shp, F32)  # to get the multi-index
    self.datastore.create_dset('gmf_info', gmf_info_dt)
def _interp(param, name, trt): try: mdd = param[name] except KeyError: return magdepdist([(MINMAG, 1000), (MAXMAG, 1000)]) if isinstance(mdd, IntegrationDistance): return mdd(trt) elif isinstance(mdd, dict): return magdepdist(getdefault(mdd, trt)) return mdd
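# Hedged sketch (not engine code): the `getdefault` helper used throughout
# these snippets is assumed to look up a per-TRT value in a dict-like
# object, falling back to the 'default' key when the TRT is missing.
# A minimal stand-in with the same contract:
def getdefault_sketch(dic, trt):
    """Return dic[trt] if present, else dic['default']."""
    try:
        return dic[trt]
    except KeyError:
        return dic['default']

# usage: per-TRT maximum distances with a catch-all default
maxdist = {'default': 300, 'Active Shallow Crust': 200}
assert getdefault_sketch(maxdist, 'Active Shallow Crust') == 200
assert getdefault_sketch(maxdist, 'Stable Continental') == 300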
def get_edges_shapedic(oq, sitecol, mags_by_trt):
    """
    :returns: (mag dist lon lat eps trt) edges and shape dictionary
    """
    tl = oq.truncation_level
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg or 1
    else:
        Z = len(oq.rlz_index)

    # build mag_edges
    mags = set()
    trts = []
    for trt, _mags in mags_by_trt.items():
        mags.update(float(mag) for mag in _mags)
        trts.append(trt)
    mags = sorted(mags)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min(mags) / oq.mag_bin_width)),
        int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(filters.getdefault(oq.maximum_distance, trt)
                  for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

    # build eps_edges
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    lon_edges, lat_edges = {}, {}  # by sid
    for site in sitecol:
        loc = site.location
        lon_edges[site.id], lat_edges[site.id] = lon_lat_bins(
            loc.x, loc.y, maxdist, oq.coordinate_bin_width)

    # sanity check: the shapes of the lon lat edges are consistent
    assert_same_shape(list(lon_edges.values()))
    assert_same_shape(list(lat_edges.values()))

    bin_edges = [mag_edges, dist_edges, lon_edges, lat_edges, eps_edges]
    edges = [mag_edges, dist_edges, lon_edges[0], lat_edges[0], eps_edges]
    shape = [len(edge) - 1 for edge in edges] + [len(trts)]
    shapedic = dict(zip(BIN_NAMES, shape))
    shapedic['N'] = len(sitecol)
    shapedic['M'] = len(oq.imtls)
    shapedic['P'] = len(oq.poes_disagg or (None,))
    shapedic['Z'] = Z
    return bin_edges + [trts], shapedic
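# Hedged sketch: the mag/dist edge construction above uses floor/ceil so
# the bins fully cover [min(mags), max(mags)] and [0, maxdist]. A
# self-contained numpy illustration (all values are made up):
import numpy

mag_bin_width, mags = 0.5, [5.2, 6.7, 7.1]
mag_edges = mag_bin_width * numpy.arange(
    int(numpy.floor(min(mags) / mag_bin_width)),
    int(numpy.ceil(max(mags) / mag_bin_width) + 1))
# -> [5.0, 5.5, 6.0, 6.5, 7.0, 7.5], covering 5.2 .. 7.1

dist_bin_width, maxdist = 20.0, 210.0
dist_edges = dist_bin_width * numpy.arange(
    0, int(numpy.ceil(maxdist / dist_bin_width) + 1))
# -> [0, 20, ..., 220], covering 0 .. 210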
def get_effect(mags, sitecol, gsims_by_trt, oq):
    """
    :returns: an ArrayWrapper effect_by_mag_dst_trt

    Updates oq.maximum_distance.magdist and oq.pointsource_distance
    """
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    aw = hdf5.ArrayWrapper((), dist_bins)
    if sitecol is None:
        return aw
    # computing the effect makes sense only if all IMTs have the same
    # unit of measure; for simplicity we will consider only PGA and SA
    effect = {}
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    psd = oq.pointsource_distance['default']
    effect_ok = imts_ok and (psd or oq.minimum_intensity)
    if effect_ok:
        logging.info('Computing effect of the ruptures')
        eff_by_mag = parallel.Starmap.apply(
            get_effect_by_mag, (mags, sitecol.one(), gsims_by_trt,
                                oq.maximum_distance, oq.imtls)).reduce()
        aw.array = eff_by_mag
        effect.update({
            trt: Effect({mag: eff_by_mag[mag][:, t] for mag in eff_by_mag},
                        dist_bins[trt])
            for t, trt in enumerate(gsims_by_trt)})
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if psd:
                oq.pointsource_distance[trt] = eff.dist_by_mag(
                    eff.collapse_value(psd))
    elif psd:  # like in case_24 with PGV
        for trt in dist_bins:
            pdist = getdefault(oq.pointsource_distance, trt)
            oq.pointsource_distance[trt] = {mag: pdist for mag in mags}
    return aw
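# Hedged sketch of what Effect.dist_by_mag is assumed to do: given, for each
# magnitude, an intensity-like "effect" that decreases with distance, return
# the first distance at which the effect drops below a threshold. This is an
# illustration only; the real Effect class is defined elsewhere in the
# engine and its API may differ.
import numpy

def dist_by_mag_sketch(effect_by_mag, dist_bins, threshold):
    dic = {}
    for mag, eff in effect_by_mag.items():  # eff: array over distance bins
        below = numpy.where(eff < threshold)[0]
        dic[mag] = dist_bins[below[0]] if len(below) else dist_bins[-1]
    return dic

dist_bins = numpy.array([10., 50., 100., 200., 300.])
effect_by_mag = {'6.05': numpy.array([.5, .2, .08, .02, .005])}
print(dist_by_mag_sketch(effect_by_mag, dist_bins, .05))  # {'6.05': 200.0}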
def __init__(self, trt, gsims, param=None, monitor=Monitor()): param = param or {} self.max_sites_disagg = param.get('max_sites_disagg', 10) self.collapse_ctxs = param.get('collapse_ctxs', False) self.trt = trt self.gsims = gsims self.maximum_distance = (param.get('maximum_distance') or IntegrationDistance({})) self.trunclevel = param.get('truncation_level') self.effect = param.get('effect') for req in self.REQUIRES: reqset = set() for gsim in gsims: reqset.update(getattr(gsim, 'REQUIRES_' + req)) setattr(self, 'REQUIRES_' + req, reqset) psd = param.get('pointsource_distance', {'default': {}}) self.pointsource_distance = getdefault(psd, trt) # can be 0 or {} # NB: self.pointsource_distance is a dict mag -> pdist, possibly empty self.filter_distance = 'rrup' self.imtls = param.get('imtls', {}) self.imts = [imt_module.from_string(imt) for imt in self.imtls] self.reqv = param.get('reqv') if self.reqv is not None: self.REQUIRES_DISTANCES.add('repi') if hasattr(gsims, 'items'): # gsims is actually a dict rlzs_by_gsim # since the ContextMaker must be used on ruptures with the # same TRT, given a realization there is a single gsim self.gsim_by_rlzi = {} for gsim, rlzis in gsims.items(): for rlzi in rlzis: self.gsim_by_rlzi[rlzi] = gsim self.mon = monitor self.ctx_mon = monitor('make_contexts', measuremem=False) self.loglevels = DictArray(self.imtls) self.shift_hypo = param.get('shift_hypo') with warnings.catch_warnings(): # avoid RuntimeWarning: divide by zero encountered in log warnings.simplefilter("ignore") for imt, imls in self.imtls.items(): if imt != 'MMI': self.loglevels[imt] = numpy.log(imls)
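# Hedged sketch of the REQUIRES_* aggregation above: each GSIM class
# declares the site/rupture/distance parameters it needs, and the
# ContextMaker takes the union across GSIMs. Dummy classes stand in for
# real GMPEs:
class GsimA:
    REQUIRES_DISTANCES = {'rjb'}
    REQUIRES_SITES_PARAMETERS = {'vs30'}

class GsimB:
    REQUIRES_DISTANCES = {'rrup'}
    REQUIRES_SITES_PARAMETERS = {'vs30', 'z1pt0'}

REQUIRES = ['DISTANCES', 'SITES_PARAMETERS']
union = {}
for req in REQUIRES:
    reqset = set()
    for gsim in (GsimA, GsimB):
        reqset.update(getattr(gsim, 'REQUIRES_' + req))
    union[req] = reqset
print(union)  # {'DISTANCES': {'rjb', 'rrup'}, 'SITES_PARAMETERS': {...}}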
def pre_execute(self): oq = self.oqparam oq.ground_motion_fields = False super().pre_execute() self.param['lba'] = lba = (LossesByAsset(self.assetcol, oq.loss_names, self.policy_name, self.policy_dict)) self.param['ses_ratio'] = oq.ses_ratio self.param['aggregate_by'] = oq.aggregate_by ct = oq.concurrent_tasks or 1 self.param['maxweight'] = int(oq.ebrisk_maxsize / ct) self.A = A = len(self.assetcol) self.L = L = len(lba.loss_names) self.check_number_loss_curves() mal = { lt: getdefault(oq.minimum_asset_loss, lt) for lt in oq.loss_names } logging.info('minimum_asset_loss=%s', mal) if (oq.aggregate_by and self.E * A > oq.max_potential_gmfs and any(val == 0 for val in mal.values()) and not sum(oq.minimum_asset_loss.values())): logging.warning('The calculation is really big; you should set ' 'minimum_asset_loss') self.param['minimum_asset_loss'] = mal elt_dt = [('event_id', U32), ('loss', (F32, (L, )))] for idxs, attrs in gen_indices(self.assetcol.tagcol, oq.aggregate_by): idx = ','.join(map(str, idxs)) + ',' self.datastore.create_dset('event_loss_table/' + idx, elt_dt, attrs=attrs) self.param.pop('oqparam', None) # unneeded self.datastore.create_dset('avg_losses-stats', F32, (A, 1, L)) # mean elt_nbytes = 4 * self.E * L if elt_nbytes / (oq.concurrent_tasks or 1) > TWO32: raise RuntimeError('The event loss table is too big to be transfer' 'red with %d tasks' % oq.concurrent_tasks) self.datastore.create_dset('losses_by_event', elt_dt) self.datastore.create_dset('gmf_info', gmf_info_dt)
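# Hedged sketch of the event-loss-table size check above: each row stores L
# float32 losses (4 bytes each) per event, and the per-task share is
# compared against 2**32 bytes. The numbers below are made up:
TWO32 = 2 ** 32
E, L, concurrent_tasks = 10_000_000, 5, 64
elt_nbytes = 4 * E * L                    # ~200 MB in this example
per_task = elt_nbytes / concurrent_tasks  # ~3.1 MB, well under 2**32
assert per_task < TWO32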
def __init__(self, trt, gsims, oq, monitor=Monitor()):
    if isinstance(oq, dict):
        param = oq
        self.cross_correl = param.get('cross_correl')  # cond_spectra_test
    else:  # OqParam
        param = vars(oq)
        param['split_sources'] = oq.split_sources
        param['min_iml'] = oq.min_iml
        param['reqv'] = oq.get_reqv()
        param['af'] = getattr(oq, 'af', None)
        self.cross_correl = oq.cross_correl
        self.imtls = oq.imtls
    self.af = param.get('af', None)
    self.max_sites_disagg = param.get('max_sites_disagg', 10)
    self.max_sites_per_tile = param.get('max_sites_per_tile', 50_000)
    self.time_per_task = param.get('time_per_task', 60)
    self.collapse_level = int(param.get('collapse_level', 0))
    self.disagg_by_src = param.get('disagg_by_src', False)
    self.trt = trt
    self.gsims = gsims
    self.maximum_distance = _interp(param, 'maximum_distance', trt)
    if 'pointsource_distance' not in param:
        self.pointsource_distance = 1000.
    else:
        self.pointsource_distance = getdefault(
            param['pointsource_distance'], trt)
    self.minimum_distance = param.get('minimum_distance', 0)
    self.investigation_time = param.get('investigation_time')
    if self.investigation_time:
        self.tom = registry['PoissonTOM'](self.investigation_time)
    self.ses_seed = param.get('ses_seed', 42)
    self.ses_per_logic_tree_path = param.get('ses_per_logic_tree_path', 1)
    self.truncation_level = param.get('truncation_level')
    self.num_epsilon_bins = param.get('num_epsilon_bins', 1)
    self.ps_grid_spacing = param.get('ps_grid_spacing')
    self.split_sources = param.get('split_sources')
    self.effect = param.get('effect')
    self.use_recarray = use_recarray(gsims)
    for req in self.REQUIRES:
        reqset = set()
        for gsim in gsims:
            reqset.update(getattr(gsim, 'REQUIRES_' + req))
        setattr(self, 'REQUIRES_' + req, reqset)
    if 'imtls' in param:
        self.imtls = param['imtls']
    elif 'hazard_imtls' in param:
        self.imtls = DictArray(param['hazard_imtls'])
    elif not hasattr(self, 'imtls'):
        raise KeyError('Missing imtls in ContextMaker!')
    try:
        self.min_iml = param['min_iml']
    except KeyError:
        self.min_iml = [0. for imt in self.imtls]
    self.reqv = param.get('reqv')
    if self.reqv is not None:
        self.REQUIRES_DISTANCES.add('repi')
    reqs = (sorted(self.REQUIRES_RUPTURE_PARAMETERS) +
            sorted(self.REQUIRES_SITES_PARAMETERS) +
            sorted(self.REQUIRES_DISTANCES))
    dic = {}
    for req in reqs:
        if req in site_param_dt:
            dt = site_param_dt[req]
            if isinstance(dt, tuple):  # (string_, size)
                dic[req] = b''
            else:
                dic[req] = dt(0)
        else:
            dic[req] = 0.
    dic['occurrence_rate'] = numpy.float64(0)
    dic['sids'] = numpy.uint32(0)
    self.ctx_builder = RecordBuilder(**dic)
    self.loglevels = DictArray(self.imtls) if self.imtls else {}
    self.shift_hypo = param.get('shift_hypo')
    with warnings.catch_warnings():
        # avoid RuntimeWarning: divide by zero encountered in log
        warnings.simplefilter("ignore")
        for imt, imls in self.imtls.items():
            if imt != 'MMI':
                self.loglevels[imt] = numpy.log(imls)
    self.init_monitoring(monitor)
def _read_risk_data(self):
    # read the risk model (if any), the exposure (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_crmodel()  # must be called first
    if (not oq.imtls and 'shakemap' not in oq.inputs
            and oq.ground_motion_fields):
        raise InvalidFile('There are no intensity measure types in %s' %
                          oq.inputs['job_ini'])
    if oq.hazard_calculation_id:
        with util.read(oq.hazard_calculation_id) as dstore:
            haz_sitecol = dstore['sitecol'].complete
            if ('amplification' in oq.inputs and
                    'ampcode' not in haz_sitecol.array.dtype.names):
                haz_sitecol.add_col('ampcode', site.ampcode_dt)
    else:
        haz_sitecol = readinput.get_site_collection(oq, self.datastore)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
            haz_sitecol.make_complete()
        if 'site_model' in oq.inputs:
            self.datastore['site_model'] = readinput.get_site_model(oq)
    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        exposure = self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['cost_calculator'] = exposure.cost_calculator
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = (
                numpy.array(exposure.exposures, hdf5.vstr))
    elif 'assetcol' in self.datastore.parent:
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(oq.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['sitecol'] = self.sitecol
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
            nsites = len(self.sitecol)
            if (oq.spatial_correlation != 'no' and
                    nsites > MAXSITES):  # hard-coded, heuristic
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
        if self.sitecol and oq.imtls:
            logging.info('Read N=%d hazard sites and L=%d hazard levels',
                         len(self.sitecol), oq.imtls.size)
    if oq_hazard:
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' %
                (oq.time_event, oq_hazard.time_event))
    if oq.job_type == 'risk':
        tmap_arr, tmap_lst = logictree.taxonomy_mapping(
            self.oqparam.inputs.get('taxonomy_mapping'),
            self.assetcol.tagcol.taxonomy)
        self.crmodel.tmap = tmap_lst
        if len(tmap_arr):
            self.datastore['taxonomy_mapping'] = tmap_arr
        taxonomies = set(taxo for items in self.crmodel.tmap
                         for taxo, weight in items if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.crmodel.taxonomies)
        if self.crmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        if len(self.crmodel.taxonomies) > len(taxonomies):
            logging.info('Reducing risk model from %d to %d taxonomies',
                         len(self.crmodel.taxonomies), len(taxonomies))
            self.crmodel = self.crmodel.reduce(taxonomies)
            self.crmodel.tmap = tmap_lst
        self.crmodel.reduce_cons_model(self.assetcol.tagcol)
    if hasattr(self, 'sitecol') and self.sitecol:
        if 'site_model' in oq.inputs:
            assoc_dist = (oq.region_grid_spacing * 1.414
                          if oq.region_grid_spacing else 5)  # Graeme's 5km
            sm = readinput.get_site_model(oq)
            self.sitecol.complete.assoc(sm, assoc_dist)
        self.datastore['sitecol'] = self.sitecol
    # store amplification functions if any
    self.af = None
    if 'amplification' in oq.inputs:
        logging.info('Reading %s', oq.inputs['amplification'])
        df = readinput.get_amplification(oq)
        check_amplification(df, self.sitecol)
        self.amplifier = Amplifier(oq.imtls, df, oq.soil_intensities)
        if oq.amplification_method == 'kernel':
            # TODO: need to add additional checks on the main calculation
            # methodology since the kernel method is currently tested only
            # for classical PSHA
            self.af = AmplFunction.from_dframe(df)
            self.amplifier = None
    else:
        self.amplifier = None
    # manage secondary perils
    sec_perils = oq.get_sec_perils()
    for sp in sec_perils:
        sp.prepare(self.sitecol)  # add columns as needed
    mal = {lt: getdefault(oq.minimum_asset_loss, lt)
           for lt in oq.loss_names}
    if mal:
        logging.info('minimum_asset_loss=%s', mal)
    self.param = dict(individual_curves=oq.individual_curves,
                      ps_grid_spacing=oq.ps_grid_spacing,
                      collapse_level=oq.collapse_level,
                      split_sources=oq.split_sources,
                      avg_losses=oq.avg_losses,
                      amplifier=self.amplifier,
                      sec_perils=sec_perils,
                      ses_seed=oq.ses_seed,
                      minimum_asset_loss=mal)
    # compute exposure stats
    if hasattr(self, 'assetcol'):
        save_agg_values(self.datastore, self.assetcol,
                        oq.loss_names, oq.aggregate_by)
def weight(rec, md=getdefault(maxdist, trt_by_grp[grp_id])): xyz = spherical_to_cartesian(*rec['hypo']) nsites = len(kdt.query_ball_point(xyz, md, eps=.001)) return rec['n_occ'] * numpy.ceil((nsites + 1) / 1000)
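# Hedged sketch of the weighting above: count the sites within `md` km of a
# rupture hypocenter with a KD-tree and scale by the number of occurrences.
# Plain 2D coordinates stand in for spherical_to_cartesian:
import numpy
from scipy.spatial import cKDTree

sites = numpy.random.default_rng(42).uniform(0, 100, (1000, 2))
kdt = cKDTree(sites)

def weight_sketch(hypo_xy, n_occ, md=50.0):
    nsites = len(kdt.query_ball_point(hypo_xy, md, eps=.001))
    return n_occ * numpy.ceil((nsites + 1) / 1000)

print(weight_sketch([50., 50.], n_occ=3))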
def execute(self): """ Run in parallel `core_task(sources, sitecol, monitor)`, by parallelizing on the sources according to their weight and tectonic region type. """ oq = self.oqparam if oq.hazard_calculation_id and not oq.compare_with_classical: with util.read(self.oqparam.hazard_calculation_id) as parent: self.csm_info = parent['csm_info'] self.calc_stats() # post-processing return {} mags = self.datastore['source_mags'][()] if len(mags) == 0: # everything was discarded raise RuntimeError('All sources were discarded!?') gsims_by_trt = self.csm_info.get_gsims_by_trt() dist_bins = { trt: oq.maximum_distance.get_dist_bins(trt) for trt in gsims_by_trt } if oq.pointsource_distance: logging.info('Computing effect of the ruptures') mon = self.monitor('rupture effect') effect = parallel.Starmap.apply( get_effect_by_mag, (mags, self.sitecol.one(), gsims_by_trt, oq.maximum_distance, oq.imtls, mon)).reduce() self.datastore['effect'] = effect self.datastore.set_attrs('effect', **dist_bins) self.effect = { trt: Effect({mag: effect[mag][:, t] for mag in effect}, dist_bins[trt], getdefault(oq.pointsource_distance, trt)) for t, trt in enumerate(gsims_by_trt) } for trt, eff in self.effect.items(): oq.maximum_distance.magdist[trt] = eff.dist_by_mag() oq.pointsource_distance[trt] = eff.dist_by_mag( eff.collapse_value) else: self.effect = {} smap = parallel.Starmap(self.core_task.__func__, h5=self.datastore.hdf5, num_cores=oq.num_cores) smap.task_queue = list(self.gen_task_queue()) # really fast acc0 = self.acc0() # create the rup/ datasets BEFORE swmr_on() self.datastore.swmr_on() smap.h5 = self.datastore.hdf5 self.calc_times = AccumDict(accum=numpy.zeros(3, F32)) try: acc = smap.get_results().reduce(self.agg_dicts, acc0) self.store_rlz_info(acc.eff_ruptures) finally: with self.monitor('store source_info'): self.store_source_info(self.calc_times) if self.by_task: logging.info('Storing by_task information') num_tasks = max(self.by_task) + 1, er = self.datastore.create_dset('by_task/eff_ruptures', U32, num_tasks) es = self.datastore.create_dset('by_task/eff_sites', U32, num_tasks) si = self.datastore.create_dset('by_task/srcids', hdf5.vuint32, num_tasks, fillvalue=None) for task_no, rec in self.by_task.items(): effrups, effsites, srcids = rec er[task_no] = effrups es[task_no] = effsites si[task_no] = srcids self.by_task.clear() numrups = sum(arr[0] for arr in self.calc_times.values()) numsites = sum(arr[1] for arr in self.calc_times.values()) logging.info('Effective number of ruptures: %d/%d', numrups, self.totrups) logging.info('Effective number of sites per rupture: %d', numsites / numrups) self.calc_times.clear() # save a bit of memory return acc
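# Hedged sketch of the map-reduce pattern above: each task returns a dict
# and the results are folded into an accumulator with an aggregation
# function. A toy stand-in for Starmap.get_results().reduce(agg, acc0):
from functools import reduce

def agg_dicts(acc, result):
    for key, val in result.items():
        acc[key] = acc.get(key, 0) + val
    return acc

results = [{'a': 1, 'b': 2}, {'a': 3}, {'b': 5}]
acc = reduce(agg_dicts, results, {})
print(acc)  # {'a': 4, 'b': 7}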
def event_based(proxies, full_lt, oqparam, dstore, monitor): """ Compute GMFs and optionally hazard curves """ alldata = AccumDict(accum=[]) sig_eps = [] times = [] # rup_id, nsites, dt hcurves = {} # key -> poes trt_smr = proxies[0]['trt_smr'] fmon = monitor('filtering ruptures', measuremem=False) cmon = monitor('computing gmfs', measuremem=False) with dstore: trt = full_lt.trts[trt_smr // len(full_lt.sm_rlzs)] srcfilter = SourceFilter(dstore['sitecol'], oqparam.maximum_distance(trt)) rupgeoms = dstore['rupgeoms'] rlzs_by_gsim = full_lt._rlzs_by_gsim(trt_smr) param = vars(oqparam).copy() param['imtls'] = oqparam.imtls param['min_iml'] = oqparam.min_iml param['maximum_distance'] = oqparam.maximum_distance(trt) cmaker = ContextMaker(trt, rlzs_by_gsim, param) min_mag = getdefault(oqparam.minimum_magnitude, trt) for proxy in proxies: t0 = time.time() with fmon: if proxy['mag'] < min_mag: continue sids = srcfilter.close_sids(proxy, trt) if len(sids) == 0: # filtered away continue proxy.geom = rupgeoms[proxy['geom_id']] ebr = proxy.to_ebr(cmaker.trt) # after the geometry is set try: computer = GmfComputer(ebr, srcfilter.sitecol.filtered(sids), cmaker, oqparam.correl_model, oqparam.cross_correl, oqparam._amplifier, oqparam._sec_perils) except FarAwayRupture: continue with cmon: data = computer.compute_all(sig_eps) dt = time.time() - t0 times.append((computer.ebrupture.id, len(computer.ctx.sids), dt)) for key in data: alldata[key].extend(data[key]) for key, val in sorted(alldata.items()): if key in 'eid sid rlz': alldata[key] = U32(alldata[key]) else: alldata[key] = F32(alldata[key]) gmfdata = strip_zeros(pandas.DataFrame(alldata)) if len(gmfdata) and oqparam.hazard_curves_from_gmfs: hc_mon = monitor('building hazard curves', measuremem=False) for (sid, rlz), df in gmfdata.groupby(['sid', 'rlz']): with hc_mon: poes = calc.gmvs_to_poes(df, oqparam.imtls, oqparam.ses_per_logic_tree_path) for m, imt in enumerate(oqparam.imtls): hcurves[rsi2str(rlz, sid, imt)] = poes[m] times = numpy.array([tup + (monitor.task_no, ) for tup in times], time_dt) times.sort(order='rup_id') if not oqparam.ground_motion_fields: gmfdata = () return dict(gmfdata=gmfdata, hcurves=hcurves, times=times, sig_eps=numpy.array(sig_eps, sig_eps_dt(oqparam.imtls)))
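# Hedged sketch (an assumption, not the engine's calc.gmvs_to_poes, whose
# normalization may differ): a simple empirical way to turn ground-motion
# values from stochastic event sets into PoEs is the fraction of values
# exceeding each intensity measure level:
import numpy

def gmvs_to_poes_sketch(gmvs, imls, num_ses):
    # gmvs: ground-motion values at one site for one IMT, one per event
    exceeding = (gmvs[:, None] >= imls).sum(axis=0)
    return exceeding / num_ses  # empirical probability of exceedance

gmvs = numpy.array([.01, .05, .12, .30])
imls = numpy.array([.02, .1, .2])
print(gmvs_to_poes_sketch(gmvs, imls, num_ses=100))  # [0.03 0.02 0.01]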
def gen_task_queue(self): """ Build a task queue to be attached to the Starmap instance """ oq = self.oqparam gsims_by_trt = self.full_lt.get_gsims_by_trt() src_groups = self.csm.src_groups def srcweight(src): trt = src.tectonic_region_type g = len(gsims_by_trt[trt]) m = (oq.maximum_distance(trt) / 300)**2 return src.weight * g * m logging.info('Weighting the sources') totweight = sum(sum(srcweight(src) for src in sg) for sg in src_groups) param = dict(truncation_level=oq.truncation_level, imtls=oq.imtls, filter_distance=oq.filter_distance, reqv=oq.get_reqv(), maximum_distance=oq.maximum_distance, pointsource_distance=oq.pointsource_distance, shift_hypo=oq.shift_hypo, max_weight=oq.max_weight, collapse_ctxs=oq.collapse_ctxs, max_sites_disagg=oq.max_sites_disagg) srcfilter = self.src_filter(self.datastore.tempname) C = oq.concurrent_tasks or 1 if oq.calculation_mode == 'preclassical': f1 = f2 = preclassical C *= 50 # use more tasks because there will be slow tasks elif oq.disagg_by_src: # do not split the sources f1, f2 = classical, classical else: f1, f2 = classical, classical_split_filter for sg in src_groups: gsims = gsims_by_trt[sg.trt] if sg.atomic: # do not split atomic groups nb = 1 yield f1, (sg, srcfilter, gsims, param) else: # regroup the sources in blocks blks = (groupby(sg, operator.attrgetter('source_id')).values() if oq.disagg_by_src else block_splitter( sg, totweight / C, srcweight)) blocks = list(blks) nb = len(blocks) for block in blocks: logging.debug('Sending %d source(s) with weight %d', len(block), sum(src.weight for src in block)) yield f2, (block, srcfilter, gsims, param) w = sum(src.weight for src in sg) logging.info('TRT = %s', sg.trt) if oq.maximum_distance.magdist: md = ', '.join('%s->%d' % item for item in sorted( oq.maximum_distance.magdist[sg.trt].items())) else: md = oq.maximum_distance(sg.trt) logging.info( 'max_dist={}, gsims={}, weight={:,d}, blocks={}'.format( md, len(gsims), int(w), nb)) if oq.pointsource_distance['default']: psd = getdefault(oq.pointsource_distance, sg.trt) msg = ', '.join('%s->%d' % it for it in sorted(psd.items())) logging.info('ps_dist=%s', msg)
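# Hedged sketch of the block_splitter used above: greedily pack items into
# blocks of roughly `max_weight` total weight, preserving order. A
# simplified stand-in, not the engine implementation:
def block_splitter_sketch(items, max_weight, weight=lambda x: 1):
    block, tot = [], 0
    for item in items:
        block.append(item)
        tot += weight(item)
        if tot >= max_weight:
            yield block
            block, tot = [], 0
    if block:
        yield block

blocks = list(block_splitter_sketch(range(10), 7, weight=lambda x: x))
print(blocks)  # [[0, 1, 2, 3, 4], [5, 6], [7], [8], [9]]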
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.csm_info = parent['csm_info']
        self.calc_stats()  # post-processing
        return {}
    mags = self.datastore['source_mags'][()]
    gsims_by_trt = self.csm_info.get_gsims_by_trt()
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    if oq.pointsource_distance and len(mags):
        logging.info('Computing effect of the ruptures')
        mon = self.monitor('rupture effect')
        effect = parallel.Starmap.apply(
            get_effect_by_mag, (mags, self.sitecol.one(), gsims_by_trt,
                                oq.maximum_distance, oq.imtls, mon)).reduce()
        self.datastore['effect'] = effect
        self.datastore.set_attrs('effect', **dist_bins)
        self.effect = {
            trt: Effect({mag: effect[mag][:, t] for mag in effect},
                        dist_bins[trt],
                        getdefault(oq.pointsource_distance, trt))
            for t, trt in enumerate(gsims_by_trt)}
        for trt, eff in self.effect.items():
            oq.maximum_distance.magdist[trt] = eff.dist_by_mag()
            oq.pointsource_distance[trt] = eff.dist_by_mag(
                eff.collapse_value)
    else:
        self.effect = {}
    if oq.calculation_mode == 'preclassical' and self.N == 1:
        smap = parallel.Starmap(ruptures_by_mag_dist)
        for func, args in self.gen_task_queue():
            smap.submit(args)
        counts = smap.reduce()
        ndists = oq.maximum_distance.get_dist_bins.__defaults__[0]
        for mag in mags:
            arr = numpy.zeros((ndists, len(gsims_by_trt)), U32)
            for trti, trt in enumerate(gsims_by_trt):
                try:
                    arr[:, trti] = counts[trt][mag]
                except KeyError:
                    pass
            self.datastore['rups_by_mag_dist/' + mag] = arr
        self.datastore.set_attrs('rups_by_mag_dist', **dist_bins)
        self.datastore['csm_info'] = self.csm_info
        return {}
    smap = parallel.Starmap(self.core_task.__func__, h5=self.datastore.hdf5)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    self.maxdists = []
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        if self.maxdists:
            maxdist = numpy.mean(self.maxdists)
            logging.info('Using effective maximum distance for '
                         'point sources %d km', maxdist)
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.sources_by_task:
            num_tasks = max(self.sources_by_task) + 1
            sbt = numpy.zeros(num_tasks, [('eff_ruptures', U32),
                                          ('eff_sites', U32),
                                          ('srcids', hdf5.vuint32)])
            for task_no in range(num_tasks):
                sbt[task_no] = self.sources_by_task.get(
                    task_no, (0, 0, U32([])))
            self.datastore['sources_by_task'] = sbt
            self.sources_by_task.clear()
    numrups = sum(arr[0] for arr in self.calc_times.values())
    if self.totrups != numrups:
        logging.info('Considered %d/%d ruptures', numrups, self.totrups)
    self.calc_times.clear()  # save a bit of memory
    return acc
def execute(self): """ Run in parallel `core_task(sources, sitecol, monitor)`, by parallelizing on the sources according to their weight and tectonic region type. """ oq = self.oqparam if oq.hazard_calculation_id and not oq.compare_with_classical: with util.read(self.oqparam.hazard_calculation_id) as parent: self.full_lt = parent['full_lt'] self.calc_stats() # post-processing return {} mags = self.datastore['source_mags'] # by TRT if len(mags) == 0: # everything was discarded raise RuntimeError('All sources were discarded!?') gsims_by_trt = self.full_lt.get_gsims_by_trt() if oq.pointsource_distance is not None: for trt in gsims_by_trt: oq.pointsource_distance[trt] = getdefault( oq.pointsource_distance, trt) mags_by_trt = {} for trt in mags: mags_by_trt[trt] = mags[trt][()] imts_with_period = [imt for imt in oq.imtls if imt == 'PGA' or imt.startswith('SA')] imts_ok = len(imts_with_period) == len(oq.imtls) if (imts_ok and oq.pointsource_distance and oq.pointsource_distance.suggested()) or ( imts_ok and oq.minimum_intensity): aw, self.psd = get_effect( mags_by_trt, self.sitecol.one(), gsims_by_trt, oq) if len(vars(aw)) > 1: # more than _extra self.datastore['effect_by_mag_dst'] = aw elif oq.pointsource_distance: self.psd = oq.pointsource_distance.interp(mags_by_trt) else: self.psd = {} smap = parallel.Starmap(classical, h5=self.datastore.hdf5, num_cores=oq.num_cores) self.submit_tasks(smap) acc0 = self.acc0() # create the rup/ datasets BEFORE swmr_on() self.datastore.swmr_on() smap.h5 = self.datastore.hdf5 self.calc_times = AccumDict(accum=numpy.zeros(3, F32)) try: acc = smap.reduce(self.agg_dicts, acc0) self.store_rlz_info(acc.eff_ruptures) finally: with self.monitor('store source_info'): self.store_source_info(self.calc_times) if self.by_task: logging.info('Storing by_task information') num_tasks = max(self.by_task) + 1, er = self.datastore.create_dset('by_task/eff_ruptures', U32, num_tasks) es = self.datastore.create_dset('by_task/eff_sites', U32, num_tasks) si = self.datastore.create_dset('by_task/srcids', hdf5.vstr, num_tasks, fillvalue=None) for task_no, rec in self.by_task.items(): effrups, effsites, srcids = rec er[task_no] = effrups es[task_no] = effsites si[task_no] = ' '.join(srcids) self.by_task.clear() self.numrups = sum(arr[0] for arr in self.calc_times.values()) numsites = sum(arr[1] for arr in self.calc_times.values()) logging.info('Effective number of ruptures: {:_d}/{:_d}'.format( int(self.numrups), self.totrups)) logging.info('Effective number of sites per rupture: %d', numsites / self.numrups) if self.psd: psdist = max(max(self.psd[trt].values()) for trt in self.psd) if psdist != -1 and self.maxradius >= psdist / 2: logging.warning('The pointsource_distance of %d km is too ' 'small compared to a maxradius of %d km', psdist, self.maxradius) self.calc_times.clear() # save a bit of memory return acc
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.csm_info = parent['csm_info']
        self.calc_stats()  # post-processing
        return {}
    mags = self.datastore['source_mags'][()]
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.csm_info.get_gsims_by_trt()
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    # computing the effect makes sense only if all IMTs have the same
    # unit of measure; for simplicity we will consider only PGA and SA
    self.effect = {}
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if len(self.sitecol) >= oq.max_sites_disagg and imts_ok:
        logging.info('Computing effect of the ruptures')
        mon = self.monitor('rupture effect')
        effect = parallel.Starmap.apply(
            get_effect_by_mag, (mags, self.sitecol.one(), gsims_by_trt,
                                oq.maximum_distance, oq.imtls, mon)).reduce()
        self.datastore['effect_by_mag_dst_trt'] = effect
        self.datastore.set_attrs('effect_by_mag_dst_trt', **dist_bins)
        self.effect.update({
            trt: Effect({mag: effect[mag][:, t] for mag in effect},
                        dist_bins[trt])
            for t, trt in enumerate(gsims_by_trt)})
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in self.effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if oq.pointsource_distance['default']:
                oq.pointsource_distance[trt] = eff.dist_by_mag(
                    eff.collapse_value(oq.pointsource_distance['default']))
    elif oq.pointsource_distance['default']:
        # replace pointsource_distance with a dict trt -> mag -> dst
        for trt in gsims_by_trt:
            try:
                dst = getdefault(oq.pointsource_distance, trt)
            except TypeError:  # 'NoneType' object is not subscriptable
                dst = getdefault(oq.maximum_distance, trt)
            oq.pointsource_distance[trt] = {mag: dst for mag in mags}
    smap = parallel.Starmap(
        self.core_task.__func__, h5=self.datastore.hdf5,
        num_cores=oq.num_cores)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # NB: the trailing comma builds a 1-tuple used as dataset shape
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids', hdf5.vuint32,
                                            num_tasks, fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = srcids
            self.by_task.clear()
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: %d/%d',
                 self.numrups, self.totrups)
    logging.info('Effective number of sites per rupture: %d',
                 numsites / self.numrups)
    self.calc_times.clear()  # save a bit of memory
    return acc