class ScenarioRiskCalculator(base.RiskCalculator):
    """
    Run a scenario risk calculation
    """
    core_func = scenario_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    losses_by_key = datastore.persistent_attribute('losses_by_key')
    gmf_by_trt_gsim = datastore.persistent_attribute('gmf_by_trt_gsim')
    pre_calculator = 'scenario'
    is_stochastic = True

    def pre_execute(self):
        """
        Compute the GMFs, build the epsilons, the riskinputs, and a
        dictionary with the unit of measure, used in the export phase.
        """
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)

        logging.info('Building the epsilons')
        eps_dict = self.make_eps_dict(
            self.oqparam.number_of_ground_motion_fields)
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = self.build_riskinputs(base.get_gmfs(self), eps_dict)

    def post_execute(self, result):
        """
        Export the loss curves and the aggregated losses in CSV format
        """
        self.losses_by_key = result
class ClassicalDamageCalculator(classical_risk.ClassicalRiskCalculator):
    """
    Classical damage calculator
    """
    core_task = classical_damage
    damages = datastore.persistent_attribute('damages-rlzs')

    def check_poes(self, curves_by_trt_gsim):
        """
        Raise an error if one PoE = 1, since it would produce a log(0) in
        :func:`openquake.risklib.scientific.annual_frequency_of_exceedence`
        """
        for key, curves in curves_by_trt_gsim.items():
            for imt in self.oqparam.imtls:
                for sid, poes in enumerate(curves[imt]):
                    if (poes == 1).any():
                        raise ValueError('Found a PoE=1 for site_id=%d, %s'
                                         % (sid, imt))

    def post_execute(self, result):
        """
        Export the result in CSV format.

        :param result:
            a dictionary asset -> fractions per damage state
        """
        damages_dt = numpy.dtype([(ds, numpy.float32)
                                  for ds in self.riskmodel.damage_states])
        damages = numpy.zeros((self.N, self.R), damages_dt)
        for r in result:
            for aid, fractions in result[r].items():
                damages[aid, r] = tuple(fractions)
        self.damages = damages
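# Why check_poes matters: converting a probability of exceedance over an
# investigation time t to an annual frequency uses -log(1 - poe) / t, which
# diverges at poe == 1. A minimal sketch of the conversion follows; the
# helper name is illustrative, not the risklib function signature.

def _annual_frequency_sketch(poes, investigation_time):
    import numpy
    poes = numpy.asarray(poes, float)
    if (poes == 1).any():
        raise ValueError('PoE=1 cannot be converted to a finite frequency')
    return -numpy.log(1. - poes) / investigation_time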
class ClassicalRiskCalculator(base.RiskCalculator):
    """
    Classical Risk calculator
    """
    pre_calculator = 'classical'
    avg_losses = datastore.persistent_attribute('avg_losses/rlzs')
    core_func = classical_risk

    def pre_execute(self):
        """
        Associate the assets to the sites and build the riskinputs.
        """
        super(ClassicalRiskCalculator, self).pre_execute()
        hazard_from_csv = 'hazard_curves' in self.oqparam.inputs
        if hazard_from_csv:
            self.sitecol, hcurves_by_imt = readinput.get_sitecol_hcurves(
                self.oqparam)
            self.sitecol, self.assets_by_site = \
                self.assoc_assets_sites(self.sitecol)

        logging.info('Preparing the risk input')
        curves_by_trt_gsim = {}
        for dset in self.datastore['curves_by_sm'].values():
            for key, curves in dset.items():
                trt_id, gsim = key.split('-')
                curves_by_trt_gsim[int(trt_id), gsim] = curves.value
        self.riskinputs = self.build_riskinputs(curves_by_trt_gsim)

    def post_execute(self, result):
        """
        Save the losses in a compact form.

        :param result:
            a dictionary rlz_idx -> (loss_type, asset_id) -> (avg, ins)
        """
        fields = []
        for loss_type in self.riskmodel.get_loss_types():
            fields.append(('avg_loss~%s' % loss_type, float))
            fields.append(('ins_loss~%s' % loss_type, float))
        avg_loss_dt = numpy.dtype(fields)
        num_rlzs = len(self.rlzs_assoc.realizations)
        assets = riskinput.sorted_assets(self.assets_by_site)
        self.asset_no_by_id = {a.id: no for no, a in enumerate(assets)}
        avg_losses = numpy.zeros(
            (num_rlzs, len(self.asset_no_by_id)), avg_loss_dt)
        for rlz_no in result:
            losses_by_lt_asset = result[rlz_no]
            by_asset = operator.itemgetter(1)
            for asset, keys in general.groupby(
                    losses_by_lt_asset, by_asset).items():
                asset_no = self.asset_no_by_id[asset]
                losses = []
                for (loss_type, _) in keys:
                    losses.extend(losses_by_lt_asset[loss_type, asset])
                avg_losses[rlz_no][asset_no] = tuple(losses)
        self.avg_losses = avg_losses
class ScenarioRiskCalculator(base.RiskCalculator):
    """
    Run a scenario risk calculation
    """
    core_func = scenario_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    pre_calculator = 'scenario'
    is_stochastic = True

    def pre_execute(self):
        """
        Compute the GMFs, build the epsilons, the riskinputs, and a
        dictionary with the unit of measure, used in the export phase.
        """
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)

        logging.info('Building the epsilons')
        self.epsilon_matrix = self.make_eps(
            self.oqparam.number_of_ground_motion_fields)
        sitecol, gmfs = base.get_gmfs(self)
        self.riskinputs = self.build_riskinputs(gmfs, self.epsilon_matrix)

    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results on the datastore.
        """
        ltypes = self.riskmodel.loss_types
        multi_stat_dt = numpy.dtype([(lt, stat_dt) for lt in ltypes])
        with self.monitor('saving outputs', autoflush=True):
            R = len(self.rlzs_assoc.realizations)
            N = len(self.assetcol)

            # agg losses
            agglosses = numpy.zeros(R, multi_stat_dt)
            mean, std = scientific.mean_std(result['agg'])
            for l, lt in enumerate(ltypes):
                agg = agglosses[lt]
                agg['mean'] = mean[l, :, 0]
                agg['stddev'] = std[l, :, 0]
                agg['mean_ins'] = mean[l, :, 1]
                agg['stddev_ins'] = std[l, :, 1]

            # average losses
            avglosses = numpy.zeros((N, R), multi_stat_dt)
            for (l, r, aid, stat) in result['avg']:
                avglosses[ltypes[l]][aid, r] = stat
            self.datastore['avglosses-rlzs'] = avglosses
            self.datastore['agglosses-rlzs'] = agglosses
class ScenarioRiskCalculator(base.RiskCalculator):
    """
    Run a scenario risk calculation
    """
    core_func = scenario_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    pre_calculator = 'scenario'
    is_stochastic = True

    def pre_execute(self):
        """
        Compute the GMFs, build the epsilons, the riskinputs, and a
        dictionary with the unit of measure, used in the export phase.
        """
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)

        logging.info('Building the epsilons')
        eps_dict = self.make_eps_dict(
            self.oqparam.number_of_ground_motion_fields)
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = self.build_riskinputs(self.gmfs, eps_dict)

    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results on the datastore.
        """
        with self.monitor('saving outputs', autoflush=True):
            L = len(self.riskmodel.loss_types)
            R = len(self.rlzs_assoc.realizations)
            N = len(self.assetcol)
            arr = dict(avg=numpy.zeros((N, L, R), stat_dt),
                       agg=numpy.zeros((L, R), stat_dt))
            for (l, r), res in result.items():
                for keytype, key in res:
                    if keytype == 'agg':
                        agg_losses = arr[keytype][l, r]
                        mean, std = scientific.mean_std(res[keytype, key])
                        if key == 0:
                            agg_losses['mean'] = mean
                            agg_losses['stddev'] = std
                        else:
                            agg_losses['mean_ins'] = mean
                            agg_losses['stddev_ins'] = std
                    else:
                        arr[keytype][key, l, r] = res[keytype, key]
            self.datastore['avglosses'] = arr['avg']
            self.datastore['agglosses'] = arr['agg']
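# A sketch of the statistics computed above: scientific.mean_std is assumed
# to reduce the aggregate loss samples (one row per ground motion field) to
# a mean and a standard deviation per column; plain numpy reproduces the
# idea on a toy (E, 2) matrix of (total, insured) losses.

def _mean_std_sketch():
    import numpy
    agg = numpy.array([[10., 4.], [12., 5.], [8., 3.]])  # shape (E, 2)
    return agg.mean(axis=0), agg.std(axis=0, ddof=1)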
class ScenarioDamageCalculator(base.RiskCalculator):
    """
    Scenario damage calculator
    """
    pre_calculator = 'scenario'
    core_func = scenario_damage
    damages_by_key = datastore.persistent_attribute('damages_by_key')
    is_stochastic = True

    def pre_execute(self):
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)
        self.riskinputs = self.build_riskinputs(base.get_gmfs(self))

    def post_execute(self, result):
        self.damages_by_key = result
class ClassicalDamageCalculator(base.RiskCalculator):
    """
    Classical damage calculator
    """
    core_func = classical_damage
    damages_by_rlz = datastore.persistent_attribute('damages_by_rlz')

    def pre_execute(self):
        """
        Read the curves and build the riskinputs.
        """
        super(ClassicalDamageCalculator, self).pre_execute()
        logging.info('Reading hazard curves from CSV')
        sites, hcurves_by_imt = readinput.get_sitecol_hcurves(self.oqparam)
        with self.monitor('assoc_assets_sites'):
            sitecol, assets_by_site = self.assoc_assets_sites(sites)
        num_assets = sum(len(assets) for assets in assets_by_site)
        num_sites = len(sitecol)
        logging.info('Associated %d assets to %d sites',
                     num_assets, num_sites)
        logging.info('Preparing the risk input')
        self.riskinputs = self.build_riskinputs(
            {(0, 'FromFile'): hcurves_by_imt})
        fake_rlz = logictree.Realization(
            value=('FromFile',), weight=1, lt_path=('',),
            ordinal=0, lt_uid=('*',))
        self.rlzs_assoc = logictree.RlzsAssoc([fake_rlz])

    def post_execute(self, result):
        """
        Export the result in CSV format.

        :param result:
            a dictionary asset -> fractions per damage state
        """
        self.damages_by_rlz = result
class ClassicalDamageCalculator(classical_risk.ClassicalRiskCalculator):
    """
    Classical damage calculator
    """
    core_func = classical_damage
    damages = datastore.persistent_attribute('damages-rlzs')

    def post_execute(self, result):
        """
        Export the result in CSV format.

        :param result:
            a dictionary asset -> fractions per damage state
        """
        damages_dt = numpy.dtype([(ds, numpy.float32)
                                  for ds in self.riskmodel.damage_states])
        damages = numpy.zeros((self.N, self.R), damages_dt)
        for r in result:
            for aid, fractions in result[r].items():
                damages[aid, r] = tuple(fractions)
        self.damages = damages
class EventBasedRuptureCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ruptures only
    """
    core_func = compute_ruptures
    tags = datastore.persistent_attribute('tags')
    sescollection = datastore.persistent_attribute('sescollection')
    num_ruptures = datastore.persistent_attribute('num_ruptures')
    counts_per_rlz = datastore.persistent_attribute('counts_per_rlz')
    is_stochastic = True

    def pre_execute(self):
        """
        Set a seed on each source
        """
        super(EventBasedRuptureCalculator, self).pre_execute()
        rnd = random.Random()
        rnd.seed(self.oqparam.random_seed)
        for src in self.csm.get_sources():
            src.seed = rnd.randint(0, MAX_INT)

    def execute(self):
        """
        Run in parallel `core_func(sources, sitecol, info, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = self.oqparam
        sources = self.csm.get_sources()
        ruptures_by_trt = parallel.apply_reduce(
            self.core_func.__func__,
            (sources, self.sitecol, self.rlzs_assoc.csm_info, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('trt_model_id'))
        store_source_chunks(self.datastore)
        logging.info('Generated %d SESRuptures',
                     sum(len(v) for v in ruptures_by_trt.values()))
        self.rlzs_assoc = self.csm.get_rlzs_assoc(
            lambda trt: len(ruptures_by_trt.get(trt.id, [])))
        return ruptures_by_trt

    def post_execute(self, result):
        """
        Save the SES collection and the array counts_per_rlz
        """
        nc = self.rlzs_assoc.csm_info.num_collections
        sescollection = numpy.array([{} for col_id in range(nc)])
        tags = []
        ordinal = 0
        for trt_id in sorted(result):
            for sr in sorted(result[trt_id]):
                sr.ordinal = ordinal
                ordinal += 1
                sescollection[sr.col_id][sr.tag] = sr
                tags.append(sr.tag)
                if len(sr.tag) > 100:
                    logging.error(
                        'The tag %s is %d characters long; it will be '
                        'truncated to 100 characters in the /tags array',
                        sr.tag, len(sr.tag))
        logging.info('Saving the SES collection')
        with self.monitor('saving ruptures', autoflush=True):
            self.tags = numpy.array(tags, (bytes, 100))
            self.sescollection = sescollection
        with self.monitor('counts_per_rlz'):
            self.num_ruptures = numpy.array(list(map(len, sescollection)))
            self.counts_per_rlz = counts_per_rlz(
                len(self.sitecol), self.rlzs_assoc, sescollection)
            self.datastore['counts_per_rlz'].attrs['gmfs_nbytes'] = \
                get_gmfs_nbytes(len(self.sitecol), len(self.oqparam.imtls),
                                self.rlzs_assoc, sescollection)
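# A sequential sketch of the apply_reduce pattern used in execute(): split
# the sources in blocks balanced by `weight`, keeping sources with the same
# `key` (here the tectonic region type) together, then fold each task result
# into the accumulator. This is assumed behaviour, not the openquake
# parallel implementation.

def _apply_reduce_sketch(task, items, agg, acc, weight, key,
                         max_block_weight=100):
    from itertools import groupby
    for _k, group in groupby(sorted(items, key=key), key):
        block, tot = [], 0
        for item in group:
            block.append(item)
            tot += weight(item)
            if tot >= max_block_weight:  # close the block and reduce
                acc = agg(acc, task(block))
                block, tot = [], 0
        if block:  # reduce the leftover block
            acc = agg(acc, task(block))
    return acc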
class ClassicalRiskCalculator(base.RiskCalculator):
    """
    Classical Risk calculator
    """
    pre_calculator = 'classical'
    avg_losses = datastore.persistent_attribute('avg_losses-rlzs')
    core_func = classical_risk

    def pre_execute(self):
        """
        Associate the assets to the sites and build the riskinputs.
        """
        if 'hazard_curves' in self.oqparam.inputs:  # read hazard from file
            haz_sitecol, haz_curves = readinput.get_hcurves(self.oqparam)
            self.read_exposure()  # define .assets_by_site
            self.load_riskmodel()
            self.sitecol, self.assets_by_site = self.assoc_assets_sites(
                haz_sitecol)
            curves_by_trt_gsim = {(0, 'FromFile'): haz_curves}
            self.rlzs_assoc = logictree.trivial_rlzs_assoc()
            self.save_mesh()
        else:  # compute hazard
            super(ClassicalRiskCalculator, self).pre_execute()
            logging.info('Preparing the risk input')
            curves_by_trt_gsim = {}
            for dset in self.datastore['curves_by_sm'].values():
                for key, curves in dset.items():
                    trt_id, gsim = key.split('-')
                    curves_by_trt_gsim[int(trt_id), gsim] = curves.value
        self.assetcol = riskinput.build_asset_collection(
            self.assets_by_site, self.oqparam.time_event)
        self.riskinputs = self.build_riskinputs(curves_by_trt_gsim)
        self.monitor.oqparam = self.oqparam
        self.N = sum(len(assets) for assets in self.assets_by_site)
        self.L = len(self.riskmodel.loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.I = self.oqparam.insured_losses
        self.Q1 = len(self.oqparam.quantile_loss_curves) + 1

    def post_execute(self, result):
        """
        Save the losses in a compact form.
        """
        self.loss_curve_dt, self.loss_maps_dt = (
            self.riskmodel.build_loss_dtypes(
                self.oqparam.conditional_loss_poes, self.I))
        self.save_loss_curves(result)
        if self.oqparam.conditional_loss_poes:
            self.save_loss_maps(result)

    def save_loss_curves(self, result):
        """
        Saving loss curves in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_curves = numpy.zeros((self.N, self.R), self.loss_curve_dt)
        for l, r, aid, lcurve in result['loss_curves']:
            loss_curves_lt = loss_curves[ltypes[l]]
            for i, name in enumerate(loss_curves_lt.dtype.names):
                loss_curves_lt[name][aid, r] = lcurve[i]
        self.datastore['loss_curves-rlzs'] = loss_curves

        # loss curves stats
        if self.R > 1:
            stat_curves = numpy.zeros((self.Q1, self.N), self.loss_curve_dt)
            for l, aid, statcurve in result['stat_curves']:
                stat_curves_lt = stat_curves[ltypes[l]]
                for name in stat_curves_lt.dtype.names:
                    for s in range(self.Q1):
                        stat_curves_lt[name][s, aid] = statcurve[name][s]
            self.datastore['loss_curves-stats'] = stat_curves

    def save_loss_maps(self, result):
        """
        Saving loss maps in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_maps = numpy.zeros((self.N, self.R), self.loss_maps_dt)
        for l, r, aid, lmaps in result['loss_maps']:
            loss_maps_lt = loss_maps[ltypes[l]]
            for i, name in enumerate(loss_maps_lt.dtype.names):
                loss_maps_lt[name][aid, r] = lmaps[i]
        self.datastore['loss_maps-rlzs'] = loss_maps

        # loss maps stats
        if self.R > 1:
            stat_maps = numpy.zeros((self.Q1, self.N), self.loss_maps_dt)
            for l, aid, statmaps in result['stat_maps']:
                statmaps_lt = stat_maps[ltypes[l]]
                for name in statmaps_lt.dtype.names:
                    for s in range(self.Q1):
                        statmaps_lt[name][s, aid] = statmaps[name][s]
            self.datastore['loss_maps-stats'] = stat_maps
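# The composite (structured) arrays used above, in one self-contained
# sketch: one named field per loss type, each holding a fixed-size record,
# indexable both by field name and by (asset, realization). The field names
# below are examples, not the engine's dtypes.

def _composite_array_sketch():
    import numpy
    loss_curve_dt = numpy.dtype([('losses', (numpy.float32, 4)),
                                 ('poes', (numpy.float32, 4))])
    multi_dt = numpy.dtype([('structural', loss_curve_dt),
                            ('nonstructural', loss_curve_dt)])
    arr = numpy.zeros((3, 2), multi_dt)  # (N assets, R realizations)
    arr['structural']['poes'][0, 1] = [.9, .5, .1, .01]
    return arr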
class EventBasedRiskCalculator(base.RiskCalculator):
    """
    Event based PSHA calculator generating the event loss table and
    fixed ratios loss curves.
    """
    pre_calculator = 'event_based'
    core_func = event_based_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    spec_indices = datastore.persistent_attribute('spec_indices')
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some datasets in the datastore.
        """
        super(EventBasedRiskCalculator, self).pre_execute()
        if not self.riskmodel:  # there is no riskmodel, exit early
            self.execute = lambda: None
            self.post_execute = lambda result: None
            return
        oq = self.oqparam
        if self.riskmodel.covs:
            epsilon_sampling = oq.epsilon_sampling
        else:
            epsilon_sampling = 1  # only one ignored epsilon
        correl_model = readinput.get_correl_model(oq)
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        assets_by_site = self.assets_by_site
        # the following is needed to set the asset idx attribute
        self.assetcol = riskinput.build_asset_collection(
            assets_by_site, oq.time_event)
        self.spec_indices = numpy.array(
            [a['asset_ref'] in oq.specific_assets for a in self.assetcol])

        logging.info('Populating the risk inputs')
        rup_by_tag = sum(self.datastore['sescollection'], AccumDict())
        all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)]
        for i, rup in enumerate(all_ruptures):
            rup.ordinal = i
        num_samples = min(len(all_ruptures), epsilon_sampling)
        self.epsilon_matrix = eps = riskinput.make_eps(
            assets_by_site, num_samples, oq.master_seed,
            oq.asset_correlation)
        logging.info('Generated %d epsilons', num_samples * len(eps))
        self.riskinputs = list(self.riskmodel.build_inputs_from_ruptures(
            self.sitecol.complete, all_ruptures, gsims_by_col,
            oq.truncation_level, correl_model, eps,
            oq.concurrent_tasks or 1))
        logging.info('Built %d risk inputs', len(self.riskinputs))

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.L = len(loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.outs = OUTPUTS
        self.datasets = {}
        # ugly: attaching an attribute needed in the task function
        self.monitor.num_outputs = len(self.outs)
        self.monitor.num_assets = self.count_assets()
        for o, out in enumerate(self.outs):
            self.datastore.hdf5.create_group(out)
            for l, loss_type in enumerate(loss_types):
                for r, rlz in enumerate(self.rlzs_assoc.realizations):
                    key = '/%s/%s' % (loss_type, rlz.uid)
                    if o == AGGLOSS:  # loss tables
                        dset = self.datastore.create_dset(out + key, elt_dt)
                    elif o == SPECLOSS:  # specific losses
                        dset = self.datastore.create_dset(out + key, ela_dt)
                    self.datasets[o, l, r] = dset

    def execute(self):
        """
        Run the event_based_risk calculator and aggregate the results
        """
        return apply_reduce(
            self.core_func.__func__,
            (self.riskinputs, self.riskmodel, self.rlzs_assoc,
             self.assets_by_site, self.epsilon_matrix,
             self.oqparam.specific_assets, self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.agg,
            acc=cube(self.monitor.num_outputs, self.L, self.R, list),
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('col_id'))

    def agg(self, acc, result):
        """
        Aggregate list of arrays in longer lists.

        :param acc: accumulator array of shape (O, L, R)
        :param result: a numpy array of shape (O, L, R)
        """
        for idx, arrays in numpy.ndenumerate(result):
            # TODO: special case for avg_losses, they can be summed directly
            if idx[0] == AVGLOSS:  # arrays has only 1 element
                acc[idx] = [sum(acc[idx] + arrays)]
            else:
                acc[idx].extend(arrays)
        return acc

    def post_execute(self, result):
        """
        Save the event loss table in the datastore.

        :param result:
            a numpy array of shape (O, L, R) containing lists of arrays
        """
        insured_losses = self.oqparam.insured_losses
        ses_ratio = self.oqparam.ses_ratio
        saved = {out: 0 for out in self.outs}
        N = len(self.assetcol)
        R = len(self.rlzs_assoc.realizations)
        ltypes = self.riskmodel.loss_types

        # average losses
        multi_avg_dt = numpy.dtype([(lt, (F32, 2)) for lt in ltypes])
        avg_losses = numpy.zeros((N, R), multi_avg_dt)

        # loss curves
        multi_lr_dt = numpy.dtype(
            [(ltype, (F32, cbuilder.curve_resolution))
             for ltype, cbuilder in zip(
                 ltypes, self.riskmodel.curve_builders)])
        rcurves = numpy.zeros((N, R), multi_lr_dt)
        icurves = numpy.zeros((N, R), multi_lr_dt)

        with self.monitor('saving loss table',
                          autoflush=True, measuremem=True):
            for (o, l, r), data in numpy.ndenumerate(result):
                if not data:  # empty list
                    continue
                elif o == IC and not insured_losses:  # no insured curves
                    continue
                lt = self.riskmodel.loss_types[l]
                cb = self.riskmodel.curve_builders[l]
                if o in (AGGLOSS, SPECLOSS):  # data is a list of arrays
                    losses = numpy.concatenate(data)
                    self.datasets[o, l, r].extend(losses)
                    saved[self.outs[o]] += losses.nbytes
                elif o == AVGLOSS:  # average losses
                    avg_losses_lt = avg_losses[lt]
                    asset_values = self.assetcol[lt]
                    [avgloss] = data
                    for i, avalue in enumerate(asset_values):
                        avg_losses_lt[i, r] = tuple(avgloss[i] * avalue)
                elif cb.user_provided:  # risk curves
                    # data is a list of dicts asset idx -> counts
                    poes = cb.build_poes(N, data, ses_ratio)
                    if o == RC:
                        rcurves[lt][:, r] = poes
                    elif insured_losses:
                        icurves[lt][:, r] = poes
                    saved[self.outs[o]] += poes.nbytes
                self.datastore.hdf5.flush()

        self.datastore['avg_losses-rlzs'] = avg_losses
        saved['avg_losses-rlzs'] = avg_losses.nbytes
        self.datastore['rcurves-rlzs'] = rcurves
        if insured_losses:
            self.datastore['icurves-rlzs'] = icurves
        self.datastore.hdf5.flush()

        for out in self.outs:
            nbytes = saved[out]
            if nbytes:
                self.datastore[out].attrs['nbytes'] = nbytes
                logging.info('Saved %s in %s', humansize(nbytes), out)
            else:  # remove empty outputs
                del self.datastore[out]

        if self.oqparam.specific_assets:
            self.build_specific_loss_curves(
                self.datastore['specific-losses-rlzs'])

        rlzs = self.rlzs_assoc.realizations
        if len(rlzs) > 1:
            self.compute_store_stats(rlzs, '')  # generic
            self.compute_store_stats(rlzs, '_specific')

        if (self.oqparam.conditional_loss_poes and
                'rcurves-rlzs' in self.datastore):
            self.build_loss_maps('rcurves-rlzs', 'rmaps-rlzs')
        if (self.oqparam.conditional_loss_poes and
                'icurves-rlzs' in self.datastore):
            self.build_loss_maps('icurves-rlzs', 'imaps-rlzs')

    def build_specific_loss_curves(self, group, kind='loss'):
        ses_ratio = self.oqparam.ses_ratio
        assetcol = self.assetcol[self.spec_indices]
        for cb in self.riskmodel.curve_builders:
            for rlz, dset in group[cb.loss_type].items():
                losses_by_aid = collections.defaultdict(list)
                for ela in dset.value:
                    losses_by_aid[ela['ass_id']].append(ela[kind])
                curves = cb.build_loss_curves(
                    assetcol, losses_by_aid, ses_ratio)
                key = 'specific-loss_curves-rlzs/%s/%s' % (cb.loss_type, rlz)
                self.datastore[key] = curves

    def build_loss_maps(self, curves_key, maps_key):
        """
        Build loss maps from the loss curves
        """
        oq = self.oqparam
        rlzs = self.datastore['rlzs_assoc'].realizations
        curves = self.datastore[curves_key].value
        N = len(self.assetcol)
        R = len(rlzs)
        P = len(oq.conditional_loss_poes)
        loss_map_dt = numpy.dtype([(lt, (F32, P))
                                   for lt in self.riskmodel.loss_types])
        maps = numpy.zeros((N, R), loss_map_dt)
        for cb in self.riskmodel.curve_builders:
            asset_values = self.assetcol[cb.loss_type]
            curves_lt = curves[cb.loss_type]
            maps_lt = maps[cb.loss_type]
            for rlz in rlzs:
                loss_maps = scientific.calc_loss_maps(
                    oq.conditional_loss_poes, asset_values, cb.ratios,
                    curves_lt[:, rlz.ordinal])
                for i in range(N):
                    # NB: it does not work without the loop, there is a
                    # ValueError: could not broadcast input array from shape
                    # (N,1) into shape (N)
                    maps_lt[i, rlz.ordinal] = loss_maps[i]
        self.datastore[maps_key] = maps

    # ################### methods to compute statistics ################### #

    def _collect_all_data(self):
        # return a list of list of outputs
        if 'rcurves-rlzs' not in self.datastore:
            return []
        all_data = []
        assets = self.assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        avg_losses = self.datastore['avg_losses-rlzs'].value
        r_curves = self.datastore['rcurves-rlzs'].value
        insured_losses = self.oqparam.insured_losses
        i_curves = (self.datastore['icurves-rlzs'].value
                    if insured_losses else None)
        for loss_type, cbuilder in zip(
                self.riskmodel.loss_types, self.riskmodel.curve_builders):
            avglosses = avg_losses[loss_type]
            rcurves = r_curves[loss_type]
            asset_values = self.assetcol[loss_type]
            data = []
            for rlz in rlzs:
                average_losses = avglosses[:, rlz.ordinal]
                out = scientific.Output(
                    assets, loss_type, rlz.ordinal, rlz.weight,
                    loss_curves=old_loss_curves(
                        asset_values, rcurves, rlz.ordinal, cbuilder.ratios),
                    insured_curves=old_loss_curves(
                        asset_values, i_curves[loss_type],
                        rlz.ordinal, cbuilder.ratios) if i_curves else None,
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            all_data.append(data)
        return all_data

    def _collect_specific_data(self):
        # return a list of list of outputs
        if not self.oqparam.specific_assets:
            return []
        specific_assets = set(self.oqparam.specific_assets)
        assetcol = self.assetcol
        specific_ids = []
        for i, a in enumerate(self.assetcol):
            if a['asset_ref'] in specific_assets:
                specific_ids.append(i)

        assets = assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        specific_data = []
        avglosses = self.datastore['avg_losses-rlzs'][specific_ids]
        for loss_type in self.riskmodel.loss_types:
            group = self.datastore['/specific-loss_curves-rlzs/%s' %
                                   loss_type]
            data = []
            avglosses_lt = avglosses[loss_type]
            for rlz, dataset in zip(rlzs, group.values()):
                average_losses = avglosses_lt[:, rlz.ordinal]
                lcs = dataset.value
                losses_poes = numpy.array(  # -> shape (N, 2, C)
                    [lcs['losses'], lcs['poes']]).transpose(1, 0, 2)
                out = scientific.Output(
                    assets, loss_type, rlz.ordinal, rlz.weight,
                    loss_curves=losses_poes,
                    insured_curves=None,  # FIXME: why None?
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            specific_data.append(data)
        return specific_data

    def compute_store_stats(self, rlzs, kind):
        """
        Compute and store the statistical outputs
        """
        oq = self.oqparam
        builder = scientific.StatsBuilder(
            oq.quantile_loss_curves, oq.conditional_loss_poes, [],
            scientific.normalize_curves_eb)
        if kind == '_specific':
            all_stats = [builder.build(data, prefix='specific-')
                         for data in self._collect_specific_data()]
        else:
            all_stats = map(builder.build, self._collect_all_data())
        for stat in all_stats:
            # there is one stat for each loss_type
            curves, ins_curves, maps = scientific.get_stat_curves(stat)
            for i, path in enumerate(stat.paths):
                # there are paths like
                # %s-stats/structural/mean
                # %s-stats/structural/quantile-0.1
                # ...
                self.datastore[path % 'loss_curves'] = curves[i]
                if oq.insured_losses:
                    self.datastore[path % 'ins_curves'] = ins_curves[i]
                if oq.conditional_loss_poes:
                    self.datastore[path % 'loss_maps'] = maps[i]

        stats = scientific.SimpleStats(rlzs, oq.quantile_loss_curves)
        nbytes = stats.compute('avg_losses-rlzs', self.datastore)
        self.datastore['avg_losses-stats'].attrs['nbytes'] = nbytes
        self.datastore.hdf5.flush()
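# A sketch of what a curve builder's build_poes is assumed to do: turn event
# losses into an exceedance curve by counting, for each loss ratio, the
# fraction of stochastic event sets producing a larger loss (toy numbers
# below; the real builder also applies the ses_ratio normalization).

def _empirical_poes_sketch():
    import numpy
    event_losses = numpy.array([0., .2, .5, 1.2, 3.])  # one loss per SES
    ratios = numpy.array([.1, .5, 1., 2.])
    counts = numpy.array([(event_losses > r).sum() for r in ratios])
    return counts / float(len(event_losses))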
class ClassicalCalculator(base.HazardCalculator):
    """
    Classical PSHA calculator
    """
    core_task = classical
    source_info = datastore.persistent_attribute('source_info')

    def agg_dicts(self, acc, val):
        """
        Aggregate dictionaries of hazard curves by updating the accumulator.

        :param acc: accumulator dictionary
        :param val: a nested dictionary trt_id -> ProbabilityMap
        """
        with self.monitor('aggregate curves', autoflush=True):
            if hasattr(val, 'calc_times'):
                acc.calc_times.extend(val.calc_times)
            if hasattr(val, 'eff_ruptures'):
                acc.eff_ruptures += val.eff_ruptures
            for bb in getattr(val, 'bbs', []):
                acc.bb_dict[bb.lt_model_id, bb.site_id].update_bb(bb)
            acc |= val
        self.datastore.flush()
        return acc

    def count_eff_ruptures(self, result_dict, trt_model):
        """
        Returns the number of ruptures in the trt_model (after filtering)
        or 0 if the trt_model has been filtered away.

        :param result_dict: a dictionary with keys (trt_id, gsim)
        :param trt_model: a TrtModel instance
        """
        return (result_dict.eff_ruptures.get(trt_model.id, 0) /
                self.num_tiles)

    def zerodict(self):
        """
        Initial accumulator, an empty ProbabilityMap
        """
        zd = ProbabilityMap()
        zd.calc_times = []
        zd.eff_ruptures = AccumDict()  # trt_id -> eff_ruptures
        zd.bb_dict = {
            (smodel.ordinal, sid): BoundingBox(smodel.ordinal, sid)
            for sid in self.sitecol.sids
            for smodel in self.csm.source_models
        } if self.oqparam.poes_disagg else {}
        return zd

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        monitor = self.monitor.new(self.core_task.__name__)
        monitor.oqparam = self.oqparam
        curves_by_trt_id = self.taskman.reduce(
            self.agg_dicts, self.zerodict())
        self.save_data_transfer(self.taskman)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(curves_by_trt_id)
        self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
            partial(self.count_eff_ruptures, curves_by_trt_id))
        self.datastore['csm_info'] = self.csm.info
        return curves_by_trt_id

    def store_source_info(self, curves_by_trt_id):
        # store the information about received data
        received = self.taskman.received
        if received:
            tname = self.taskman.name
            self.datastore.save('job_info', {
                tname + '_max_received_per_task': max(received),
                tname + '_tot_received': sum(received),
                tname + '_num_tasks': len(received)})
        # then save the calculation times per each source
        calc_times = getattr(curves_by_trt_id, 'calc_times', [])
        if calc_times:
            sources = self.csm.get_sources()
            info_dict = {(rec['trt_model_id'], rec['source_id']): rec
                         for rec in self.source_info}
            for src_idx, dt in calc_times:
                src = sources[src_idx]
                info = info_dict[src.trt_model_id, src.source_id]
                info['calc_time'] += dt
            self.source_info = numpy.array(
                sorted(info_dict.values(), key=operator.itemgetter(7),
                       reverse=True), source.source_info_dt)
        self.datastore.hdf5.flush()

    def post_execute(self, curves_by_trt_id):
        """
        Collect the hazard curves by realization and export them.

        :param curves_by_trt_id: a dictionary trt_id -> hazard curves
        """
        nsites = len(self.sitecol)
        imtls = self.oqparam.imtls
        curves_by_trt_gsim = {}
        with self.monitor('saving probability maps', autoflush=True):
            for trt_id in curves_by_trt_id:
                key = 'poes/%04d' % trt_id
                self.datastore[key] = curves_by_trt_id[trt_id]
                self.datastore.set_attrs(
                    key, trt=self.csm.info.get_trt(trt_id))
                gsims = self.rlzs_assoc.gsims_by_trt_id[trt_id]
                for i, gsim in enumerate(gsims):
                    curves_by_trt_gsim[trt_id, gsim] = (
                        curves_by_trt_id[trt_id].extract(i))
            self.datastore.set_nbytes('poes')
        with self.monitor('combine curves_by_rlz', autoflush=True):
            curves_by_rlz = self.rlzs_assoc.combine_curves(
                curves_by_trt_gsim)
        self.save_curves({rlz: array_of_curves(curves, nsites, imtls)
                          for rlz, curves in curves_by_rlz.items()})

    def save_curves(self, curves_by_rlz):
        """
        Save the dictionary curves_by_rlz
        """
        oq = self.oqparam
        rlzs = self.rlzs_assoc.realizations
        nsites = len(self.sitecol)
        if oq.individual_curves:
            with self.monitor('save curves_by_rlz', autoflush=True):
                for rlz, curves in curves_by_rlz.items():
                    self.store_curves('rlz-%03d' % rlz.ordinal, curves, rlz)

        if len(rlzs) == 1:  # cannot compute statistics
            [self.mean_curves] = curves_by_rlz.values()
            return

        with self.monitor('compute and save statistics', autoflush=True):
            weights = (None if oq.number_of_logic_tree_samples
                       else [rlz.weight for rlz in rlzs])

            # mean curves are always computed but stored only on request
            zc = zero_curves(nsites, oq.imtls)
            self.mean_curves = numpy.array(zc)
            for imt in oq.imtls:
                self.mean_curves[imt] = scientific.mean_curve(
                    [curves_by_rlz.get(rlz, zc)[imt] for rlz in rlzs],
                    weights)

            self.quantile = {}
            for q in oq.quantile_hazard_curves:
                self.quantile[q] = qc = numpy.array(zc)
                for imt in oq.imtls:
                    curves = [curves_by_rlz[rlz][imt] for rlz in rlzs]
                    qc[imt] = scientific.quantile_curve(
                        curves, q, weights).reshape((nsites, -1))

            if oq.mean_hazard_curves:
                self.store_curves('mean', self.mean_curves)
            for q in self.quantile:
                self.store_curves('quantile-%s' % q, self.quantile[q])

    def hazard_maps(self, curves):
        """
        Compute the hazard maps associated to the curves
        """
        maps = zero_maps(
            len(self.sitecol), self.oqparam.imtls, self.oqparam.poes)
        for imt in curves.dtype.fields:
            # build a matrix of size (N, P)
            data = calc.compute_hazard_maps(
                curves[imt], self.oqparam.imtls[imt], self.oqparam.poes)
            for poe, hmap in zip(self.oqparam.poes, data.T):
                maps['%s-%s' % (imt, poe)] = hmap
        return maps

    def store_curves(self, kind, curves, rlz=None):
        """
        Store all kind of curves, optionally computing maps and uhs curves.

        :param kind: the kind of curves to store
        :param curves: an array of N curves to store
        :param rlz: hazard realization, if any
        """
        oq = self.oqparam
        self._store('hcurves/' + kind, curves, rlz, nbytes=curves.nbytes)
        self.datastore['hcurves'].attrs['imtls'] = [
            (imt, len(imls)) for imt, imls in self.oqparam.imtls.items()]
        if oq.hazard_maps or oq.uniform_hazard_spectra:
            # hmaps is a composite array of shape (N, P)
            hmaps = self.hazard_maps(curves)
            self._store('hmaps/' + kind, hmaps, rlz,
                        poes=oq.poes, nbytes=hmaps.nbytes)

    def _store(self, name, curves, rlz, **kw):
        self.datastore.hdf5[name] = curves
        dset = self.datastore.hdf5[name]
        if rlz is not None:
            dset.attrs['uid'] = rlz.uid
        for k, v in kw.items():
            dset.attrs[k] = v
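# A sketch of the weighted quantile used by save_curves: for full-enumeration
# logic trees each realization carries a weight, so the quantile is read off
# the weighted empirical distribution, value by value. Assumed semantics of
# scientific.quantile_curve, shown here for a single (site, level) column.

def _weighted_quantile_sketch(values, weights, q):
    import numpy
    values = numpy.asarray(values)
    order = values.argsort()
    cum_weights = numpy.cumsum(numpy.asarray(weights)[order])
    idx = min(numpy.searchsorted(cum_weights, q), len(values) - 1)
    return values[order][idx]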
class EventBasedRiskCalculator(base.RiskCalculator):
    """
    Event based PSHA calculator generating the event loss table and
    fixed ratios loss curves.
    """
    pre_calculator = 'event_based_rupture'
    core_func = event_based_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    spec_indices = datastore.persistent_attribute('spec_indices')
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some datasets in the datastore.
        """
        super(EventBasedRiskCalculator, self).pre_execute()
        if not self.riskmodel:  # there is no riskmodel, exit early
            self.execute = lambda: None
            self.post_execute = lambda result: None
            return
        oq = self.oqparam
        if self.riskmodel.covs:
            epsilon_sampling = oq.epsilon_sampling
        else:
            epsilon_sampling = 1  # only one ignored epsilon
        correl_model = readinput.get_correl_model(oq)
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        assets_by_site = self.assets_by_site
        # the following is needed to set the asset idx attribute
        self.assetcol = riskinput.build_asset_collection(
            assets_by_site, oq.time_event)
        self.spec_indices = numpy.array(
            [a['asset_ref'] in oq.specific_assets for a in self.assetcol])

        logging.info('Populating the risk inputs')
        rup_by_tag = sum(self.datastore['sescollection'], AccumDict())
        all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)]
        for i, rup in enumerate(all_ruptures):
            rup.ordinal = i
        num_samples = min(len(all_ruptures), epsilon_sampling)
        eps_dict = riskinput.make_eps_dict(
            assets_by_site, num_samples, oq.master_seed,
            oq.asset_correlation)
        logging.info('Generated %d epsilons', num_samples * len(eps_dict))
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = list(self.riskmodel.build_inputs_from_ruptures(
            self.sitecol.complete, all_ruptures, gsims_by_col,
            oq.truncation_level, correl_model, eps_dict,
            oq.concurrent_tasks or 1))
        logging.info('Built %d risk inputs', len(self.riskinputs))

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.L = len(loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.outs = OUTPUTS
        self.datasets = {}
        self.monitor.oqparam = self.oqparam
        # ugly: attaching an attribute needed in the task function
        self.monitor.num_outputs = len(self.outs)
        # attaching two other attributes used in riskinput.gen_outputs
        self.monitor.assets_by_site = self.assets_by_site
        self.monitor.eps_dict = eps_dict
        self.monitor.num_assets = N = self.count_assets()
        for o, out in enumerate(self.outs):
            self.datastore.hdf5.create_group(out)
            for l, loss_type in enumerate(loss_types):
                cb = self.riskmodel.curve_builders[l]
                C = len(cb.ratios)  # curve resolution
                for r, rlz in enumerate(self.rlzs_assoc.realizations):
                    key = '/%s/%s' % (loss_type, rlz.uid)
                    if o == AGGLOSS:  # loss tables
                        dset = self.datastore.create_dset(out + key, elt_dt)
                    elif o == AVGLOSS:  # average losses
                        dset = self.datastore.create_dset(
                            out + key, numpy.float32, (N, 2))
                    elif o == SPECLOSS:  # specific losses
                        dset = self.datastore.create_dset(out + key, ela_dt)
                    else:  # risk curves
                        if not C:
                            continue
                        dset = self.datastore.create_dset(
                            out + key, cb.lr_dt, N)
                    self.datasets[o, l, r] = dset
                if o == RC and C:
                    grp = self.datastore['%s/%s' % (out, loss_type)]
                    grp.attrs['loss_ratios'] = cb.ratios

    def execute(self):
        """
        Run the event_based_risk calculator and aggregate the results
        """
        return apply_reduce(
            self.core_func.__func__,
            (self.riskinputs, self.riskmodel, self.rlzs_assoc,
             self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.agg,
            acc=cube(self.monitor.num_outputs, self.L, self.R, list),
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('col_id'))

    def agg(self, acc, result):
        """
        Aggregate list of arrays in longer lists.

        :param acc: accumulator array of shape (O, L, R)
        :param result: a numpy array of shape (O, L, R)
        """
        for idx, arrays in numpy.ndenumerate(result):
            # TODO: special case for avg_losses, they can be summed directly
            if idx[0] == AVGLOSS:  # arrays has only 1 element
                acc[idx] = [sum(acc[idx] + arrays)]
            else:
                acc[idx].extend(arrays)
        return acc

    def post_execute(self, result):
        """
        Save the event loss table in the datastore.

        :param result:
            a numpy array of shape (O, L, R) containing lists of arrays
        """
        ses_ratio = self.oqparam.ses_ratio
        saved = {out: 0 for out in self.outs}
        N = len(self.assetcol)
        with self.monitor('saving loss table',
                          autoflush=True, measuremem=True):
            for (o, l, r), data in numpy.ndenumerate(result):
                if not data:  # empty list
                    continue
                cb = self.riskmodel.curve_builders[l]
                if o in (AGGLOSS, SPECLOSS):  # data is a list of arrays
                    losses = numpy.concatenate(data)
                    self.datasets[o, l, r].extend(losses)
                    saved[self.outs[o]] += losses.nbytes
                elif o == AVGLOSS:  # average losses
                    lt = self.riskmodel.loss_types[l]
                    [avgloss] = data
                    avglosses = numpy.array(
                        [avgloss[i] * asset[lt]
                         for i, asset in enumerate(self.assetcol)],
                        numpy.float32)
                    self.datasets[o, l, r].dset[:] = avglosses
                    saved[self.outs[o]] += avglosses.nbytes
                elif cb.user_provided:  # risk curves
                    # data is a list of dicts asset idx -> counts
                    poes = cb.build_poes(N, data, ses_ratio)
                    self.datasets[o, l, r] = poes
                    saved[self.outs[o]] += poes.nbytes
                self.datastore.hdf5.flush()

        for out in self.outs:
            nbytes = saved[out]
            if nbytes:
                self.datastore[out].attrs['nbytes'] = nbytes
                logging.info('Saved %s in %s', humansize(nbytes), out)
            else:  # remove empty outputs
                del self.datastore[out]

        if self.oqparam.specific_assets:
            self.build_specific_loss_curves(
                self.datastore['specific-losses-rlzs'])

        rlzs = self.rlzs_assoc.realizations
        if len(rlzs) > 1:
            self.compute_store_stats(rlzs, '')  # generic
            self.compute_store_stats(rlzs, '_specific')

        # The following is commented on purpose:
        # if (self.oqparam.conditional_loss_poes and
        #         'rcurves-rlzs' in self.datastore):
        #     self.build_loss_maps()

    def clean_up(self):
        """
        Final checks and cleanup
        """
        if (self.oqparam.ground_motion_fields and
                'gmf_by_trt_gsim' not in self.datastore):
            logging.warn(
                'Even if the flag `ground_motion_fields` was set the GMFs '
                'were not saved.\nYou should use the event_based hazard '
                'calculator to do that, not the risk one')
        super(EventBasedRiskCalculator, self).clean_up()

    def build_specific_loss_curves(self, group, kind='loss'):
        ses_ratio = self.oqparam.ses_ratio
        assetcol = self.assetcol[self.spec_indices]
        for loss_type, builder in zip(group, self.riskmodel.curve_builders):
            for rlz, dset in group[loss_type].items():
                losses_by_aid = collections.defaultdict(list)
                for ela in dset.value:
                    losses_by_aid[ela['ass_id']].append(ela[kind])
                curves = builder.build_loss_curves(
                    assetcol, losses_by_aid, ses_ratio)
                key = 'specific-loss_curves-rlzs/%s/%s' % (loss_type, rlz)
                self.datastore[key] = curves

    def build_loss_maps(self):
        """
        Build loss maps from the loss curves
        """
        oq = self.oqparam
        for loss_type, group in self.datastore['rcurves-rlzs'].items():
            asset_values = self.assetcol[loss_type]
            ratios = group.attrs['loss_ratios']
            for rlz, poe_matrix in group.items():
                maps = scientific.calc_loss_maps(
                    oq.conditional_loss_poes, asset_values, ratios,
                    poe_matrix)
                key = 'lmaps-rlzs/%s/%s' % (loss_type, rlz)
                self.datastore[key] = maps

    # ################### methods to compute statistics ################### #

    def build_stats(self, builder):
        """
        Compute all statistics for all assets starting from the
        stored loss curves. Yield a statistical output object for each
        loss type.
        """
        if 'rcurves-rlzs' not in self.datastore:
            return []
        stats = []
        # NB: should we encounter memory issues in the future, the easy
        # solution is to split the assets in blocks and perform
        # the computation one block at the time
        assets = self.assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        for loss_type in self.riskmodel.loss_types:
            group = self.datastore['rcurves-rlzs/%s' % loss_type]
            asset_values = self.assetcol[loss_type]
            data = []
            for rlz, dataset in zip(rlzs, group.values()):
                dkey = 'avg_losses-rlzs/%s/%s' % (loss_type, rlz.uid)
                average_losses = self.datastore[dkey].value
                ratios = group.attrs['loss_ratios']
                lcs = []
                for avalue, poes in zip(asset_values, dataset['poes']):
                    lcs.append((avalue * ratios, poes))
                losses_poes = numpy.array(lcs)  # -> shape (N, 2, C)
                out = scientific.Output(
                    assets, loss_type, rlz.ordinal, rlz.weight,
                    loss_curves=losses_poes, insured_curves=None,
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            stats.append(builder.build(data))
        return stats

    # TODO: add a direct test
    def build_specific_stats(self, builder):
        """
        Compute all statistics for the specified assets starting from the
        stored loss curves. Yield a statistical output object for each
        loss type.
        """
        if not self.oqparam.specific_assets:
            return []
        specific_assets = set(self.oqparam.specific_assets)
        assetcol = self.assetcol
        specific_ids = []
        for i, a in enumerate(self.assetcol):
            if a['asset_ref'] in specific_assets:
                specific_ids.append(i)

        assets = assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        stats = []
        for loss_type in self.riskmodel.loss_types:
            group = self.datastore['/specific-loss_curves-rlzs/%s' %
                                   loss_type]
            data = []
            for rlz, dataset in zip(rlzs, group.values()):
                dkey = 'avg_losses-rlzs/%s/%s' % (loss_type, rlz.uid)
                average_losses = self.datastore[dkey][specific_ids]
                lcs = dataset.value
                losses_poes = numpy.array(  # -> shape (N, 2, C)
                    [lcs['losses'], lcs['poes']]).transpose(1, 0, 2)
                out = scientific.Output(
                    assets, loss_type, rlz.ordinal, rlz.weight,
                    loss_curves=losses_poes, insured_curves=None,
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            stats.append(builder.build(data, prefix='specific-'))
        return stats

    def compute_store_stats(self, rlzs, kind):
        """
        Compute and store the statistical outputs
        """
        oq = self.oqparam
        N = (len(self.oqparam.specific_assets) if kind == '_specific'
             else len(self.assetcol))
        Q = 1 + len(oq.quantile_loss_curves)
        C = oq.loss_curve_resolution  # TODO: could be loss_type-dependent
        loss_curve_dt = numpy.dtype([('losses', (float, C)),
                                     ('poes', (float, C)),
                                     ('avg', float)])
        if oq.conditional_loss_poes:
            lm_names = _loss_map_names(oq.conditional_loss_poes)
            loss_map_dt = numpy.dtype([(f, float) for f in lm_names])
        loss_curve_stats = numpy.zeros((Q, N), loss_curve_dt)
        ins_curve_stats = numpy.zeros((Q, N), loss_curve_dt)
        if oq.conditional_loss_poes:
            loss_map_stats = numpy.zeros((Q, N), loss_map_dt)

        builder = scientific.StatsBuilder(
            oq.quantile_loss_curves, oq.conditional_loss_poes, [],
            scientific.normalize_curves_eb)

        build_stats = getattr(self, 'build%s_stats' % kind)
        all_stats = build_stats(builder)
        for stat in all_stats:
            # there is one stat for each loss_type
            curves, ins_curves, maps = scientific.get_stat_curves(stat)
            loss_curve_stats[:] = curves
            if oq.insured_losses:
                ins_curve_stats[:] = ins_curves
            if oq.conditional_loss_poes:
                loss_map_stats[:] = maps

            for i, path in enumerate(stat.paths):
                self._store(path % 'loss_curves', loss_curve_stats[i])
                self._store(path % 'ins_curves', ins_curve_stats[i])
                if oq.conditional_loss_poes:
                    self._store(path % 'loss_maps', loss_map_stats[i])

        stats = scientific.SimpleStats(rlzs, oq.quantile_loss_curves)
        stats.compute_and_store('avg_losses', self.datastore)

    def _store(self, path, curves):
        if curves.view(float).sum():  # there are some nonzero values
            self.datastore[path] = curves
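# The loss-map idea behind the (commented-out) build_loss_maps call above:
# given a loss curve (losses vs. decreasing PoEs), the map value at a target
# PoE is obtained by interpolation. Assumed to mirror what
# scientific.calc_loss_maps does for a single curve.

def _loss_at_poe_sketch(losses, poes, target_poe):
    import numpy
    # numpy.interp wants increasing x, so reverse the decreasing poes
    return numpy.interp(target_poe, poes[::-1], losses[::-1])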
class UCERFEventBasedRuptureCalculator(
        event_based.EventBasedRuptureCalculator):
    """
    Event based PSHA calculator generating the ruptures only
    """
    core_task = compute_ruptures
    etags = datastore.persistent_attribute('etags')
    is_stochastic = True

    def pre_execute(self):
        """
        Parse the logic tree and source model input
        """
        self.sitecol = readinput.get_site_collection(self.oqparam)
        self.save_mesh()
        self.gsim_lt = readinput.get_gsim_lt(self.oqparam, [DEFAULT_TRT])
        self.smlt = readinput.get_source_model_lt(self.oqparam)
        parser = source.SourceModelParser(
            UCERFSourceConverter(self.oqparam.investigation_time,
                                 self.oqparam.rupture_mesh_spacing))
        [self.source] = parser.parse_sources(
            self.oqparam.inputs["source_model"])
        branches = sorted(self.smlt.branches.items())
        min_mag, max_mag = self.source.min_mag, None
        source_models = []
        num_gsim_paths = self.gsim_lt.get_num_paths()
        for ordinal, (name, branch) in enumerate(branches):
            tm = source.TrtModel(DEFAULT_TRT, [], min_mag, max_mag,
                                 ordinal, eff_ruptures=-1)
            sm = source.SourceModel(name, branch.weight, [name], [tm],
                                    num_gsim_paths, ordinal, 1)
            source_models.append(sm)
        self.csm = source.CompositeSourceModel(
            self.gsim_lt, self.smlt, source_models, set_weight=False)
        self.rup_data = {}
        self.infos = []

    def execute(self):
        """
        Run the ucerf rupture calculation
        """
        id_set = [(key, self.smlt.branches[key].value,
                   self.smlt.branches[key].weight)
                  for key in self.smlt.branches]
        ruptures_by_trt_id = parallel.apply_reduce(
            compute_ruptures,
            (id_set, self.source, self.sitecol, self.oqparam, self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.agg)
        self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
            functools.partial(self.count_eff_ruptures, ruptures_by_trt_id))
        self.datastore['csm_info'] = self.csm.info
        self.datastore['source_info'] = numpy.array(
            self.infos, source.source_info_dt)
        return ruptures_by_trt_id

    def agg(self, acc, val):
        """
        Aggregate the ruptures and the calculation times
        """
        for trt_id in val:
            ltbrid, dt = val.calc_times[trt_id]
            info = source.SourceInfo(
                trt_id, ltbrid,
                source_class=UCERFSESControl.__class__.__name__,
                weight=1, sources=1, filter_time=0, split_time=0,
                calc_time=dt)
            self.infos.append(info)
        return acc + val
class RiskCalculator(HazardCalculator):
    """
    Base class for all risk calculators. A risk calculator must set the
    attributes .riskmodel, .sitecol, .assets_by_site, .exposure and
    .riskinputs in the pre_execute phase.
    """
    riskmodel = datastore.persistent_attribute('riskmodel')
    specific_assets = datastore.persistent_attribute('specific_assets')

    def make_eps_dict(self, num_ruptures):
        """
        :param num_ruptures: the size of the epsilon array for each asset
        """
        oq = self.oqparam
        with self.monitor('building epsilons', autoflush=True):
            eps = riskinput.make_eps_dict(
                self.assets_by_site, num_ruptures,
                oq.master_seed, oq.asset_correlation)
            return eps

    def build_riskinputs(self, hazards_by_key, eps_dict=None):
        """
        :param hazards_by_key:
            a dictionary key -> IMT -> array of length num_sites
        :returns:
            a list of RiskInputs objects, sorted by IMT.
        """
        imtls = self.oqparam.imtls
        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = []
            idx_weight_pairs = [
                (i, len(assets))
                for i, assets in enumerate(self.assets_by_site)]
            blocks = general.split_in_blocks(
                idx_weight_pairs,
                self.oqparam.concurrent_tasks or 1,
                weight=operator.itemgetter(1))
            for block in blocks:
                indices = numpy.array([idx for idx, _weight in block])
                reduced_assets = self.assets_by_site[indices]
                reduced_eps = {}  # for the assets belonging to the indices
                if eps_dict:
                    for assets in reduced_assets:
                        for asset in assets:
                            reduced_eps[asset.id] = eps_dict[asset.id]

                # collect the hazards by key into hazards by imt
                hdata = collections.defaultdict(
                    lambda: [{} for _ in indices])
                for key, hazards_by_imt in hazards_by_key.items():
                    for imt in imtls:
                        hazards_by_site = hazards_by_imt[imt]
                        for i, haz in enumerate(hazards_by_site[indices]):
                            hdata[imt][i][key] = haz
                # build the riskinputs
                for imt in hdata:
                    ri = self.riskmodel.build_input(
                        imt, hdata[imt], reduced_assets, reduced_eps)
                    if ri.weight > 0:
                        riskinputs.append(ri)
            logging.info('Built %d risk inputs', len(riskinputs))
            return sorted(riskinputs, key=self.riskinput_key)

    def riskinput_key(self, ri):
        """
        :param ri: riskinput object
        :returns: the IMT associated to it
        """
        return ri.imt

    def pre_execute(self):
        """
        Set the attributes .riskmodel, .sitecol, .assets_by_site
        """
        HazardCalculator.pre_execute(self)
        self.riskmodel = readinput.get_risk_model(self.oqparam)
        if hasattr(self, 'exposure'):
            missing = self.exposure.taxonomies - set(
                self.riskmodel.get_taxonomies())
            if missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' %
                                   missing)

    def execute(self):
        """
        Parallelize on the riskinputs and return a dictionary of results.
        Require a `.core_func` to be defined with signature
        (riskinputs, riskmodel, rlzs_assoc, monitor).
        """
        # add fatalities as side effect
        riskinput.build_asset_collection(
            self.assets_by_site, self.oqparam.time_event)
        self.monitor.oqparam = self.oqparam
        if self.pre_calculator == 'event_based_rupture':
            self.monitor.assets_by_site = self.assets_by_site
            self.monitor.num_assets = self.count_assets()
        res = apply_reduce(
            self.core_func.__func__,
            (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=get_weight, key=self.riskinput_key)
        return res
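# A minimal sketch (assumed semantics, not the openquake.baselib code) of
# general.split_in_blocks as used by build_riskinputs: split weighted items
# into roughly `hint` blocks of similar total weight.

def _split_in_blocks_sketch(items, hint, weight=lambda item: 1):
    items = list(items)
    max_weight = sum(weight(i) for i in items) / (hint or 1)
    block, tot = [], 0
    for item in items:
        block.append(item)
        tot += weight(item)
        if tot >= max_weight:  # the block is heavy enough, yield it
            yield block
            block, tot = [], 0
    if block:  # yield the leftover items
        yield block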
class BaseCalculator(with_metaclass(abc.ABCMeta)):
    """
    Abstract base class for all calculators.

    :param oqparam: OqParam object
    :param monitor: monitor object
    :param calc_id: numeric calculation ID
    """
    sitemesh = datastore.persistent_attribute('sitemesh')
    sitecol = datastore.persistent_attribute('sitecol')
    rlzs_assoc = datastore.persistent_attribute('rlzs_assoc')
    realizations = datastore.persistent_attribute('realizations')
    assets_by_site = datastore.persistent_attribute('assets_by_site')
    assetcol = datastore.persistent_attribute('assetcol')
    cost_types = datastore.persistent_attribute('cost_types')
    taxonomies = datastore.persistent_attribute('taxonomies')
    job_info = datastore.persistent_attribute('job_info')
    source_chunks = datastore.persistent_attribute('source_chunks')
    performance = datastore.persistent_attribute('performance')
    csm = datastore.persistent_attribute('composite_source_model')
    pre_calculator = None  # to be overridden
    is_stochastic = False  # True for scenario and event based calculators

    def __init__(self, oqparam, monitor=DummyMonitor(), calc_id=None):
        self.monitor = monitor
        self.datastore = datastore.DataStore(calc_id)
        self.monitor.hdf5path = self.datastore.hdf5path
        self.datastore.export_dir = oqparam.export_dir
        self.oqparam = oqparam

    def save_params(self, **kw):
        """
        Update the current calculation parameters
        """
        vars(self.oqparam).update(kw)
        for name, val in self.oqparam.to_params():
            self.datastore.attrs[name] = val
        self.datastore.attrs['oqlite_version'] = repr(__version__)
        self.datastore.hdf5.flush()

    def set_log_format(self):
        """
        Set the format of the root logger
        """
        fmt = '[%(asctime)s #{} %(levelname)s] %(message)s'.format(
            self.datastore.calc_id)
        for handler in logging.root.handlers:
            handler.setFormatter(logging.Formatter(fmt))

    def run(self, pre_execute=True, concurrent_tasks=None, **kw):
        """
        Run the calculation and return the exported outputs.
        """
        self.set_log_format()
        if (concurrent_tasks is not None and concurrent_tasks !=
                OqParam.concurrent_tasks.default):
            self.oqparam.concurrent_tasks = concurrent_tasks
        self.save_params(**kw)
        exported = {}
        try:
            if pre_execute:
                self.pre_execute()
            result = self.execute()
            self.post_execute(result)
            exported = self.export(kw.get('exports', ''))
        except KeyboardInterrupt:
            pids = ' '.join(str(p.pid) for p in executor._processes)
            sys.stderr.write(
                'You can manually kill the workers with kill %s\n' % pids)
            raise
        except:
            if kw.get('pdb'):  # post-mortem debug
                tb = sys.exc_info()[2]
                traceback.print_exc(tb)
                pdb.post_mortem(tb)
            else:
                logging.critical('', exc_info=True)
                raise
        self.clean_up()
        return exported

    def core_func(*args):
        """
        Core routine running on the workers.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def pre_execute(self):
        """
        Initialization phase.
        """

    @abc.abstractmethod
    def execute(self):
        """
        Execution phase. Usually will run in parallel the core
        function and return a dictionary with the results.
        """

    @abc.abstractmethod
    def post_execute(self, result):
        """
        Post-processing phase of the aggregated output. It must be
        overridden with the export code. It will return a dictionary
        of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.

        :returns: dictionary output_key -> sorted list of exported paths
        """
        # avoid circular imports
        from openquake.commonlib.export import export as exp
        exported = {}
        individual_curves = self.oqparam.individual_curves
        if exports and isinstance(exports, tuple):
            fmts = exports
        elif exports:  # is a string
            fmts = exports.split(',')
        else:  # use the export formats in the job configuration
            fmts = self.oqparam.exports
        for fmt in fmts:
            if not fmt:
                continue
            for key in self.datastore:  # top level keys
                if 'rlzs' in key and not individual_curves:
                    continue  # skip individual curves
                ekey = (key, fmt)
                if ekey not in exp:  # non-exportable output
                    continue
                exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', key, exported[ekey])
        return exported

    def clean_up(self):
        """
        Collect the realizations and the monitoring information,
        then close the datastore.
        """
        if 'hcurves' in self.datastore:
            _set_nbytes('hcurves', self.datastore)
        if 'hmaps' in self.datastore:
            _set_nbytes('hmaps', self.datastore)
        if 'rlzs_assoc' in self.datastore:
            rlzs = self.rlzs_assoc.realizations
            self.realizations = numpy.array(
                [(r.uid, r.weight) for r in rlzs], rlz_dt)
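# run() above is a template method: the fixed pre_execute -> execute ->
# post_execute -> export pipeline is defined once, while subclasses fill in
# the abstract steps. A stripped-down sketch of the same design:

class _CalculatorSketch(object):
    def run(self):
        self.pre_execute()          # initialization
        result = self.execute()     # (possibly parallel) computation
        self.post_execute(result)   # save the aggregated output
        return self.export()        # export what was saved

    def pre_execute(self):
        raise NotImplementedError

    def execute(self):
        raise NotImplementedError

    def post_execute(self, result):
        raise NotImplementedError

    def export(self):
        return {}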
class ClassicalCalculator(base.HazardCalculator):
    """
    Classical PSHA calculator
    """
    core_func = classical
    source_info = datastore.persistent_attribute('source_info')

    def execute(self):
        """
        Run in parallel `core_func(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = self.oqparam
        sources = self.csm.get_sources()
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        zerodict['calc_times'] = []
        gsims_assoc = self.rlzs_assoc.gsims_by_trt_id
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_func.__func__,
            (sources, self.sitecol, gsims_assoc, monitor),
            agg=agg_dicts, acc=zerodict,
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('trt_model_id'))
        if self.persistent:
            store_source_chunks(self.datastore)
        return curves_by_trt_gsim

    def post_execute(self, curves_by_trt_gsim):
        """
        Collect the hazard curves by realization and export them.

        :param curves_by_trt_gsim:
            a dictionary (trt_id, gsim) -> hazard curves
        """
        # save calculation time per source
        try:
            calc_times = curves_by_trt_gsim.pop('calc_times')
        except KeyError:
            pass
        else:
            sources = self.csm.get_sources()
            info = []
            for i, dt in calc_times:
                src = sources[i]
                info.append((src.trt_model_id, src.source_id, dt))
            info.sort(key=operator.itemgetter(2), reverse=True)
            self.source_info = numpy.array(info, source_info_dt)

        # save curves_by_trt_gsim
        for sm in self.rlzs_assoc.csm_info.source_models:
            group = self.datastore.hdf5.create_group(
                'curves_by_sm/' + '_'.join(sm.path))
            group.attrs['source_model'] = sm.name
            for tm in sm.trt_models:
                for gsim in tm.gsims:
                    try:
                        curves = curves_by_trt_gsim[tm.id, gsim]
                    except KeyError:  # no data for the trt_model
                        pass
                    else:
                        ts = '%03d-%s' % (tm.id, gsim)
                        group[ts] = curves
                        group[ts].attrs['trt'] = tm.trt

        oq = self.oqparam
        zc = zero_curves(len(self.sitecol.complete), oq.imtls)
        curves_by_rlz = self.rlzs_assoc.combine_curves(
            curves_by_trt_gsim, agg_curves, zc)
        rlzs = self.rlzs_assoc.realizations
        nsites = len(self.sitecol)
        if oq.individual_curves:
            for rlz, curves in curves_by_rlz.items():
                self.store_curves('rlz-%03d' % rlz.ordinal, curves, rlz)

        if len(rlzs) == 1:  # cannot compute statistics
            [self.mean_curves] = curves_by_rlz.values()
            return

        weights = (None if oq.number_of_logic_tree_samples
                   else [rlz.weight for rlz in rlzs])
        mean = oq.mean_hazard_curves
        if mean:
            self.mean_curves = numpy.array(zc)
            for imt in oq.imtls:
                self.mean_curves[imt] = scientific.mean_curve(
                    [curves_by_rlz[rlz][imt] for rlz in rlzs], weights)

        self.quantile = {}
        for q in oq.quantile_hazard_curves:
            self.quantile[q] = qc = numpy.array(zc)
            for imt in oq.imtls:
                curves = [curves_by_rlz[rlz][imt] for rlz in rlzs]
                qc[imt] = scientific.quantile_curve(
                    curves, q, weights).reshape((nsites, -1))

        if mean:
            self.store_curves('mean', self.mean_curves)
        for q in self.quantile:
            self.store_curves('quantile-%s' % q, self.quantile[q])

    def hazard_maps(self, curves):
        """
        Compute the hazard maps associated to the curves
        """
        n, p = len(self.sitecol), len(self.oqparam.poes)
        maps = zero_maps((n, p), self.oqparam.imtls)
        for imt in curves.dtype.fields:
            maps[imt] = calc.compute_hazard_maps(
                curves[imt], self.oqparam.imtls[imt], self.oqparam.poes)
        return maps

    def store_curves(self, kind, curves, rlz=None):
        """
        Store all kind of curves, optionally computing maps and uhs curves.

        :param kind: the kind of curves to store
        :param curves: an array of N curves to store
        :param rlz: hazard realization, if any
        """
        if not self.persistent:  # do nothing
            return
        oq = self.oqparam
        self._store('hcurves/' + kind, curves, rlz)
        if oq.hazard_maps or oq.uniform_hazard_spectra:
            # hmaps is a composite array of shape (N, P)
            hmaps = self.hazard_maps(curves)
            if oq.hazard_maps:
                self._store('hmaps/' + kind, hmaps, rlz, poes=oq.poes)
            if oq.uniform_hazard_spectra:
                # uhs is an array of shape (N, I, P)
                self._store('uhs/' + kind, calc.make_uhs(hmaps), rlz,
                            poes=oq.poes)

    def _store(self, name, curves, rlz, **kw):
        self.datastore.hdf5[name] = curves
        dset = self.datastore.hdf5[name]
        if rlz is not None:
            dset.attrs['uid'] = rlz.uid
        for k, v in kw.items():
            dset.attrs[k] = v
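# The conceptual core of calc.compute_hazard_maps used by hazard_maps():
# invert each hazard curve (PoE as a decreasing function of intensity) to
# find the intensity matching a target PoE. Log-intensity interpolation is
# a common convention, assumed here.

def _hazard_map_sketch(imls, poes, target_poe):
    import numpy
    imls = numpy.asarray(imls, float)
    poes = numpy.asarray(poes, float)  # decreasing with increasing imls
    # numpy.interp wants increasing x, so reverse the decreasing poes
    return numpy.exp(
        numpy.interp(target_poe, poes[::-1], numpy.log(imls)[::-1]))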
class ScenarioCalculator(base.HazardCalculator): """ Scenario hazard calculator """ core_func = calc_gmfs tags = datastore.persistent_attribute('tags') sescollection = datastore.persistent_attribute('sescollection') is_stochastic = True def pre_execute(self): """ Read the site collection and initialize GmfComputer, tags and seeds """ super(ScenarioCalculator, self).pre_execute() trunc_level = self.oqparam.truncation_level correl_model = readinput.get_correl_model(self.oqparam) n_gmfs = self.oqparam.number_of_ground_motion_fields rupture = readinput.get_rupture(self.oqparam) self.gsims = readinput.get_gsims(self.oqparam) self.rlzs_assoc = readinput.get_rlzs_assoc(self.oqparam) # filter the sites self.sitecol = filters.filter_sites_by_distance_to_rupture( rupture, self.oqparam.maximum_distance, self.sitecol) if self.sitecol is None: raise RuntimeError('All sites were filtered out! ' 'maximum_distance=%s km' % self.oqparam.maximum_distance) self.tags = numpy.array( sorted(['scenario-%010d' % i for i in range(n_gmfs)]), (bytes, 100)) self.computer = GmfComputer(rupture, self.sitecol, self.oqparam.imtls, self.gsims, trunc_level, correl_model) rnd = random.Random(self.oqparam.random_seed) self.tag_seed_pairs = [(tag, rnd.randint(0, calc.MAX_INT)) for tag in self.tags] self.sescollection = [{ tag: Rupture(tag, seed, rupture) for tag, seed in self.tag_seed_pairs }] def execute(self): """ Compute the GMFs in parallel and return a dictionary gmf_by_tag """ logging.info('Computing the GMFs') args = (self.tag_seed_pairs, self.computer, self.monitor('calc_gmfs')) gmf_by_tag = parallel.apply_reduce( self.core_func.__func__, args, concurrent_tasks=self.oqparam.concurrent_tasks) return gmf_by_tag def post_execute(self, gmf_by_tag): """ :param gmf_by_tag: a dictionary tag -> gmf """ data = [] for ordinal, tag in enumerate(sorted(gmf_by_tag)): gmf = gmf_by_tag[tag] gmf['idx'] = ordinal data.append(gmf) gmfa = numpy.concatenate(data) self.datastore['gmfs/col00'] = gmfa self.datastore['gmfs'].attrs['nbytes'] = gmfa.nbytes
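The tag/seed pairing in `pre_execute` is reproducible because a single `random.Random` instance is seeded once with the master `random_seed` and then consumed in tag order. A self-contained sketch of that idea (the seed value and MAX_INT bound are illustrative, not the engine's constants):

import random

MAX_INT = 2 ** 31 - 1  # assumption: any large upper bound works here
tags = ['scenario-%010d' % i for i in range(3)]
rnd = random.Random(42)  # plays the role of oqparam.random_seed
tag_seed_pairs = [(tag, rnd.randint(0, MAX_INT)) for tag in tags]
print(tag_seed_pairs)  # identical on every run with the same master seed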
class EventBasedRuptureCalculator(ClassicalCalculator): """ Event based PSHA calculator generating the ruptures only """ core_task = compute_ruptures etags = datastore.persistent_attribute('etags') is_stochastic = True def init(self): """ Set the random seed passed to the SourceManager and the minimum_intensity dictionary. """ oq = self.oqparam self.random_seed = oq.random_seed self.rlzs_assoc = self.datastore['csm_info'].get_rlzs_assoc() self.min_iml = fix_minimum_intensity(oq.minimum_intensity, oq.imtls) self.rup_data = {} def count_eff_ruptures(self, ruptures_by_trt_id, trt_model): """ Returns the number of ruptures sampled in the given trt_model. :param ruptures_by_trt_id: a dictionary with key trt_id :param trt_model: a TrtModel instance """ return sum( len(ruptures) for trt_id, ruptures in ruptures_by_trt_id.items() if trt_model.id == trt_id) def zerodict(self): """ Initial accumulator, a dictionary (trt_id, gsim) -> curves """ zd = AccumDict() zd.calc_times = [] zd.eff_ruptures = AccumDict() return zd def agg_dicts(self, acc, ruptures_by_trt_id): """ Aggregate dictionaries of hazard curves by updating the accumulator. :param acc: accumulator dictionary :param ruptures_by_trt_id: a nested dictionary trt_id -> ProbabilityMap """ with self.monitor('aggregate curves', autoflush=True): if hasattr(ruptures_by_trt_id, 'calc_times'): acc.calc_times.extend(ruptures_by_trt_id.calc_times) if hasattr(ruptures_by_trt_id, 'eff_ruptures'): acc.eff_ruptures += ruptures_by_trt_id.eff_ruptures acc += ruptures_by_trt_id if len(ruptures_by_trt_id): trt = ruptures_by_trt_id.trt try: dset = self.rup_data[trt] except KeyError: dset = self.rup_data[trt] = self.datastore.create_dset( 'rup_data/' + trt, ruptures_by_trt_id.rup_data.dtype) dset.extend(ruptures_by_trt_id.rup_data) self.datastore.flush() return acc def post_execute(self, result): """ Save the SES collection """ with self.monitor('saving ruptures', autoflush=True): # ordering ruptures sescollection = [] for trt_id in result: for ebr in result[trt_id]: sescollection.append(ebr) sescollection.sort(key=operator.attrgetter('serial')) etags = numpy.concatenate([ebr.etags for ebr in sescollection]) self.etags = numpy.array(etags, (bytes, 100)) nr = len(sescollection) logging.info('Saving SES collection with %d ruptures, %d events', nr, len(etags)) eid = 0 for ebr in sescollection: eids = [] for event in ebr.events: event['eid'] = eid eids.append(eid) eid += 1 self.datastore['sescollection/%s' % ebr.serial] = ebr self.datastore.set_nbytes('sescollection') for dset in self.rup_data.values(): if len(dset.dset): numsites = dset.dset['numsites'] multiplicity = dset.dset['multiplicity'] spr = numpy.average(numsites, weights=multiplicity) mul = numpy.average(multiplicity, weights=numsites) self.datastore.set_attrs(dset.name, sites_per_rupture=spr, multiplicity=mul) if self.rup_data: self.datastore.set_nbytes('rup_data')
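The `acc += ruptures_by_trt_id` line in `agg_dicts` relies on the AccumDict merge semantics: dictionaries are combined by adding values key by key. A tiny stand-in showing just that behavior (not the real openquake.baselib.general.AccumDict, which does more):

class MiniAccumDict(dict):
    """Dictionary that merges another dict by summing values per key."""
    def __iadd__(self, other):
        for key, value in other.items():
            self[key] = self[key] + value if key in self else value
        return self

acc = MiniAccumDict()
acc += {'trt-0': 2}            # e.g. eff_ruptures per trt_id
acc += {'trt-0': 3, 'trt-1': 1}
print(acc)                     # {'trt-0': 5, 'trt-1': 1}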
class ClassicalCalculator(base.HazardCalculator): """ Classical PSHA calculator """ core_func = classical source_info = datastore.persistent_attribute('source_info') def agg_dicts(self, acc, val): """ Aggregate dictionaries of hazard curves by updating the accumulator. :param acc: accumulator dictionary :param val: a dictionary of hazard curves, keyed by (trt_id, gsim) """ with self.monitor('aggregate curves', autoflush=True): if hasattr(val, 'calc_times'): acc.calc_times.extend(val.calc_times) for bb in getattr(val, 'bbs', []): acc.bb_dict[bb.lt_model_id, bb.site_id].update_bb(bb) if hasattr(acc, 'n'): # tiling calculator for key in val: acc[key] = agg_curves( acc[key], expand(val[key], acc.n, val.siteslice)) else: # classical, event_based for key in val: acc[key] = agg_curves(acc[key], val[key]) return acc def execute(self): """ Run in parallel `core_func(sources, sitecol, monitor)`, by parallelizing on the sources according to their weight and tectonic region type. """ monitor = self.monitor.new(self.core_func.__name__) monitor.oqparam = self.oqparam sources = self.csm.get_sources() zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls) zerodict = AccumDict((key, zc) for key in self.rlzs_assoc) zerodict.calc_times = [] zerodict.bb_dict = { (smodel.ordinal, site.id): BoundingBox(smodel.ordinal, site.id) for site in self.sitecol for smodel in self.csm.source_models } if self.oqparam.poes_disagg else {} curves_by_trt_gsim = parallel.apply_reduce( self.core_func.__func__, (sources, self.sitecol, 0, self.rlzs_assoc, monitor), agg=self.agg_dicts, acc=zerodict, concurrent_tasks=self.oqparam.concurrent_tasks, weight=operator.attrgetter('weight'), key=operator.attrgetter('trt_model_id')) store_source_chunks(self.datastore) return curves_by_trt_gsim def post_execute(self, curves_by_trt_gsim): """ Collect the hazard curves by realization and export them. 
:param curves_by_trt_gsim: a dictionary (trt_id, gsim) -> hazard curves """ # save calculation time per source calc_times = getattr(curves_by_trt_gsim, 'calc_times', []) sources = self.csm.get_sources() infodict = collections.defaultdict(float) weight = {} for src_idx, dt in calc_times: src = sources[src_idx] weight[src.trt_model_id, src.source_id] = src.weight infodict[src.trt_model_id, src.source_id] += dt infolist = [key + (dt, weight[key]) for key, dt in infodict.items()] infolist.sort(key=operator.itemgetter(1), reverse=True) if infolist: self.source_info = numpy.array(infolist, source_info_dt) with self.monitor('save curves_by_trt_gsim', autoflush=True): for sm in self.rlzs_assoc.csm_info.source_models: group = self.datastore.hdf5.create_group( 'curves_by_sm/' + '_'.join(sm.path)) group.attrs['source_model'] = sm.name for tm in sm.trt_models: for i, gsim in enumerate(tm.gsims): try: curves = curves_by_trt_gsim[tm.id, gsim] except KeyError: # no data for the trt_model pass else: ts = '%03d-%d' % (tm.id, i) if nonzero(curves): group[ts] = curves group[ts].attrs['trt'] = tm.trt group[ts].attrs['nbytes'] = curves.nbytes group[ts].attrs['gsim'] = str(gsim) self.datastore.set_nbytes(group.name) self.datastore.set_nbytes('curves_by_sm') oq = self.oqparam with self.monitor('combine and save curves_by_rlz', autoflush=True): zc = zero_curves(len(self.sitecol.complete), oq.imtls) curves_by_rlz = self.rlzs_assoc.combine_curves( curves_by_trt_gsim, agg_curves, zc) rlzs = self.rlzs_assoc.realizations nsites = len(self.sitecol) if oq.individual_curves: for rlz, curves in curves_by_rlz.items(): self.store_curves('rlz-%03d' % rlz.ordinal, curves, rlz) if len(rlzs) == 1: # cannot compute statistics [self.mean_curves] = curves_by_rlz.values() return with self.monitor('compute and save statistics', autoflush=True): weights = (None if oq.number_of_logic_tree_samples else [rlz.weight for rlz in rlzs]) mean = oq.mean_hazard_curves if mean: self.mean_curves = numpy.array(zc) for imt in oq.imtls: self.mean_curves[imt] = scientific.mean_curve( [curves_by_rlz[rlz][imt] for rlz in rlzs], weights) self.quantile = {} for q in oq.quantile_hazard_curves: self.quantile[q] = qc = numpy.array(zc) for imt in oq.imtls: curves = [curves_by_rlz[rlz][imt] for rlz in rlzs] qc[imt] = scientific.quantile_curve( curves, q, weights).reshape((nsites, -1)) if mean: self.store_curves('mean', self.mean_curves) for q in self.quantile: self.store_curves('quantile-%s' % q, self.quantile[q]) def hazard_maps(self, curves): """ Compute the hazard maps associated to the curves """ maps = zero_maps( len(self.sitecol), self.oqparam.imtls, self.oqparam.poes) for imt in curves.dtype.fields: # build a matrix of size (N, P) data = calc.compute_hazard_maps( curves[imt], self.oqparam.imtls[imt], self.oqparam.poes) for poe, hmap in zip(self.oqparam.poes, data.T): maps['%s~%s' % (imt, poe)] = hmap return maps def store_curves(self, kind, curves, rlz=None): """ Store all kind of curves, optionally computing maps and uhs curves. 
        :param kind: the kind of curves to store
        :param curves: an array of N curves to store
        :param rlz: hazard realization, if any
        """
        oq = self.oqparam
        self._store('hcurves/' + kind, curves, rlz, nbytes=curves.nbytes)
        if oq.hazard_maps or oq.uniform_hazard_spectra:
            # hmaps is a composite array of shape (N, P)
            hmaps = self.hazard_maps(curves)
            if oq.hazard_maps:
                self._store('hmaps/' + kind, hmaps, rlz,
                            poes=oq.poes, nbytes=hmaps.nbytes)

    def _store(self, name, curves, rlz, **kw):
        self.datastore.hdf5[name] = curves
        dset = self.datastore.hdf5[name]
        if rlz is not None:
            dset.attrs['uid'] = rlz.uid
        for k, v in kw.items():
            dset.attrs[k] = v
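Conceptually, `calc.compute_hazard_maps` inverts each hazard curve to find the intensity level matching a target PoE. A sketch of that inversion for one site, interpolating in log space (log-log interpolation is an assumption consistent with common practice, not a copy of the engine code):

import numpy

imls = numpy.array([0.01, 0.05, 0.1, 0.2, 0.4])   # intensity levels
poes = numpy.array([0.9, 0.6, 0.3, 0.1, 0.02])    # hazard curve, decreasing
target_poe = 0.1

# numpy.interp needs increasing x, so reverse both arrays
iml = numpy.exp(numpy.interp(numpy.log(target_poe),
                             numpy.log(poes[::-1]),
                             numpy.log(imls[::-1])))
print(round(iml, 3))  # -> 0.2 for this curve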
class HazardCalculator(BaseCalculator): """ Base class for hazard calculators based on source models """ riskmodel = datastore.persistent_attribute('riskmodel') mean_curves = None # to be overridden SourceProcessor = source.SourceFilterSplitter def assoc_assets_sites(self, sitecol): """ :param sitecol: a sequence of sites :returns: a pair (filtered_sites, assets_by_site) The new site collection is different from the original one if some assets were discarded or if there were missing assets for some sites. """ maximum_distance = self.oqparam.asset_hazard_distance siteobjects = geodetic.GeographicObjects( Site(sid, lon, lat) for sid, lon, lat in zip(sitecol.sids, sitecol.lons, sitecol.lats)) assets_by_sid = general.AccumDict() for assets in self.assets_by_site: if len(assets): lon, lat = assets[0].location site, _ = siteobjects.get_closest(lon, lat, maximum_distance) if site: assets_by_sid += {site.sid: list(assets)} if not assets_by_sid: raise AssetSiteAssociationError( 'Could not associate any site to any assets within the ' 'maximum distance of %s km' % maximum_distance) mask = numpy.array([sid in assets_by_sid for sid in sitecol.sids]) assets_by_site = [assets_by_sid.get(sid, []) for sid in sitecol.sids] return sitecol.filter(mask), numpy.array(assets_by_site) def count_assets(self): """ Count how many assets are taken into consideration by the calculator """ return sum(len(assets) for assets in self.assets_by_site) def pre_execute(self): """ Check if there is a pre_calculator or a previous calculation ID. If yes, read the inputs by invoking the precalculator or by retrieving the previous calculation; if not, read the inputs directly. """ if self.pre_calculator is not None: # the parameter hazard_calculation_id is only meaningful if # there is a precalculator precalc_id = self.oqparam.hazard_calculation_id if precalc_id is None: # recompute everything precalc = calculators[self.pre_calculator]( self.oqparam, self.monitor('precalculator'), self.datastore.calc_id) precalc.run() if 'scenario' not in self.oqparam.calculation_mode: self.csm = precalc.csm else: # read previously computed data parent = datastore.DataStore(precalc_id) self.datastore.set_parent(parent) # update oqparam with the attributes saved in the datastore self.oqparam = OqParam.from_(self.datastore.attrs) self.read_risk_data() else: # we are in a basic calculator self.read_risk_data() self.read_sources() self.datastore.hdf5.flush() def read_exposure(self): """ Read the exposure, the riskmodel and update the attributes .exposure, .sitecol, .assets_by_site, .cost_types, .taxonomies. """ logging.info('Reading the exposure') with self.monitor('reading exposure', autoflush=True): self.exposure = readinput.get_exposure(self.oqparam) self.sitecol, self.assets_by_site = (readinput.get_sitecol_assets( self.oqparam, self.exposure)) if len(self.exposure.cost_types): self.cost_types = self.exposure.cost_types self.taxonomies = numpy.array(sorted(self.exposure.taxonomies), '|S100') self.datastore['time_events'] = sorted(self.exposure.time_events) def load_riskmodel(self): """ Read the risk model and set the attribute .riskmodel. The riskmodel can be empty for hazard calculations. Save the loss ratios (if any) in the datastore. 
""" rmdict = riskmodels.get_risk_models(self.oqparam) self.oqparam.set_risk_imtls(rmdict) # save risk_imtls in the datastore: this is crucial self.datastore.hdf5.attrs['risk_imtls'] = repr(self.oqparam.risk_imtls) self.riskmodel = rm = readinput.get_risk_model(self.oqparam, rmdict) if 'taxonomies' in self.datastore: # check that we are covering all the taxonomies in the exposure missing = set(self.taxonomies) - set(rm.taxonomies) if rm and missing: raise RuntimeError('The exposure contains the taxonomies %s ' 'which are not in the risk model' % missing) # save the loss ratios in the datastore pairs = [(cb.loss_type, (numpy.float64, len(cb.ratios))) for cb in rm.curve_builders if cb.user_provided] if not pairs: return loss_ratios = numpy.zeros(len(rm), numpy.dtype(pairs)) for cb in rm.curve_builders: if cb.user_provided: loss_ratios_lt = loss_ratios[cb.loss_type] for i, imt_taxo in enumerate(sorted(rm)): loss_ratios_lt[i] = rm[imt_taxo].loss_ratios[cb.loss_type] self.datastore['loss_ratios'] = loss_ratios self.datastore['loss_ratios'].attrs['imt_taxos'] = sorted(rm) self.datastore['loss_ratios'].attrs['nbytes'] = loss_ratios.nbytes def read_risk_data(self): """ Read the exposure (if any), the risk model (if any) and then the site collection, possibly extracted from the exposure. """ logging.info('Reading the site collection') with self.monitor('reading site collection', autoflush=True): haz_sitecol = readinput.get_site_collection(self.oqparam) inputs = self.oqparam.inputs if 'exposure' in inputs: self.read_exposure() self.load_riskmodel() # must be called *after* read_exposure num_assets = self.count_assets() if self.datastore.parent: haz_sitecol = self.datastore.parent['sitecol'] if haz_sitecol is not None and haz_sitecol != self.sitecol: with self.monitor('assoc_assets_sites'): self.sitecol, self.assets_by_site = \ self.assoc_assets_sites(haz_sitecol.complete) ok_assets = self.count_assets() num_sites = len(self.sitecol) logging.warn('Associated %d assets to %d sites, %d discarded', ok_assets, num_sites, num_assets - ok_assets) elif (self.datastore.parent and 'exposure' in OqParam.from_( self.datastore.parent.attrs).inputs): logging.info('Re-using the already imported exposure') self.load_riskmodel() else: # no exposure self.load_riskmodel() self.sitecol = haz_sitecol # save mesh and asset collection self.save_mesh() if hasattr(self, 'assets_by_site'): self.assetcol = riskinput.build_asset_collection( self.assets_by_site, self.oqparam.time_event) spec = set(self.oqparam.specific_assets) unknown = spec - set(self.assetcol['asset_ref']) if unknown: raise ValueError('The specific asset(s) %s are not in the ' 'exposure' % ', '.join(unknown)) def save_mesh(self): """ Save the mesh associated to the complete sitecol in the HDF5 file """ if ('sitemesh' not in self.datastore and 'sitemesh' not in self.datastore.parent): col = self.sitecol.complete mesh_dt = numpy.dtype([('lon', F32), ('lat', F32)]) self.sitemesh = numpy.array(list(zip(col.lons, col.lats)), mesh_dt) def read_sources(self): """ Read the composite source model (if any). This method must be called after read_risk_data, to be able to filter to sources according to the site collection. 
""" if 'source' in self.oqparam.inputs: logging.info('Reading the composite source model') with self.monitor('reading composite source model', autoflush=True): self.csm = readinput.get_composite_source_model( self.oqparam, self.sitecol, self.SourceProcessor, self.monitor, dstore=self.datastore) # we could manage limits here self.job_info = readinput.get_job_info(self.oqparam, self.csm, self.sitecol) self.rlzs_assoc = self.csm.get_rlzs_assoc() logging.info('Total weight of the sources=%s', self.job_info['input_weight']) logging.info('Expected output size=%s', self.job_info['output_weight']) def post_process(self): """For compatibility with the engine"""
class PSHACalculator(base.HazardCalculator): """ Classical PSHA calculator """ core_task = classical source_info = datastore.persistent_attribute('source_info') def agg_dicts(self, acc, pmap): """ Aggregate dictionaries of hazard curves by updating the accumulator. :param acc: accumulator dictionary :param pmap: a ProbabilityMap """ with self.monitor('aggregate curves', autoflush=True): for src_id, nsites, calc_time in pmap.calc_times: src_id = src_id.split(':', 1)[0] info = self.csm.infos[pmap.grp_id, src_id] info.calc_time += calc_time info.num_sites = max(info.num_sites, nsites) info.num_split += 1 acc.eff_ruptures += pmap.eff_ruptures for bb in getattr(pmap, 'bbs', []): # for disaggregation acc.bb_dict[bb.lt_model_id, bb.site_id].update_bb(bb) acc[pmap.grp_id] |= pmap self.datastore.flush() return acc def count_eff_ruptures(self, result_dict, src_group): """ Returns the number of ruptures in the src_group (after filtering) or 0 if the src_group has been filtered away. :param result_dict: a dictionary with keys (grp_id, gsim) :param src_group: a SourceGroup instance """ return result_dict.eff_ruptures.get(src_group.id, 0) def zerodict(self): """ Initial accumulator, a dict grp_id -> ProbabilityMap(L, G) """ zd = AccumDict() num_levels = len(self.oqparam.imtls.array) for grp in self.csm.src_groups: num_gsims = len(self.rlzs_assoc.gsims_by_grp_id[grp.id]) zd[grp.id] = ProbabilityMap(num_levels, num_gsims) zd.calc_times = [] zd.eff_ruptures = AccumDict() # grp_id -> eff_ruptures zd.bb_dict = BBdict() if self.oqparam.poes_disagg or self.oqparam.iml_disagg: for sid in self.sitecol.sids: for smodel in self.csm.source_models: zd.bb_dict[smodel.ordinal, sid] = BoundingBox(smodel.ordinal, sid) return zd def execute(self): """ Run in parallel `core_task(sources, sitecol, monitor)`, by parallelizing on the sources according to their weight and tectonic region type. """ oq = self.oqparam monitor = self.monitor(self.core_task.__name__, truncation_level=oq.truncation_level, imtls=oq.imtls, maximum_distance=oq.maximum_distance, disagg=oq.poes_disagg or oq.iml_disagg) with self.monitor('managing sources', autoflush=True): allargs = self.gen_args(self.csm, monitor) iterargs = saving_sources_by_task(allargs, self.datastore) if isinstance(allargs, list): # there is a trick here: if the arguments are known # (a list, not an iterator), keep them as a list # then the Starmap will understand the case of a single # argument tuple and it will run in core the task iterargs = list(iterargs) ires = parallel.Starmap(self.core_task.__func__, iterargs).submit_all() acc = ires.reduce(self.agg_dicts, self.zerodict()) with self.monitor('store source_info', autoflush=True): self.store_source_info(self.csm.infos, acc) return acc def gen_args(self, csm, monitor): """ Used in the case of large source model logic trees. 
:param csm: a CompositeSourceModel instance :param monitor: a :class:`openquake.baselib.performance.Monitor` :yields: (sources, sites, gsims, monitor) tuples """ oq = self.oqparam maxweight = self.csm.get_maxweight(oq.concurrent_tasks) logging.info('Using a maxweight of %d', maxweight) ngroups = sum(len(sm.src_groups) for sm in csm.source_models) for sm in csm.source_models: for sg in sm.src_groups: logging.info('Sending source group #%d of %d (%s, %d sources)', sg.id + 1, ngroups, sg.trt, len(sg.sources)) gsims = self.rlzs_assoc.gsims_by_grp_id[sg.id] if oq.poes_disagg or oq.iml_disagg: # only for disaggregation monitor.sm_id = self.rlzs_assoc.sm_ids[sg.id] param = dict( samples=sm.samples, seed=oq.ses_seed, ses_per_logic_tree_path=oq.ses_per_logic_tree_path) if sg.src_interdep == 'mutex': # do not split the group self.csm.add_infos(sg.sources) yield sg, self.src_filter, gsims, param, monitor else: for block in self.csm.split_sources( sg.sources, self.src_filter, maxweight): yield block, self.src_filter, gsims, param, monitor def store_source_info(self, infos, acc): # save the calculation times per each source if infos: rows = sorted(infos.values(), key=operator.attrgetter('calc_time'), reverse=True) array = numpy.zeros(len(rows), source.SourceInfo.dt) for i, row in enumerate(rows): for name in array.dtype.names: array[i][name] = getattr(row, name) self.source_info = array infos.clear() self.rlzs_assoc = self.csm.info.get_rlzs_assoc( partial(self.count_eff_ruptures, acc)) self.datastore['csm_info'] = self.csm.info self.datastore['csm_info/assoc_by_grp'] = array = ( self.rlzs_assoc.get_assoc_by_grp()) # computing properly the length in bytes of a variable length array nbytes = array.nbytes + sum(rec['rlzis'].nbytes for rec in array) self.datastore.set_attrs('csm_info/assoc_by_grp', nbytes=nbytes) self.datastore.flush() def post_execute(self, pmap_by_grp_id): """ Collect the hazard curves by realization and export them. :param pmap_by_grp_id: a dictionary grp_id -> hazard curves """ if pmap_by_grp_id.bb_dict: self.datastore['bb_dict'] = pmap_by_grp_id.bb_dict grp_trt = self.csm.info.grp_trt() with self.monitor('saving probability maps', autoflush=True): for grp_id, pmap in pmap_by_grp_id.items(): if pmap: # pmap can be missing if the group is filtered away key = 'poes/grp-%02d' % grp_id self.datastore[key] = pmap self.datastore.set_attrs(key, trt=grp_trt[grp_id]) if 'poes' in self.datastore: self.datastore.set_nbytes('poes')
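The maxweight logic in `gen_args` packs sources into blocks whose total weight stays below a threshold, so that parallel tasks have comparable runtimes. A plain-Python sketch of that packing idea (not the engine's split_sources, which also handles splitting of individual heavy sources):

def split_by_weight(items, weights, maxweight):
    """Yield blocks of items whose cumulative weight stays <= maxweight."""
    block, total = [], 0
    for item, w in zip(items, weights):
        if block and total + w > maxweight:
            yield block
            block, total = [], 0
        block.append(item)
        total += w
    if block:
        yield block

sources = ['src%d' % i for i in range(6)]
weights = [5, 1, 1, 8, 2, 2]
print(list(split_by_weight(sources, weights, 8)))
# [['src0', 'src1', 'src2'], ['src3'], ['src4', 'src5']]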
class RiskCalculator(HazardCalculator): """ Base class for all risk calculators. A risk calculator must set the attributes .riskmodel, .sitecol, .assets_by_site, .exposure .riskinputs in the pre_execute phase. """ specific_assets = datastore.persistent_attribute('specific_assets') extra_args = () # to be overridden in subclasses def make_eps(self, num_ruptures): """ :param num_ruptures: the size of the epsilon array for each asset """ oq = self.oqparam with self.monitor('building epsilons', autoflush=True): return riskinput.make_eps(self.assets_by_site, num_ruptures, oq.master_seed, oq.asset_correlation) def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)): """ :param hazards_by_key: a dictionary key -> IMT -> array of length num_sites :param eps: a matrix of epsilons (possibly empty) :returns: a list of RiskInputs objects, sorted by IMT. """ # add asset.idx as side effect riskinput.build_asset_collection(self.assets_by_site, self.oqparam.time_event) imtls = self.oqparam.imtls with self.monitor('building riskinputs', autoflush=True): riskinputs = [] idx_weight_pairs = [(i, len(assets)) for i, assets in enumerate(self.assets_by_site) ] blocks = general.split_in_blocks(idx_weight_pairs, self.oqparam.concurrent_tasks or 1, weight=operator.itemgetter(1)) for block in blocks: indices = numpy.array([idx for idx, _weight in block]) reduced_assets = self.assets_by_site[indices] reduced_eps = {} # for the assets belonging to the indices if len(eps): for assets in reduced_assets: for asset in assets: reduced_eps[asset.idx] = eps[asset.idx] # collect the hazards by key into hazards by imt hdata = collections.defaultdict(lambda: [{} for _ in indices]) for key, hazards_by_imt in hazards_by_key.items(): for imt in imtls: hazards_by_site = hazards_by_imt[imt] for i, haz in enumerate(hazards_by_site[indices]): hdata[imt][i][key] = haz # build the riskinputs for imt in hdata: ri = self.riskmodel.build_input(imt, hdata[imt], reduced_assets, reduced_eps) if ri.weight > 0: riskinputs.append(ri) logging.info('Built %d risk inputs', len(riskinputs)) return sorted(riskinputs, key=self.riskinput_key) def riskinput_key(self, ri): """ :param ri: riskinput object :returns: the IMT associated to it """ return ri.imt def execute(self): """ Parallelize on the riskinputs and returns a dictionary of results. Require a `.core_func` to be defined with signature (riskinputs, riskmodel, rlzs_assoc, monitor). """ # add fatalities as side effect riskinput.build_asset_collection(self.assets_by_site, self.oqparam.time_event) self.monitor.oqparam = self.oqparam if self.pre_calculator == 'event_based_rupture': self.monitor.assets_by_site = self.assets_by_site self.monitor.num_assets = self.count_assets() all_args = ((self.riskinputs, self.riskmodel, self.rlzs_assoc) + self.extra_args + (self.monitor, )) res = apply_reduce(self.core_func.__func__, all_args, concurrent_tasks=self.oqparam.concurrent_tasks, weight=get_weight, key=self.riskinput_key) return res
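The epsilon matrix built by `make_eps` holds one row of standard normal samples per asset and one column per rupture, reproducible via the master seed and optionally correlated across assets. A sketch under those assumptions (the constant inter-asset correlation model is illustrative, not necessarily the engine's sampling scheme):

import numpy

def make_eps_sketch(num_assets, num_ruptures, seed, correlation):
    rng = numpy.random.RandomState(seed)
    # covariance matrix with constant inter-asset correlation
    cov = numpy.full((num_assets, num_assets), correlation)
    numpy.fill_diagonal(cov, 1.0)
    return rng.multivariate_normal(
        numpy.zeros(num_assets), cov, num_ruptures).T  # shape (A, R)

eps = make_eps_sketch(num_assets=3, num_ruptures=5, seed=42, correlation=0.5)
print(eps.shape)  # (3, 5), reproducible thanks to the master seed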
class BaseCalculator(with_metaclass(abc.ABCMeta)): """ Abstract base class for all calculators. :param oqparam: OqParam object :param monitor: monitor object :param calc_id: numeric calculation ID """ oqparam = datastore.persistent_attribute('oqparam') sitemesh = datastore.persistent_attribute('sitemesh') sitecol = datastore.persistent_attribute('sitecol') rlzs_assoc = datastore.persistent_attribute('rlzs_assoc') realizations = datastore.persistent_attribute('realizations') assets_by_site = datastore.persistent_attribute('assets_by_site') assetcol = datastore.persistent_attribute('assetcol') cost_types = datastore.persistent_attribute('cost_types') taxonomies = datastore.persistent_attribute('taxonomies') job_info = datastore.persistent_attribute('job_info') source_chunks = datastore.persistent_attribute('source_chunks') source_pre_info = datastore.persistent_attribute('source_pre_info') performance = datastore.persistent_attribute('performance') csm = datastore.persistent_attribute('composite_source_model') pre_calculator = None # to be overridden is_stochastic = False # True for scenario and event based calculators def __init__(self, oqparam, monitor=DummyMonitor(), calc_id=None, persistent=True): self.monitor = monitor if persistent: self.datastore = datastore.DataStore(calc_id) else: self.datastore = general.AccumDict() self.datastore.hdf5 = {} self.datastore.export_dir = oqparam.export_dir if 'oqparam' not in self.datastore: # new datastore self.oqparam = oqparam # else we are doing a precalculation; oqparam has been already stored self.persistent = persistent def run(self, pre_execute=True, clean_up=True, concurrent_tasks=None, **kw): """ Run the calculation and return the exported outputs. """ if concurrent_tasks is not None: self.oqparam.concurrent_tasks = concurrent_tasks vars(self.oqparam).update(kw) exported = {} try: if pre_execute: with self.monitor('pre_execute', autoflush=True): self.pre_execute() with self.monitor('execute', autoflush=True): result = self.execute() with self.monitor('post_execute', autoflush=True): self.post_execute(result) with self.monitor('export', autoflush=True): exported = self.export() finally: etype = sys.exc_info()[0] if etype: logging.critical('', exc_info=True) if clean_up: try: self.clean_up() except: logging.error('Cleanup error', exc_info=True) return exported def core_func(*args): """ Core routine running on the workers. """ raise NotImplementedError @abc.abstractmethod def pre_execute(self): """ Initialization phase. """ @abc.abstractmethod def execute(self): """ Execution phase. Usually will run in parallel the core function and return a dictionary with the results. """ @abc.abstractmethod def post_execute(self, result): """ Post-processing phase of the aggregated output. It must be overridden with the export code. It will return a dictionary of output files. """ def export(self, exports=None): """ Export all the outputs in the datastore in the given export formats. 
:returns: dictionary output_key -> sorted list of exported paths """ exported = {} individual_curves = self.oqparam.individual_curves fmts = exports.split(',') if exports else self.oqparam.exports for fmt in fmts: if not fmt: continue for key in self.datastore: if 'rlzs' in key and not individual_curves: continue # skip individual curves ekey = (key, fmt) try: exported[ekey] = sorted(export.export( ekey, self.datastore)) logging.info('exported %s: %s', key, exported[ekey]) except KeyError: logging.info('%s is not exportable in %s', key, fmt) return exported def clean_up(self): """ Collect the realizations and the monitoring information, then close the datastore. """ self.realizations = numpy.array( [(r.uid, r.weight) for r in self.rlzs_assoc.realizations], rlz_dt) performance = self.monitor.collect_performance() if performance is not None: self.performance = performance self.datastore.close() self.datastore.symlink(os.path.dirname(self.oqparam.inputs['job_ini']))
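`run` is a template method: it drives pre_execute/execute/post_execute/export in a fixed order, and subclasses only fill in the abstract phases. A hypothetical minimal subclass showing the contract (names and data are illustrative, not engine API; instantiating it needs a real oqparam with an export_dir):

class MyCalc(BaseCalculator):
    def pre_execute(self):
        self.data = [1, 2, 3]           # read the inputs

    def execute(self):
        return sum(self.data)           # compute, possibly in parallel

    def post_execute(self, result):
        print('total =', result)        # save/export the outputs

# calc = MyCalc(oqparam)   # oqparam would come from a job.ini
# exported = calc.run()    # -> {(key, fmt): sorted list of paths}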
class BaseCalculator(with_metaclass(abc.ABCMeta)): """ Abstract base class for all calculators. :param oqparam: OqParam object :param monitor: monitor object :param calc_id: numeric calculation ID """ sitemesh = datastore.persistent_attribute('sitemesh') sitecol = datastore.persistent_attribute('sitecol') etags = datastore.persistent_attribute('etags') assetcol = datastore.persistent_attribute('assetcol') cost_types = datastore.persistent_attribute('cost_types') job_info = datastore.persistent_attribute('job_info') performance = datastore.persistent_attribute('performance') csm = datastore.persistent_attribute('composite_source_model') pre_calculator = None # to be overridden is_stochastic = False # True for scenario and event based calculators @property def taxonomies(self): return self.datastore['assetcol/taxonomies'].value def __init__(self, oqparam, monitor=Monitor(), calc_id=None): self.monitor = monitor self.datastore = datastore.DataStore(calc_id) self.monitor.calc_id = self.datastore.calc_id self.monitor.hdf5path = self.datastore.hdf5path self.datastore.export_dir = oqparam.export_dir self.oqparam = oqparam def save_params(self, **kw): """ Update the current calculation parameters and save engine_version """ vars(self.oqparam).update(engine_version=__version__, **kw) self.datastore['oqparam'] = self.oqparam # save the updated oqparam self.datastore.flush() def set_log_format(self): """Set the format of the root logger""" fmt = '[%(asctime)s #{} %(levelname)s] %(message)s'.format( self.datastore.calc_id) for handler in logging.root.handlers: handler.setFormatter(logging.Formatter(fmt)) def run(self, pre_execute=True, concurrent_tasks=None, close=True, **kw): """ Run the calculation and return the exported outputs. """ self.close = close self.set_log_format() if logversion: # make sure this is logged only once logging.info('Using engine version %s', __version__) logversion.pop() if (concurrent_tasks is not None and concurrent_tasks != OqParam.concurrent_tasks.default): self.oqparam.concurrent_tasks = concurrent_tasks self.save_params(**kw) exported = {} try: if pre_execute: self.pre_execute() result = self.execute() self.post_execute(result) exported = self.export(kw.get('exports', '')) except KeyboardInterrupt: pids = ' '.join(str(p.pid) for p in executor._processes) sys.stderr.write( 'You can manually kill the workers with kill %s\n' % pids) raise except: if kw.get('pdb'): # post-mortem debug tb = sys.exc_info()[2] traceback.print_exc(tb) pdb.post_mortem(tb) else: logging.critical('', exc_info=True) raise self.clean_up() return exported def core_task(*args): """ Core routine running on the workers. """ raise NotImplementedError @abc.abstractmethod def pre_execute(self): """ Initialization phase. """ @abc.abstractmethod def execute(self): """ Execution phase. Usually will run in parallel the core function and return a dictionary with the results. """ @abc.abstractmethod def post_execute(self, result): """ Post-processing phase of the aggregated output. It must be overridden with the export code. It will return a dictionary of output files. """ def export(self, exports=None): """ Export all the outputs in the datastore in the given export formats. 
        :returns: dictionary output_key -> sorted list of exported paths
        """
        # avoid circular imports
        from openquake.commonlib.export import export as exp
        exported = {}
        individual_curves = self.oqparam.individual_curves
        if exports and isinstance(exports, tuple):
            fmts = exports
        elif exports:  # is a string
            fmts = exports.split(',')
        else:  # nothing passed, use the exports from the job.ini
            fmts = self.oqparam.exports
        for fmt in fmts:
            if not fmt:
                continue
            keys = set(self.datastore)
            if (self.oqparam.uniform_hazard_spectra and
                    not self.oqparam.hazard_maps):
                # do not export the hazard maps, even if they are there
                keys.remove('hmaps')
            for key in sorted(keys):  # top level keys
                if 'rlzs' in key and not individual_curves:
                    continue  # skip individual curves
                ekey = (key, fmt)
                if ekey not in exp:  # non-exportable output
                    continue
                with self.monitor('export'):
                    exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', key, exported[ekey])
            # special case for uhs which is a view
            if (self.oqparam.uniform_hazard_spectra and
                    'hmaps' in self.datastore):
                ekey = ('uhs', fmt)
                exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', ekey[0], exported[ekey])
        return exported

    def clean_up(self):
        """
        Collect the monitoring information, then close the datastore.
        """
        if 'hcurves' in self.datastore:
            self.datastore.set_nbytes('hcurves')
        if 'hmaps' in self.datastore:
            self.datastore.set_nbytes('hmaps')
        self.datastore.flush()
        if self.close:  # in the engine we close later
            try:
                self.datastore.close()
            except RuntimeError:  # there could be a mysterious HDF5 error
                logging.warn('', exc_info=True)
class EventBasedRiskCalculator(base.RiskCalculator): """ Event based PSHA calculator generating the event loss table and fixed ratios loss curves. """ pre_calculator = 'event_based_rupture' core_func = ebr epsilon_matrix = datastore.persistent_attribute('epsilon_matrix') is_stochastic = True def pre_execute(self): """ Read the precomputed ruptures (or compute them on the fly) and prepare some datasets in the datastore. """ super(EventBasedRiskCalculator, self).pre_execute() if not self.riskmodel: # there is no riskmodel, exit early self.execute = lambda: None self.post_execute = lambda result: None return oq = self.oqparam epsilon_sampling = oq.epsilon_sampling correl_model = readinput.get_correl_model(oq) gsims_by_col = self.rlzs_assoc.get_gsims_by_col() assets_by_site = self.assets_by_site # the following is needed to set the asset idx attribute self.assetcol = riskinput.build_asset_collection( assets_by_site, oq.time_event) logging.info('Populating the risk inputs') rup_by_tag = sum(self.datastore['sescollection'], AccumDict()) all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)] num_samples = min(len(all_ruptures), epsilon_sampling) eps_dict = riskinput.make_eps_dict(assets_by_site, num_samples, oq.master_seed, oq.asset_correlation) logging.info('Generated %d epsilons', num_samples * len(eps_dict)) self.epsilon_matrix = numpy.array( [eps_dict[a['asset_ref']] for a in self.assetcol]) self.riskinputs = list( self.riskmodel.build_inputs_from_ruptures( self.sitecol.complete, all_ruptures, gsims_by_col, oq.truncation_level, correl_model, eps_dict, oq.concurrent_tasks or 1)) logging.info('Built %d risk inputs', len(self.riskinputs)) # preparing empty datasets loss_types = self.riskmodel.loss_types self.L = len(loss_types) self.R = len(self.rlzs_assoc.realizations) self.outs = OUTPUTS self.datasets = {} self.monitor.oqparam = self.oqparam # ugly: attaching an attribute needed in the task function self.monitor.num_outputs = len(self.outs) # attaching two other attributes used in riskinput.gen_outputs self.monitor.assets_by_site = self.assets_by_site self.monitor.num_assets = N = self.count_assets() for o, out in enumerate(self.outs): self.datastore.hdf5.create_group(out) for l, loss_type in enumerate(loss_types): cb = self.riskmodel.curve_builders[l] build_curves = len(cb.ratios) for r, rlz in enumerate(self.rlzs_assoc.realizations): key = '/%s/rlz-%03d' % (loss_type, rlz.ordinal) if o in (ELT, ILT): # loss tables dset = self.datastore.create_dset(out + key, elt_dt) else: # risk curves if not build_curves: continue dset = self.datastore.create_dset( out + key, cb.poes_dt, N) self.datasets[o, l, r] = dset if o in (FRC, IRC) and build_curves: grp = self.datastore['%s/%s' % (out, loss_type)] grp.attrs['loss_ratios'] = cb.ratios def execute(self): """ Run the ebr calculator in parallel and aggregate the results """ return apply_reduce( self.core_func.__func__, (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor), concurrent_tasks=self.oqparam.concurrent_tasks, agg=self.agg, acc=cube(self.monitor.num_outputs, self.L, self.R, list), weight=operator.attrgetter('weight'), key=operator.attrgetter('col_id')) def agg(self, acc, result): """ Aggregate list of arrays in longer lists. :param acc: accumulator array of shape (O, L, R) :param result: a numpy array of shape (O, L, R) """ for idx, arrays in numpy.ndenumerate(result): acc[idx].extend(arrays) return acc def post_execute(self, result): """ Save the event loss table in the datastore. 
        :param result: a numpy array of shape (O, L, R) containing
                       lists of arrays
        """
        nses = self.oqparam.ses_per_logic_tree_path
        saved = {out: 0 for out in self.outs}
        N = len(self.assetcol)
        with self.monitor('saving loss table',
                          autoflush=True, measuremem=True):
            for (o, l, r), data in numpy.ndenumerate(result):
                if not data:  # empty list
                    continue
                if o in (ELT, ILT):  # loss tables, data is a list of arrays
                    losses = numpy.concatenate(data)
                    self.datasets[o, l, r].extend(losses)
                    saved[self.outs[o]] += losses.nbytes
                else:  # risk curves, data is a list of counts dictionaries
                    cb = self.riskmodel.curve_builders[l]
                    counts_matrix = cb.get_counts(N, data)
                    curves = cb.build_rcurves(
                        counts_matrix, nses, self.assetcol)
                    self.datasets[o, l, r].dset[:] = curves
                    saved[self.outs[o]] += curves.nbytes
            self.datastore.hdf5.flush()
        for out in self.outs:
            nbytes = saved[out]
            if nbytes:
                self.datastore[out].attrs['nbytes'] = nbytes
                logging.info('Saved %s in %s', humansize(nbytes), out)
            else:  # remove empty outputs
                del self.datastore[out]
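The risk-curve branch above turns exceedance counts into PoEs before building the curves. A standard Poissonian estimator for that step is poe = 1 - exp(-n_exceedances / n_ses); this is shown as an assumption about what the curve builders do, not a copy of their code:

import numpy

def poes_from_counts(counts, nses):
    """Poissonian PoE estimate from exceedance counts over nses SES."""
    return 1 - numpy.exp(-numpy.asarray(counts, float) / nses)

counts = [0, 1, 5, 20]   # exceedances of a given loss ratio over all SES
print(poes_from_counts(counts, nses=10).round(3))
# [0.    0.095 0.393 0.865]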
class EventBasedRiskCalculator(base.RiskCalculator): """ Event based PSHA calculator generating the ruptures only """ pre_calculator = 'event_based_rupture' core_func = event_based_risk epsilon_matrix = datastore.persistent_attribute('epsilon_matrix') event_loss_asset = datastore.persistent_attribute('event_loss_asset') event_loss = datastore.persistent_attribute('event_loss') is_stochastic = True def riskinput_key(self, ri): """ :param ri: riskinput object :returns: the SESCollection idx associated to it """ return ri.col_id def pre_execute(self): """ Read the precomputed ruptures (or compute them on the fly) and prepare some empty files in the export directory to store the gmfs (if any). If there were pre-existing files, they will be erased. """ super(EventBasedRiskCalculator, self).pre_execute() oq = self.oqparam epsilon_sampling = getattr(oq, 'epsilon_sampling', 1000) correl_model = readinput.get_correl_model(oq) gsims_by_col = self.rlzs_assoc.get_gsims_by_col() assets_by_site = self.assets_by_site logging.info('Building the epsilons') logging.info('Populating the risk inputs') rup_by_tag = sum(self.datastore['sescollection'], AccumDict()) all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)] num_samples = min(len(all_ruptures), epsilon_sampling) eps_dict = riskinput.make_eps_dict( assets_by_site, num_samples, oq.master_seed, oq.asset_correlation) logging.info('Generated %d epsilons', num_samples * len(eps_dict)) self.epsilon_matrix = numpy.array( [eps_dict[a['asset_ref']] for a in self.assetcol]) self.riskinputs = list(self.riskmodel.build_inputs_from_ruptures( self.sitecol.complete, all_ruptures, gsims_by_col, oq.truncation_level, correl_model, eps_dict, oq.concurrent_tasks or 1)) logging.info('Built %d risk inputs', len(self.riskinputs)) def zeros(self, shape, dtype): """ Build a composite dtype from the given loss_types and dtype and return a zero array of the given shape. """ loss_types = self.riskmodel.get_loss_types() dt = numpy.dtype([(lt, dtype) for lt in loss_types]) return numpy.zeros(shape, dt) def post_execute(self, result): """ Extract from the result dictionary rlz.ordinal -> (loss_type, tag) -> [(asset.id, loss), ...] several interesting outputs. """ oq = self.oqparam # take the cached self.rlzs_assoc and write it on the datastore self.rlzs_assoc = self.rlzs_assoc rlzs = self.rlzs_assoc.realizations loss_types = self.riskmodel.get_loss_types() C = oq.loss_curve_resolution self.loss_curve_dt = numpy.dtype( [('losses', (float, C)), ('poes', (float, C)), ('avg', float)]) if oq.conditional_loss_poes: lm_names = _loss_map_names(oq.conditional_loss_poes) self.loss_map_dt = numpy.dtype([(f, float) for f in lm_names]) self.assets = assets = riskinput.sorted_assets(self.assets_by_site) self.specific_assets = specific_assets = [ a for a in assets if a.id in self.oqparam.specific_assets] specific_asset_refs = set(self.oqparam.specific_assets) N = len(assets) event_loss_asset = [{} for rlz in rlzs] event_loss = [{} for rlz in rlzs] loss_curves = self.zeros(N, self.loss_curve_dt) ins_curves = self.zeros(N, self.loss_curve_dt) if oq.conditional_loss_poes: loss_maps = self.zeros(N, self.loss_map_dt) agg_loss_curve = self.zeros(1, self.loss_curve_dt) for i in sorted(result): rlz = rlzs[i] data_by_lt_tag = result[i] # (loss_type, asset_id) -> [(tag, loss, ins_loss), ...] 
elass = {(loss_type, asset.id): [] for asset in assets for loss_type in loss_types} elagg = [] # aggregate event loss nonzero = total = 0 for loss_type, tag in data_by_lt_tag: d = data_by_lt_tag[loss_type, tag] if tag == 'counts_matrix': assets, counts = d.keys(), d.values() indices = numpy.array([asset.idx for asset in assets]) asset_values = workflows.get_values(loss_type, assets) poes = scientific.build_poes( counts, oq.ses_per_logic_tree_path) cb = scientific.CurveBuilder( loss_type, numpy.linspace(0, 1, C)) lcurves = cb.build_loss_curves( poes, asset_values, indices, N) self.store('lcurves/' + loss_type, rlz, lcurves) continue for aid, loss, ins_loss in d['data']: elass[loss_type, aid].append((tag, loss, ins_loss)) # aggregates elagg.append((loss_type, tag, d['loss'], d['ins_loss'])) nonzero += d['nonzero'] total += d['total'] logging.info('rlz=%d: %d/%d nonzero losses', i, nonzero, total) if elass: data_by_lt = collections.defaultdict(list) for (loss_type, asset_id), rows in elass.items(): for tag, loss, ins_loss in rows: data_by_lt[loss_type].append( (tag, asset_id, loss, ins_loss)) for loss_type, data in data_by_lt.items(): event_loss_asset[i][loss_type] = sorted( # data contains rows (tag, asset, loss, ins_loss) (t, a, l, i) for t, a, l, i in data if a in specific_asset_refs) # build the loss curves per asset lc = self.build_loss_curves(elass, loss_type, 1) loss_curves[loss_type] = lc if oq.insured_losses: # build the insured loss curves per asset ic = self.build_loss_curves(elass, loss_type, 2) ins_curves[loss_type] = ic if oq.conditional_loss_poes: # build the loss maps per asset, array of shape (N, P) losses_poes = numpy.array( # shape (N, 2, C) [lc['losses'], lc['poes']]).transpose(1, 0, 2) lmaps = scientific.loss_map_matrix( oq.conditional_loss_poes, losses_poes) # (P, N) for lm, lmap in zip(lm_names, lmaps): loss_maps[loss_type][lm] = lmap self.store('loss_curves', rlz, loss_curves) if oq.insured_losses: self.store('ins_curves', rlz, ins_curves) if oq.conditional_loss_poes: self.store('loss_maps', rlz, loss_maps) if elagg: for loss_type, rows in groupby( elagg, operator.itemgetter(0)).items(): event_loss[i][loss_type] = [row[1:] for row in rows] # aggregate loss curve for all tags losses, poes, avg, _ = self.build_agg_loss_curve_and_map( [loss for _lt, _tag, loss, _ins_loss in rows]) # NB: there is no aggregate insured loss curve agg_loss_curve[loss_type][0] = (losses, poes, avg) # NB: the aggregated loss_map is not stored self.store('agg_loss_curve', rlz, agg_loss_curve) if specific_assets: self.event_loss_asset = event_loss_asset self.event_loss = event_loss # store statistics (i.e. mean and quantiles) for curves and maps if len(self.rlzs_assoc.realizations) > 1: self.compute_store_stats('loss_curves') self.compute_store_stats('agg_loss_curve') def clean_up(self): """ Final checks and cleanup """ if (self.oqparam.ground_motion_fields and 'gmf_by_trt_gsim' not in self.datastore): logging.warn( 'Even if the flag `ground_motion_fields` was set the GMFs ' 'were not saved.\nYou should use the event_based hazard ' 'calculator to do that, not the risk one') super(EventBasedRiskCalculator, self).clean_up() def build_agg_loss_curve_and_map(self, losses): """ Build a loss curve from a set of losses with length given by the parameter loss_curve_resolution. 
        :param losses: a sequence of losses
        :returns: a quartet (losses, poes, avg, loss_map)
        """
        oq = self.oqparam
        clp = oq.conditional_loss_poes
        losses_poes = scientific.event_based(
            losses, tses=oq.tses,
            time_span=oq.risk_investigation_time or oq.investigation_time,
            curve_resolution=oq.loss_curve_resolution)
        loss_map = scientific.loss_map_matrix(
            clp, [losses_poes]).reshape(len(clp)) if clp else None
        return (losses_poes[0], losses_poes[1],
                scientific.average_loss(losses_poes), loss_map)

    def build_loss_curves(self, elass, loss_type, i):
        """
        Build loss curves per asset from a set of losses with length given
        by the parameter loss_curve_resolution.

        :param elass: a dict (loss_type, asset_id) -> (tag, loss, ins_loss)
        :param loss_type: the loss_type
        :param i: 1 for loss curves or 2 for insured loss curves
        :returns: an array of loss curves, one for each asset
        """
        oq = self.oqparam
        C = oq.loss_curve_resolution
        lcs = []
        for asset in self.assets:
            all_losses = [loss[i] for loss in elass[loss_type, asset.id]]
            if all_losses:
                losses, poes = scientific.event_based(
                    all_losses, tses=oq.tses,
                    time_span=oq.risk_investigation_time
                    or oq.investigation_time,
                    curve_resolution=C)
                avg = scientific.average_loss((losses, poes))
            else:
                losses, poes = numpy.zeros(C), numpy.zeros(C)
                avg = 0
            lcs.append((losses, poes, avg))
        return numpy.array(lcs, self.loss_curve_dt)

    def store(self, name, dset, curves):
        """
        Store loss curves, maps and aggregates.

        :param name: the dataset name prefix (e.g. 'loss_curves')
        :param dset: a realization (with a .uid) or the name of a statistic
        :param curves: an array of curves to store
        """
        if hasattr(dset, 'uid'):
            dset = dset.uid
            kind = 'rlzs'
        else:
            kind = 'stats'
        self.datastore['%s-%s/%s' % (name, kind, dset)] = curves

    # ################### methods to compute statistics ################## #

    def build_stats(self, loss_curve_key):
        """
        Compute all statistics for the specified assets starting from the
        stored loss curves. Yield a statistical output object for each
        loss type.
""" oq = self.oqparam rlzs = self.rlzs_assoc.realizations stats = scientific.StatsBuilder( oq.quantile_loss_curves, oq.conditional_loss_poes, [], scientific.normalize_curves_eb) # NB: should we encounter memory issues in the future, the easy # solution is to split the specific assets in blocks and perform # the computation one block at the time for loss_type in self.riskmodel.get_loss_types(): outputs = [] for rlz in rlzs: key = '%s-rlzs/%s' % (loss_curve_key, rlz.uid) lcs = self.datastore[key][loss_type] assets = [None] if key.startswith('agg') else self.assets losses_poes = numpy.array( # -> shape (N, 2, C) [lcs['losses'], lcs['poes']]).transpose(1, 0, 2) out = scientific.Output( assets, loss_type, rlz.ordinal, rlz.weight, loss_curves=losses_poes, insured_curves=None) outputs.append(out) yield stats.build(outputs) def compute_store_stats(self, loss_curve_key): """ Compute and store the statistical outputs """ oq = self.oqparam N = 1 if loss_curve_key.startswith('agg_') else len(self.assets) Q = 1 + len(oq.quantile_loss_curves) loss_curve_stats = self.zeros((Q, N), self.loss_curve_dt) ins_curve_stats = self.zeros((Q, N), self.loss_curve_dt) if oq.conditional_loss_poes: loss_map_stats = self.zeros((Q, N), self.loss_map_dt) for stat in self.build_stats(loss_curve_key): # there is one stat for each loss_type curves, ins_curves, maps = scientific.get_stat_curves(stat) loss_curve_stats[:][stat.loss_type] = curves if oq.insured_losses: ins_curve_stats[:][stat.loss_type] = ins_curves if oq.conditional_loss_poes: loss_map_stats[:][stat.loss_type] = maps for i, stats in enumerate(_mean_quantiles(oq.quantile_loss_curves)): self.store(loss_curve_key, stats, loss_curve_stats[i]) if oq.insured_losses: self.store(loss_curve_key + '_ins', stats, ins_curve_stats[i]) if oq.conditional_loss_poes: self.store(loss_curve_key + '_maps', stats, loss_map_stats[i])
class BaseCalculator(with_metaclass(abc.ABCMeta)): """ Abstract base class for all calculators. :param oqparam: OqParam object :param monitor: monitor object :param calc_id: numeric calculation ID """ sitemesh = datastore.persistent_attribute('sitemesh') sitecol = datastore.persistent_attribute('sitecol') rlzs_assoc = datastore.persistent_attribute('rlzs_assoc') realizations = datastore.persistent_attribute('realizations') assets_by_site = datastore.persistent_attribute('assets_by_site') assetcol = datastore.persistent_attribute('assetcol') cost_types = datastore.persistent_attribute('cost_types') taxonomies = datastore.persistent_attribute('taxonomies') job_info = datastore.persistent_attribute('job_info') source_chunks = datastore.persistent_attribute('source_chunks') source_pre_info = datastore.persistent_attribute('source_pre_info') performance = datastore.persistent_attribute('performance') csm = datastore.persistent_attribute('composite_source_model') pre_calculator = None # to be overridden is_stochastic = False # True for scenario and event based calculators def __init__(self, oqparam, monitor=DummyMonitor(), calc_id=None, persistent=True): self.monitor = monitor if persistent: self.datastore = datastore.DataStore(calc_id) else: self.datastore = general.AccumDict() self.datastore.hdf5 = {} self.datastore.attrs = {} self.datastore.export_dir = oqparam.export_dir self.oqparam = oqparam self.persistent = persistent def save_params(self, **kw): """ Update the current calculation parameters """ vars(self.oqparam).update(kw) for name, val in self.oqparam.to_params(): self.datastore.attrs[name] = val self.datastore.attrs['oqlite_version'] = repr(__version__) self.datastore.hdf5.flush() def run(self, pre_execute=True, clean_up=True, concurrent_tasks=None, **kw): """ Run the calculation and return the exported outputs. """ if concurrent_tasks is not None: self.oqparam.concurrent_tasks = concurrent_tasks self.save_params(**kw) exported = {} try: if pre_execute: with self.monitor('pre_execute', autoflush=True): self.pre_execute() with self.monitor('execute', autoflush=True): result = self.execute() with self.monitor('post_execute', autoflush=True): self.post_execute(result) with self.monitor('export', autoflush=True): exported = self.export() except: if kw.get('pdb'): # post-mortem debug tb = sys.exc_info()[2] traceback.print_exc(tb) pdb.post_mortem(tb) else: logging.critical('', exc_info=True) raise # don't cleanup if there is a critical error, otherwise # there will likely be a cleanup error covering the real one if clean_up: self.clean_up() return exported def core_func(*args): """ Core routine running on the workers. """ raise NotImplementedError @abc.abstractmethod def pre_execute(self): """ Initialization phase. """ @abc.abstractmethod def execute(self): """ Execution phase. Usually will run in parallel the core function and return a dictionary with the results. """ @abc.abstractmethod def post_execute(self, result): """ Post-processing phase of the aggregated output. It must be overridden with the export code. It will return a dictionary of output files. """ def export(self, exports=None): """ Export all the outputs in the datastore in the given export formats. 
        :returns: dictionary output_key -> sorted list of exported paths
        """
        exported = {}
        individual_curves = self.oqparam.individual_curves
        fmts = exports.split(',') if exports else self.oqparam.exports
        for fmt in fmts:
            if not fmt:
                continue
            for key in self.datastore:  # top level keys
                if 'rlzs' in key and not individual_curves:
                    continue  # skip individual curves
                ekey = (key, fmt)
                if ekey not in export.export:  # non-exportable output
                    continue
                exported[ekey] = export.export(ekey, self.datastore)
                logging.info('exported %s: %s', key, exported[ekey])
        return exported

    def clean_up(self):
        """
        Collect the realizations and the monitoring information.
        """
        if 'rlzs_assoc' in self.datastore:
            self.realizations = numpy.array(
                [(r.uid, r.weight) for r in self.rlzs_assoc.realizations],
                rlz_dt)
        performance = self.monitor.collect_performance()
        if performance is not None:
            self.performance = performance
class BaseCalculator(with_metaclass(abc.ABCMeta)): """ Abstract base class for all calculators. :param oqparam: OqParam object :param monitor: monitor object :param calc_id: numeric calculation ID """ from_engine = False # set by engine.run_calc sitecol = datastore.persistent_attribute('sitecol') assetcol = datastore.persistent_attribute('assetcol') performance = datastore.persistent_attribute('performance') pre_calculator = None # to be overridden is_stochastic = False # True for scenario and event based calculators @property def taxonomies(self): return self.datastore['assetcol/taxonomies'].value def __init__(self, oqparam, monitor=Monitor(), calc_id=None): self._monitor = monitor self.datastore = datastore.DataStore(calc_id) self.oqparam = oqparam def monitor(self, operation, **kw): """ Return a new Monitor instance """ mon = self._monitor(operation, hdf5path=self.datastore.hdf5path) self._monitor.calc_id = mon.calc_id = self.datastore.calc_id vars(mon).update(kw) return mon def save_params(self, **kw): """ Update the current calculation parameters and save engine_version """ vars(self.oqparam).update(**kw) self.datastore['oqparam'] = self.oqparam # save the updated oqparam attrs = self.datastore['/'].attrs attrs['engine_version'] = engine_version self.datastore.flush() def set_log_format(self): """Set the format of the root logger""" fmt = '[%(asctime)s #{} %(levelname)s] %(message)s'.format( self.datastore.calc_id) for handler in logging.root.handlers: handler.setFormatter(logging.Formatter(fmt)) def run(self, pre_execute=True, concurrent_tasks=None, close=True, **kw): """ Run the calculation and return the exported outputs. """ global logversion self.close = close self.set_log_format() if logversion: # make sure this is logged only once logging.info('Running %s', self.oqparam.inputs['job_ini']) logging.info('Using engine version %s', engine_version) logversion = False if concurrent_tasks is None: # use the job.ini parameter ct = self.oqparam.concurrent_tasks else: # used the parameter passed in the command-line ct = concurrent_tasks if ct == 0: # disable distribution temporarily oq_distribute = os.environ.get('OQ_DISTRIBUTE') os.environ['OQ_DISTRIBUTE'] = 'no' if ct != self.oqparam.concurrent_tasks: # save the used concurrent_tasks self.oqparam.concurrent_tasks = ct self.save_params(**kw) exported = {} try: if pre_execute: self.pre_execute() self.result = self.execute() if self.result is not None: self.post_execute(self.result) self.before_export() exported = self.export(kw.get('exports', '')) except KeyboardInterrupt: pids = ' '.join(str(p.pid) for p in executor._processes) sys.stderr.write( 'You can manually kill the workers with kill %s\n' % pids) raise except: if kw.get('pdb'): # post-mortem debug tb = sys.exc_info()[2] traceback.print_tb(tb) pdb.post_mortem(tb) else: logging.critical('', exc_info=True) raise finally: if ct == 0: # restore OQ_DISTRIBUTE if oq_distribute is None: # was not set del os.environ['OQ_DISTRIBUTE'] else: os.environ['OQ_DISTRIBUTE'] = oq_distribute return exported def core_task(*args): """ Core routine running on the workers. """ raise NotImplementedError @abc.abstractmethod def pre_execute(self): """ Initialization phase. """ @abc.abstractmethod def execute(self): """ Execution phase. Usually will run in parallel the core function and return a dictionary with the results. """ @abc.abstractmethod def post_execute(self, result): """ Post-processing phase of the aggregated output. It must be overridden with the export code. 
        It will return a dictionary of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.
        Individual outputs are not exported if there are multiple
        realizations.

        :returns: dictionary output_key -> sorted list of exported paths
        """
        num_rlzs = len(self.datastore['realizations'])
        exported = {}
        if isinstance(exports, tuple):
            fmts = exports
        elif exports:  # is a string
            fmts = exports.split(',')
        elif isinstance(self.oqparam.exports, tuple):
            fmts = self.oqparam.exports
        else:  # is a string
            fmts = self.oqparam.exports.split(',')
        keys = set(self.datastore)
        has_hcurves = 'hcurves' in self.datastore or 'poes' in self.datastore
        if has_hcurves:
            keys.add('hcurves')
        for fmt in fmts:
            if not fmt:
                continue
            for key in sorted(keys):  # top level keys
                if 'rlzs' in key and num_rlzs > 1:
                    continue  # skip individual curves
                self._export((key, fmt), exported)
            if has_hcurves and self.oqparam.hazard_maps:
                self._export(('hmaps', fmt), exported)
            if has_hcurves and self.oqparam.uniform_hazard_spectra:
                self._export(('uhs', fmt), exported)
        if self.close:  # in the engine we close later
            self.result = None
            try:
                self.datastore.close()
            except (RuntimeError, ValueError):
                # sometimes produces errors but they are difficult to
                # reproduce
                logging.warning('', exc_info=True)
        return exported

    def _export(self, ekey, exported):
        if ekey in exp:
            with self.monitor('export'):
                exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', ekey[0], exported[ekey])

    def before_export(self):
        """
        Collect the realizations and set the ``nbytes`` attributes
        """
        sm_by_rlz = self.datastore['csm_info'].get_sm_by_rlz(
            self.rlzs_assoc.realizations) or collections.defaultdict(
                lambda: 'NA')
        self.datastore['realizations'] = numpy.array(
            [(r.uid, sm_by_rlz[r], gsim_names(r), r.weight)
             for r in self.rlzs_assoc.realizations], rlz_dt)
        if 'hcurves' in self.datastore:
            self.datastore.set_nbytes('hcurves')
        self.datastore.flush()
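
# NOTE: a minimal sketch (hypothetical, not part of the engine) of what a
# concrete calculator has to provide: the three abstract phases
# pre_execute/execute/post_execute. Everything else (logging, parameter
# saving, export, error handling) is inherited from BaseCalculator.run.
class _ExampleCalculator(BaseCalculator):
    def pre_execute(self):
        # initialization phase: read the inputs
        self.values = [1., 2., 3.]

    def execute(self):
        # execution phase: usually runs core_task in parallel; here a
        # trivial serial computation stands in for it
        return {'total': sum(self.values)}

    def post_execute(self, result):
        # post-processing phase: save the aggregated output
        self.datastore['total'] = result['total']
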
class ClassicalRiskCalculator(base.RiskCalculator):
    """
    Classical Risk calculator
    """
    pre_calculator = 'classical'
    avg_losses = datastore.persistent_attribute('avg_losses-rlzs')
    core_task = classical_risk

    def pre_execute(self):
        """
        Associate the assets to the sites and build the riskinputs.
        """
        if 'hazard_curves' in self.oqparam.inputs:  # read hazard from file
            haz_sitecol, haz_curves = readinput.get_hcurves(self.oqparam)
            self.save_params()
            self.read_exposure()  # define .assets_by_site
            self.load_riskmodel()
            self.assetcol = riskinput.AssetCollection(
                self.assets_by_site, self.cost_calculator,
                self.oqparam.time_event)
            self.sitecol, self.assets_by_site = self.assoc_assets_sites(
                haz_sitecol)
            curves_by_trt_gsim = {(0, 'FromFile'): haz_curves}
            self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
            self.rlzs_assoc = fake.get_rlzs_assoc()
            self.save_mesh()
        else:  # compute hazard or read it from the datastore
            super(ClassicalRiskCalculator, self).pre_execute()
            logging.info('Preparing the risk input')
            curves_by_trt_gsim = {}
            for key in self.datastore['poes']:
                pmap = self.datastore['poes/' + key]
                trt_id = int(key)
                gsims = self.rlzs_assoc.gsims_by_trt_id[trt_id]
                for i, gsim in enumerate(gsims):
                    curves_by_trt_gsim[trt_id, gsim] = array_of_curves(
                        pmap, len(self.sitecol), self.oqparam.imtls, i)
        self.riskinputs = self.build_riskinputs(curves_by_trt_gsim)
        self.monitor.oqparam = self.oqparam
        self.N = sum(len(assets) for assets in self.assets_by_site)
        self.L = len(self.riskmodel.loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.I = self.oqparam.insured_losses
        self.Q1 = len(self.oqparam.quantile_loss_curves) + 1

    def post_execute(self, result):
        """
        Save the losses in a compact form.
        """
        self.loss_curve_dt, self.loss_maps_dt = (
            self.riskmodel.build_loss_dtypes(
                self.oqparam.conditional_loss_poes, self.I))
        self.save_loss_curves(result)
        if self.oqparam.conditional_loss_poes:
            self.save_loss_maps(result)

    def save_loss_curves(self, result):
        """
        Save the loss curves in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_curves = numpy.zeros((self.N, self.R), self.loss_curve_dt)
        for l, r, aid, lcurve in result['loss_curves']:
            loss_curves_lt = loss_curves[ltypes[l]]
            for i, name in enumerate(loss_curves_lt.dtype.names):
                if name.startswith('avg'):
                    loss_curves_lt[name][aid, r] = lcurve[i]
                else:  # 'losses' or 'poes' arrays
                    base.set_array(loss_curves_lt[name][aid, r], lcurve[i])
        self.datastore['loss_curves-rlzs'] = loss_curves

        # loss curves stats
        if self.R > 1:
            stat_curves = numpy.zeros((self.N, self.Q1), self.loss_curve_dt)
            for l, aid, statcurve in result['stat_curves']:
                stat_curves_lt = stat_curves[ltypes[l]]
                for name in stat_curves_lt.dtype.names:
                    for s in range(self.Q1):
                        if name.startswith('avg'):
                            stat_curves_lt[name][aid, s] = statcurve[name][s]
                        else:
                            base.set_array(stat_curves_lt[name][aid, s],
                                           statcurve[name][s])
            self.datastore['loss_curves-stats'] = stat_curves

    def save_loss_maps(self, result):
        """
        Save the loss maps in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_maps = numpy.zeros((self.N, self.R), self.loss_maps_dt)
        for l, r, aid, lmaps in result['loss_maps']:
            loss_maps_lt = loss_maps[ltypes[l]]
            for i, name in enumerate(loss_maps_lt.dtype.names):
                loss_maps_lt[name][aid, r] = lmaps[i]
        self.datastore['loss_maps-rlzs'] = loss_maps

        # loss maps stats
        if self.R > 1:
            stat_maps = numpy.zeros((self.N, self.Q1), self.loss_maps_dt)
            for l, aid, statmaps in result['stat_maps']:
                statmaps_lt = stat_maps[ltypes[l]]
                for name in statmaps_lt.dtype.names:
                    for s in range(self.Q1):
                        statmaps_lt[name][aid, s] = statmaps[name][s]
            self.datastore['loss_maps-stats'] = stat_maps
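
# NOTE: a minimal sketch (hypothetical shapes and values, not the output of
# the engine's real build_loss_dtypes) of how the nested structured arrays
# used by save_loss_curves/save_loss_maps are indexed: the outer dtype has
# one field per loss type, each field holding 'losses', 'poes' and 'avg'
# records, so curves[loss_type][name][aid, r] addresses asset `aid` under
# realization `r`.
def _loss_curve_dtype_demo():
    import numpy
    C = 4  # number of points per loss curve (hypothetical)
    lt_dt = numpy.dtype([('losses', numpy.float32, (C,)),
                         ('poes', numpy.float32, (C,)),
                         ('avg', numpy.float32)])
    loss_curve_dt = numpy.dtype([('structural', lt_dt)])
    curves = numpy.zeros((2, 3), loss_curve_dt)  # N=2 assets, R=3 rlzs
    curves['structural']['avg'][0, 1] = 0.05  # avg loss of asset 0, rlz 1
    assert curves['structural'][0, 1]['avg'] == numpy.float32(0.05)
    return curves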