def build_agg_curve_and_stats(self, builder):
    """
    Build one aggregate loss curve per realization. The curves are NOT
    obtained by aggregating the individual loss curves; they are computed
    directly from the aggregate losses, without ever generating the
    per-asset loss curves.
    """
    oq = self.oqparam
    resolution = oq.loss_curve_resolution
    loss_curve_dt, _ = self.riskmodel.build_all_loss_dtypes(
        resolution, oq.conditional_loss_poes, oq.insured_losses)
    loss_types = self.riskmodel.loss_types
    # flatten the (loss_type_index, rlz_index) -> dataset table
    lr_data = []
    for (l, r), dset in numpy.ndenumerate(self.agg_loss_table):
        lr_data.append((l, r, dset.dset.value))
    result = parallel.apply_reduce(
        build_agg_curve,
        (lr_data, self.I, self.oqparam.ses_ratio, resolution, self.L,
         self.monitor('')),
        concurrent_tasks=self.oqparam.concurrent_tasks)
    agg_curve = numpy.zeros(self.R, loss_curve_dt)
    for l, r, name in result:
        agg_curve[loss_types[l]][name][r] = result[l, r, name]
    if oq.individual_curves:
        self.datastore['agg_curve-rlzs'] = agg_curve
        self.saved['agg_curve-rlzs'] = agg_curve.nbytes
    if self.R > 1:
        self.build_agg_curve_stats(builder, agg_curve, loss_curve_dt)
def test_apply_reduce_no_tasks(self):
    # with concurrent_tasks=0 the input is split by key, not by task count
    result = parallel.apply_reduce(
        get_length, ('aaabb',), concurrent_tasks=0,
        key=lambda letter: letter)
    self.assertEqual(result, {'n': 5})
    self.assertEqual(parallel.apply_reduce._chunks,
                     [['a', 'a', 'a'], ['b', 'b']])
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, monitor)`, by
    parallelizing on the ruptures according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    mon = self.monitor(self.core_func.__name__)
    mon.oqparam = oq
    empty_curves = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
    initial = AccumDict((key, empty_curves) for key in self.rlzs_assoc)
    self.nbytes = 0
    curves_by_trt_gsim = parallel.apply_reduce(
        self.core_func.__func__,
        (self.sesruptures, self.sitecol, self.rlzs_assoc, mon),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        acc=initial,
        agg=self.combine_curves_and_save_gmfs,
        key=operator.attrgetter('col_id'))
    if oq.ground_motion_fields:
        # sanity check: the saved GMFs must have the precomputed size
        expected_nbytes = self.datastore[
            'counts_per_rlz'].attrs['gmfs_nbytes']
        self.datastore['gmfs'].attrs['nbytes'] = self.nbytes
        assert self.nbytes == expected_nbytes, (
            self.nbytes, expected_nbytes)
    return curves_by_trt_gsim
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the ruptures according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    mon = self.monitor(self.core_task.__name__)
    mon.oqparam = oq
    min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, oq.imtls)
    task_args = (self.sesruptures, self.sitecol, oq.imtls,
                 self.rlzs_assoc, min_iml, mon)
    acc = parallel.apply_reduce(
        self.core_task.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        agg=self.combine_curves_and_save_gmfs,
        acc=ProbabilityMap(),
        key=operator.attrgetter('grp_id'),
        weight=operator.attrgetter('weight'))
    if oq.ground_motion_fields:
        self.datastore.set_nbytes('gmf_data')
    return acc
def build_agg_curve(self, saved):
    """
    Build one aggregate loss curve per realization. The curves are NOT
    obtained by aggregating the individual loss curves; they are computed
    directly from the aggregate losses, without ever generating the
    per-asset loss curves.

    :param saved: a Counter mapping each HDF5 key to its size in bytes
    """
    ltypes = self.riskmodel.loss_types
    resolution = self.oqparam.loss_curve_resolution
    insured = self.oqparam.insured_losses
    rlzs = self.datastore['rlzs_assoc'].realizations
    agglosses = self.datastore['agg_losses-rlzs']
    num_rlzs = len(rlzs)
    lr_list = [(lt, rlz.ordinal, agglosses[lt][rlz.uid].value)
               for lt in ltypes for rlz in rlzs]
    result = parallel.apply_reduce(
        build_agg_curve,
        (lr_list, insured, self.oqparam.ses_ratio, resolution, self.monitor),
        concurrent_tasks=self.oqparam.concurrent_tasks)
    for loss_type in ltypes:
        agg_curve = numpy.zeros((num_rlzs, 2), self.loss_curve_dt)
        for lt, r, i in result:
            if lt == loss_type:
                agg_curve[r, i] = result[lt, r, i]
        outkey = 'agg_curve-rlzs/' + loss_type
        self.datastore[outkey] = agg_curve
        saved[outkey] = agg_curve.nbytes
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, monitor)`, by
    parallelizing on the ruptures according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    monitor = self.monitor(self.core_func.__name__)
    monitor.oqparam = oq
    blank = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
    accum = AccumDict((key, blank) for key in self.rlzs_assoc)
    self.nbytes = 0
    curves_by_trt_gsim = parallel.apply_reduce(
        self.core_func.__func__,
        (self.sesruptures, self.sitecol, self.rlzs_assoc, monitor),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        acc=accum, agg=self.combine_curves_and_save_gmfs,
        key=operator.attrgetter('col_id'))
    if oq.ground_motion_fields:
        # sanity check: saved GMF size must match the precomputed total
        expected = self.datastore['counts_per_rlz'].attrs['gmfs_nbytes']
        self.datastore['gmfs'].attrs['nbytes'] = self.nbytes
        assert self.nbytes == expected, (self.nbytes, expected)
    return curves_by_trt_gsim
def test_apply_reduce_no_tasks(self):
    # zero concurrent tasks: the data is chunked by key only
    res = parallel.apply_reduce(
        get_length, ('aaabb',),
        concurrent_tasks=0,
        key=lambda ch: ch)
    self.assertEqual(res, {'n': 5})
    expected_chunks = [['a', 'a', 'a'], ['b', 'b']]
    self.assertEqual(parallel.apply_reduce._chunks, expected_chunks)
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    monitor = self.monitor.new(self.core_func.__name__)
    monitor.oqparam = self.oqparam
    sources = self.csm.get_sources()
    blank = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
    accum = AccumDict((key, blank) for key in self.rlzs_assoc)
    accum.calc_times = []
    if self.oqparam.poes_disagg:
        # one bounding box per (source model, site) pair
        accum.bb_dict = {
            (sm.ordinal, site.id): BoundingBox(sm.ordinal, site.id)
            for site in self.sitecol
            for sm in self.csm.source_models}
    else:
        accum.bb_dict = {}
    curves_by_trt_gsim = parallel.apply_reduce(
        self.core_func.__func__,
        (sources, self.sitecol, 0, self.rlzs_assoc, monitor),
        agg=self.agg_dicts, acc=accum,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('trt_model_id'))
    store_source_chunks(self.datastore)
    return curves_by_trt_gsim
def execute(self):
    """
    Compute the GMFs in parallel
    """
    logging.info('Computing the GMFs')
    task_args = (self.tag_seed_pairs, self.computer,
                 self.monitor('calc_gmfs'))
    return parallel.apply_reduce(
        self.core_func.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks)
def execute(self):
    """
    Compute the GMFs in parallel and return a dictionary gmf_by_tag
    """
    with self.monitor('computing gmfs', autoflush=True):
        task_args = (self.tag_seed_pairs, self.computer,
                     self.monitor('calc_gmfs'))
        return parallel.apply_reduce(
            self.core_func.__func__, task_args,
            concurrent_tasks=self.oqparam.concurrent_tasks)
def execute(self):
    """
    Compute the GMFs in parallel and return a dictionary gmf_by_tag
    """
    logging.info('Computing the GMFs')
    gmf_by_tag = parallel.apply_reduce(
        self.core_func.__func__,
        (self.tag_seed_pairs, self.computer, self.monitor('calc_gmfs')),
        concurrent_tasks=self.oqparam.concurrent_tasks)
    return gmf_by_tag
def execute(self):
    """
    Run the event_based_risk calculator and aggregate the results
    """
    task_args = (self.riskinputs, self.riskmodel, self.rlzs_assoc,
                 self.assets_by_site, self.epsilon_matrix,
                 self.oqparam.specific_assets, self.monitor)
    return apply_reduce(
        self.core_func.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('col_id'))
def execute(self):
    """
    Run the ebr calculator in parallel and aggregate the results
    """
    initial = cube(self.monitor.num_outputs, self.L, self.R, list)
    return apply_reduce(
        self.core_func.__func__,
        (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        agg=self.agg,
        acc=initial,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('col_id'))
def execute(self):
    """
    Run the event_based_risk calculator and aggregate the results
    """
    task_args = (self.riskinputs, self.riskmodel, self.rlzs_assoc,
                 self.monitor)
    accumulator = cube(self.monitor.num_outputs, self.L, self.R, list)
    return apply_reduce(
        self.core_func.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        agg=self.agg, acc=accumulator,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('col_id'))
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    mon = self.monitor(self.core_func.__name__)
    task_args = (self.all_sources, self.site_collection, mon)
    return apply_reduce(
        self.core_func.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=get_weight,
        key=get_trt)
def execute(self):
    """
    Parallelize on the riskinputs and returns a dictionary of results.
    Require a `.core_func` to be defined with signature
    (riskinputs, riskmodel, monitor).
    """
    mon = self.monitor(self.core_func.__name__)
    task_args = (self.riskinputs, self.riskmodel, mon)
    return apply_reduce(
        self.core_func.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=get_weight,
        key=get_imt)
def execute(self):
    """
    Run the ucerf rupture calculation
    """
    branches = self.smlt.branches
    id_set = [(key, branches[key].value, branches[key].weight)
              for key in branches]
    ruptures_by_trt_id = parallel.apply_reduce(
        compute_ruptures,
        (id_set, self.source, self.sitecol, self.oqparam, self.monitor),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        agg=self.agg)
    self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
        functools.partial(self.count_eff_ruptures, ruptures_by_trt_id))
    self.datastore['csm_info'] = self.csm.info
    self.datastore['source_info'] = numpy.array(
        self.infos, source.source_info_dt)
    return ruptures_by_trt_id
def filter_sources(sources, sitecol, maxdist):
    """
    Filter a list of hazardlib sources according to the maximum distance.

    :param sources: the original sources
    :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param maxdist: maximum distance
    :returns: the filtered sources ordered by source_id
    """
    big_job = len(sources) * len(sitecol) > LOTS_OF_SOURCES_SITES
    if big_job:
        # filter in parallel on all available cores
        filtered = parallel.apply_reduce(
            _filter_sources, (sources, sitecol, maxdist), operator.add, [])
    else:
        # few sources and sites: filter sequentially on a single core
        filtered = _filter_sources(sources, sitecol, maxdist)
    return sorted(filtered, key=operator.attrgetter('source_id'))
def execute(self):
    """
    Parallelize on the riskinputs and returns a dictionary of results.
    Require a `.core_func` to be defined with signature
    (riskinputs, riskmodel, rlzs_assoc, monitor).
    """
    with self.monitor('execute risk', autoflush=True) as monitor:
        monitor.oqparam = self.oqparam
        if self.pre_calculator == 'event_based_rupture':
            # the tasks need the exposure information
            monitor.assets_by_site = self.assets_by_site
            monitor.num_assets = self.count_assets()
        return apply_reduce(
            self.core_func.__func__,
            (self.riskinputs, self.riskmodel, self.rlzs_assoc, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=get_weight,
            key=self.riskinput_key)
def execute(self):
    """
    Run the ucerf rupture calculation
    """
    id_set = [(key, self.smlt.branches[key].value,
               self.smlt.branches[key].weight)
              for key in self.smlt.branches]
    task_args = (id_set, self.source, self.sitecol, self.oqparam,
                 self.monitor)
    ruptures_by_trt_id = parallel.apply_reduce(
        compute_ruptures, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        agg=self.agg)
    counter = functools.partial(self.count_eff_ruptures, ruptures_by_trt_id)
    self.rlzs_assoc = self.csm.info.get_rlzs_assoc(counter)
    self.datastore['csm_info'] = self.csm.info
    self.datastore['source_info'] = numpy.array(
        self.infos, source.source_info_dt)
    return ruptures_by_trt_id
def execute(self):
    """
    Parallelize on the riskinputs and returns a dictionary of results.
    Require a `.core_task` to be defined with signature
    (riskinputs, riskmodel, rlzs_assoc, monitor).
    """
    self.monitor.oqparam = self.oqparam
    rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
    if rlz_ids:
        # restrict the computation to the given realizations
        self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)
    all_args = ((self.riskinputs, self.riskmodel, self.rlzs_assoc)
                + self.extra_args + (self.monitor,))
    return apply_reduce(
        self.core_task.__func__, all_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=get_weight,
        key=self.riskinput_key,
        posthook=self.save_data_transfer)
def execute(self):
    """
    Run the ebr calculator in parallel and aggregate the results
    """
    oq = self.oqparam
    self.monitor.oqparam = oq
    # ugly: attaching an attribute needed in the task function
    if oq.insured_losses:
        self.monitor.num_outputs = 2
    else:
        self.monitor.num_outputs = 1
    # attaching two other attributes used in riskinput.gen_outputs
    self.monitor.assets_by_site = self.assets_by_site
    self.monitor.num_assets = self.count_assets()
    initial = cube(self.monitor.num_outputs, self.L, self.R, list)
    return apply_reduce(
        self.core_func.__func__,
        (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor),
        concurrent_tasks=oq.concurrent_tasks,
        agg=self.agg, acc=initial,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('col_id'))
def execute(self):
    """
    Run the event_based_risk calculator and aggregate the results
    """
    self.saved = collections.Counter()  # nbytes per HDF5 key
    self.ass_bytes = 0
    self.agg_bytes = 0
    rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
    if rlz_ids:
        # restrict the computation to the given realizations
        self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)
    task_args = (self.riskinputs, self.riskmodel, self.rlzs_assoc,
                 self.assetcol, self.monitor.new('task'))
    return apply_reduce(
        self.core_task.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        agg=self.agg,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('trt_id'),
        posthook=self.save_data_transfer)
def execute(self):
    """
    Parallelize on the riskinputs and returns a dictionary of results.
    Require a `.core_func` to be defined with signature
    (riskinputs, riskmodel, rlzs_assoc, monitor).
    """
    # called for its side effect of adding fatalities
    riskinput.build_asset_collection(
        self.assets_by_site, self.oqparam.time_event)
    self.monitor.oqparam = self.oqparam
    if self.pre_calculator == 'event_based_rupture':
        self.monitor.assets_by_site = self.assets_by_site
        self.monitor.num_assets = self.count_assets()
    return apply_reduce(
        self.core_func.__func__,
        (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=get_weight,
        key=self.riskinput_key)
def execute(self):
    """
    Compute the GMFs in parallel and return a dictionary gmf_by_trt_gsim
    """
    logging.info('Computing the GMFs')
    gmf_by_tag = parallel.apply_reduce(
        self.core_func.__func__,
        (self.tag_seed_pairs, self.computer, self.monitor('calc_gmfs')),
        concurrent_tasks=self.oqparam.concurrent_tasks)
    rlzs = self.rlzs_assoc.realizations
    imt_dt = numpy.dtype([(imt, float) for imt in self.oqparam.imtls])
    collected = collections.defaultdict(list)
    # group the GMFs per (trt_id, gsim), in tag order
    for tag in sorted(gmf_by_tag):
        for rlz in rlzs:
            gsim = str(rlz)
            collected[0, gsim].append(gmf_by_tag[tag][gsim])
    # (trt_id, gsim) -> N x R matrix
    out = {}
    for key, gmfs in collected.items():
        out[key] = numpy.array(gmfs, imt_dt).T
    return out
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    mon = self.monitor(self.core_func.__name__)
    mon.oqparam = self.oqparam
    sources = self.composite_source_model.get_sources()
    blank = zero_curves(len(self.sitecol), self.oqparam.imtls)
    accum = AccumDict((key, blank) for key in self.rlzs_assoc)
    gsims_assoc = self.rlzs_assoc.get_gsims_by_trt_id()
    return parallel.apply_reduce(
        self.core_func.__func__,
        (sources, self.sitecol, gsims_assoc, mon),
        agg=agg_dicts, acc=accum,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('trt_model_id'))
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, monitor)`, by
    parallelizing on the ruptures according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    mon = self.monitor(self.core_func.__name__)
    mon.oqparam = oq
    blank = zero_curves(len(self.sitecol), self.oqparam.imtls)
    accum = AccumDict((key, blank) for key in self.rlzs_assoc)
    self.gmf_dict = collections.defaultdict(AccumDict)
    curves_by_trt_gsim = parallel.apply_reduce(
        self.core_func.__func__,
        (self.sesruptures, self.sitecol, self.rlzs_assoc, mon),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        acc=accum,
        agg=self.combine_curves_and_save_gmfs,
        key=operator.attrgetter('col_id'))
    return curves_by_trt_gsim
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the ruptures according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    mon = self.monitor(self.core_task.__name__)
    mon.oqparam = oq
    min_iml = fix_minimum_intensity(oq.minimum_intensity, oq.imtls)
    task_args = (self.sesruptures, self.sitecol, oq.imtls,
                 self.rlzs_assoc, min_iml, mon)
    acc = parallel.apply_reduce(
        self.core_task.__func__, task_args,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        agg=self.combine_curves_and_save_gmfs,
        acc=ProbabilityMap(),
        key=operator.attrgetter('trt_id'),
        weight=operator.attrgetter('weight'))
    if oq.ground_motion_fields:
        self.datastore.set_nbytes('gmf_data')
    return acc
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    mon = self.monitor(self.core_func.__name__)
    mon.oqparam = self.oqparam
    sources = self.csm.get_sources()
    blank = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
    accum = AccumDict((key, blank) for key in self.rlzs_assoc)
    accum['calc_times'] = []
    gsims_assoc = self.rlzs_assoc.gsims_by_trt_id
    curves_by_trt_gsim = parallel.apply_reduce(
        self.core_func.__func__,
        (sources, self.sitecol, gsims_assoc, mon),
        agg=agg_dicts, acc=accum,
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('trt_model_id'))
    if self.persistent:
        store_source_chunks(self.datastore)
    return curves_by_trt_gsim
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, info, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns: a dictionary trt_model_id -> list of SESRuptures
    """
    monitor = self.monitor(self.core_func.__name__)
    monitor.oqparam = self.oqparam
    csm = self.composite_source_model
    sources = csm.get_sources()
    ruptures_by_trt = parallel.apply_reduce(
        self.core_func.__func__,
        (sources, self.sitecol, csm.info, monitor),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('trt_model_id'))
    # .values() instead of the Python 2-only .itervalues(), which raises
    # AttributeError on Python 3 (consistent with the sibling execute below)
    logging.info('Generated %d SESRuptures',
                 sum(len(v) for v in ruptures_by_trt.values()))
    self.rlzs_assoc = csm.get_rlzs_assoc(
        lambda trt: len(ruptures_by_trt.get(trt.id, [])))
    return ruptures_by_trt
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the ruptures according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    mon = self.monitor(self.core_task.__name__)
    mon.oqparam = oq
    blank = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
    accum = AccumDict((key, blank) for key in self.rlzs_assoc)
    curves_by_trt_gsim = parallel.apply_reduce(
        self.core_task.__func__,
        (self.sesruptures, self.sitecol, self.rlzs_assoc, mon),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        acc=accum,
        agg=self.combine_curves_and_save_gmfs,
        key=operator.attrgetter('trt_id'),
        weight=operator.attrgetter('multiplicity'))
    if oq.ground_motion_fields:
        self.datastore.set_nbytes('gmf_data')
        self.datastore.set_nbytes('sid_data')
    return curves_by_trt_gsim
def execute(self):
    """
    Run in parallel `core_func(sources, sitecol, info, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    mon = self.monitor(self.core_func.__name__)
    mon.oqparam = self.oqparam
    ruptures_by_trt = parallel.apply_reduce(
        self.core_func.__func__,
        (self.csm.get_sources(), self.sitecol,
         self.rlzs_assoc.csm_info, mon),
        concurrent_tasks=self.oqparam.concurrent_tasks,
        weight=operator.attrgetter('weight'),
        key=operator.attrgetter('trt_model_id'))
    store_source_chunks(self.datastore)
    num_ruptures = sum(len(v) for v in ruptures_by_trt.values())
    logging.info('Generated %d SESRuptures', num_ruptures)
    self.rlzs_assoc = self.csm.get_rlzs_assoc(
        lambda trt: len(ruptures_by_trt.get(trt.id, [])))
    return ruptures_by_trt
def test_apply_reduce(self):
    # ten elements over three tasks -> chunks of 4, 4 and 2
    result = parallel.apply_reduce(
        get_length, (numpy.arange(10),), concurrent_tasks=3)
    self.assertEqual(result, {'n': 10})
    chunk_sizes = [len(chunk) for chunk in parallel.apply_reduce._chunks]
    self.assertEqual(chunk_sizes, [4, 4, 2])
def test_apply_reduce(self):
    res = parallel.apply_reduce(
        get_length, (numpy.arange(10),),
        concurrent_tasks=3)
    self.assertEqual(res, {'n': 10})
    # the ten elements are split into chunks of sizes 4, 4 and 2
    sizes = [len(c) for c in parallel.apply_reduce._chunks]
    self.assertEqual(sizes, [4, 4, 2])