def test_apply_maxweight(self):
    res = parallel.apply(
        get_length, ('aaabb',), maxweight=2, key=lambda char: char)
    # chunks ['aa', 'ab', 'b']
    partial_sums = sorted(dic['n'] for dic in res)
    self.assertEqual(partial_sums, [1, 2, 2])
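
# A minimal, self-contained sketch of how maxweight chunking could work.
# `split_by_maxweight` is a hypothetical stand-in, not the actual
# openquake.baselib splitter: items are accumulated into a block until
# adding one more would exceed maxweight.
def split_by_maxweight(items, maxweight, weight=lambda item: 1):
    block, tot = [], 0
    for item in items:
        w = weight(item)
        if block and tot + w > maxweight:
            yield block
            block, tot = [], 0
        block.append(item)
        tot += w
    if block:
        yield block

# For 'aaabb' with unit weights and maxweight=2 this yields
# [['a', 'a'], ['a', 'b'], ['b']], i.e. the chunk sizes [2, 2, 1]
# checked by test_apply_maxweight above.
print(list(split_by_maxweight('aaabb', 2)))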
def test_apply_no_tasks(self):
    res = parallel.apply(
        get_length, ('aaabb',), concurrent_tasks=0,
        key=lambda char: char)
    # chunks [['a', 'a', 'a'], ['b', 'b']]
    partial_sums = sorted(dic['n'] for dic in res)
    self.assertEqual(partial_sums, [2, 3])
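
# A hedged sketch of the concurrent_tasks=0 case: with no tasks requested,
# the input is only grouped by the key function, one chunk per distinct key.
# This mirrors the behaviour the test above relies on; the real splitting
# lives in openquake.baselib, and `split_by_key` is only an illustration.
import itertools

def split_by_key(items, key):
    for _k, group in itertools.groupby(sorted(items, key=key), key):
        yield list(group)

# 'aaabb' with key=lambda char: char gives the two chunks
# [['a', 'a', 'a'], ['b', 'b']] asserted in test_apply_no_tasks.
print(list(split_by_key('aaabb', key=lambda char: char)))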
def execute(self):
    A = len(self.assetcol)
    ltypes = self.riskmodel.loss_types
    I = self.oqparam.insured_losses + 1
    R = len(self.rlzs_assoc.realizations)
    self.vals = self.assetcol.values()

    # loss curves
    multi_lr_dt = numpy.dtype(
        [(ltype, (F32, len(cbuilder.ratios)))
         for ltype, cbuilder in zip(
             ltypes, self.riskmodel.curve_builders)])
    rcurves = numpy.zeros((A, R, I), multi_lr_dt)

    # build rcurves-rlzs
    if self.oqparam.loss_ratios:
        assets = list(self.assetcol)
        cb_inputs = self.cb_inputs('all_loss_ratios')
        mon = self.monitor('build_rcurves')
        res = parallel.apply(
            build_rcurves, (cb_inputs, assets, mon)).reduce()
        for l, r in res:
            aids, curves = res[l, r]
            rcurves[ltypes[l]][aids, r] = curves
        self.datastore['rcurves-rlzs'] = rcurves

    # build rcurves-stats (sequentially); this is a fundamental output,
    # being used to compute loss_maps-stats
    if R > 1:
        weights = self.datastore['realizations']['weight']
        quantiles = self.oqparam.quantile_loss_curves
        with self.monitor('computing avg_losses-stats'):
            self.datastore['avg_losses-stats'] = compute_stats2(
                self.datastore['avg_losses-rlzs'], quantiles, weights)
        with self.monitor('computing rcurves-stats'):
            self.datastore['rcurves-stats'] = compute_stats2(
                rcurves, quantiles, weights)

    # build an aggregate loss curve per realization
    if 'agg_loss_table' in self.datastore:
        with self.monitor('building agg_curve'):
            self.build_agg_curve()
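
# An illustration of the structured dtype built in execute above: one field
# per loss type, each holding a vector of F32 loss ratios. The concrete
# numbers (2 loss types, curve resolutions 4 and 5, A=3 assets, R=2
# realizations, I=1) are made up for this example.
import numpy
F32 = numpy.float32

multi_lr_dt = numpy.dtype([('structural', (F32, 4)),
                           ('nonstructural', (F32, 5))])
A, R, I = 3, 2, 1
rcurves = numpy.zeros((A, R, I), multi_lr_dt)
# assigning the curves for one loss type, a set of asset ids and a
# realization, as done in the `for l, r in res` loop above:
aids, r = [0, 2], 1
rcurves['structural'][aids, r] = numpy.ones((2, 1, 4), F32)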
def build_agg_curve(self):
    """
    Build a single loss curve per realization. It is NOT obtained by
    aggregating the loss curves; instead, it is obtained without
    generating the loss curves, directly from the aggregate losses.
    """
    oq = self.oqparam
    cr = {cb.loss_type: cb.curve_resolution
          for cb in self.riskmodel.curve_builders}
    loss_curve_dt, _ = scientific.build_loss_dtypes(
        cr, oq.conditional_loss_poes)
    lts = self.riskmodel.loss_types
    cb_inputs = self.cb_inputs('agg_loss_table')
    I = oq.insured_losses + 1
    R = len(self.rlzs_assoc.realizations)
    result = parallel.apply(
        build_agg_curve, (cb_inputs, self.monitor('')),
        concurrent_tasks=self.oqparam.concurrent_tasks).reduce()
    agg_curve = numpy.zeros((I, R), loss_curve_dt)
    for l, r, i in result:
        agg_curve[lts[l]][i, r] = result[l, r, i]
    self.datastore['agg_curve-rlzs'] = agg_curve

    if R > 1:  # save stats too
        weights = self.datastore['realizations']['weight']
        Q1 = len(oq.quantile_loss_curves) + 1
        agg_curve_stats = numpy.zeros((I, Q1), agg_curve.dtype)
        for l, loss_type in enumerate(agg_curve.dtype.names):
            acs = agg_curve_stats[loss_type]
            data = agg_curve[loss_type]
            for i in range(I):
                losses, all_poes = scientific.normalize_curves_eb(
                    [(c['losses'], c['poes']) for c in data[i]])
                acs['losses'][i] = losses
                acs['poes'][i] = compute_stats(
                    all_poes, oq.quantile_loss_curves, weights)
                acs['avg'][i] = compute_stats(
                    data['avg'][i], oq.quantile_loss_curves, weights)
        self.datastore['agg_curve-stats'] = agg_curve_stats
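
# A minimal sketch of what compute_stats could do with the poes, under the
# assumption (not verified here) that it returns the weighted mean plus one
# row per requested quantile; that assumption is what makes
# Q1 = len(oq.quantile_loss_curves) + 1 above. `compute_stats_sketch` is a
# hypothetical name; the real function computes *weighted* quantiles, while
# numpy.quantile below ignores the weights and is only a stand-in.
import numpy

def compute_stats_sketch(array, quantiles, weights):
    # array has one row per realization; weights sum to 1
    mean = weights @ array
    qs = [numpy.quantile(array, q, axis=0) for q in quantiles]
    return numpy.array([mean] + qs)

poes = numpy.array([[0.9, 0.5, 0.10],
                    [0.8, 0.4, 0.05]])
print(compute_stats_sketch(poes, [0.85], numpy.array([0.6, 0.4])))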
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    monitor = self.monitor.new(self.core_task.__name__)
    monitor.oqparam = oq = self.oqparam
    ucerf_source = self.src_group.sources[0]
    self.src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    max_dist = oq.maximum_distance[DEFAULT_TRT]
    acc = AccumDict({
        grp_id: ProbabilityMap(len(oq.imtls.array), len(gsims))
        for grp_id, gsims in self.rlzs_assoc.gsims_by_grp_id.items()})
    acc.calc_times = []
    acc.eff_ruptures = AccumDict()  # grp_id -> eff_ruptures
    acc.bb_dict = {}

    if len(self.csm) > 1:
        # when there are multiple branches, parallelize by branch
        branches = [br.value for br in self.smlt.branches.values()]
        rup_res = parallel.starmap(
            ucerf_classical_hazard_by_branch,
            self.gen_args(branches, ucerf_source, monitor)).submit_all()
    else:
        # single branch
        gsims = self.rlzs_assoc.gsims_by_grp_id[0]
        [(branch_id, branch)] = self.smlt.branches.items()
        branchname = branch.value
        ucerf_source.src_group_id = 0
        ucerf_source.weight = 1
        ucerf_source.nsites = len(self.sitecol)
        self.infos[0, ucerf_source.source_id] = source.SourceInfo(
            ucerf_source)
        logging.info('Getting the background point sources')
        with self.monitor('getting background sources', autoflush=True):
            ucerf_source.build_idx_set()
            background_sids = ucerf_source.get_background_sids(
                self.sitecol, max_dist)
            bckgnd_sources = ucerf_source.get_background_sources(
                background_sids)

        # parallelize on the background sources, small tasks
        args = (bckgnd_sources, self.src_filter, oq.imtls, gsims,
                self.oqparam.truncation_level, (), monitor)
        bg_res = parallel.apply(
            pmap_from_grp, args,
            concurrent_tasks=self.oqparam.concurrent_tasks).submit_all()

        # parallelize by rupture subsets
        tasks = self.oqparam.concurrent_tasks * 2  # they are big tasks
        rup_sets = ucerf_source.get_rupture_indices(branchname)
        rup_res = parallel.apply(
            ucerf_classical_hazard_by_rupture_set,
            (rup_sets, branchname, ucerf_source, self.src_group.id,
             self.src_filter, gsims, monitor),
            concurrent_tasks=tasks).submit_all()

        # compose probabilities from background sources
        for pmap in bg_res:
            acc[0] |= pmap
        self.save_data_transfer(bg_res)

    pmap_by_grp_id = functools.reduce(self.agg_dicts, rup_res, acc)
    with self.monitor('store source_info', autoflush=True):
        self.store_source_info(self.infos)
    self.save_data_transfer(rup_res)
    self.datastore['csm_info'] = self.csm.info
    self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
        functools.partial(self.count_eff_ruptures, pmap_by_grp_id))
    return pmap_by_grp_id
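
# A hedged sketch of the composition done in the `for pmap in bg_res` loop
# above: apply(...).submit_all() returns an object that yields per-task
# results, and ProbabilityMaps are composed with `|=` (probabilistic union
# of poes). `PmapSketch` is a dict-of-arrays stand-in, not the real
# openquake.hazardlib ProbabilityMap.
import numpy

class PmapSketch(dict):  # sid -> array of poes
    def __ior__(self, other):
        for sid, poes in other.items():
            if sid in self:
                # 1 - (1-p1)*(1-p2), the usual composition of poes
                self[sid] = 1. - (1. - self[sid]) * (1. - poes)
            else:
                self[sid] = poes
        return self

acc = PmapSketch()
for pmap in [PmapSketch({0: numpy.array([.1])}),
             PmapSketch({0: numpy.array([.2])})]:
    acc |= pmap
print(acc)  # -> {0: array([0.28])}, since 1 - 0.9 * 0.8 = 0.28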
def test_received(self):
    with mock.patch.dict(os.environ, {'OQ_DISTRIBUTE': 'celery'}):
        res = parallel.apply(
            get_length, (numpy.arange(10),)).submit_all()
        list(res)  # iterate on the results
        self.assertGreater(len(res.received), 0)
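
# mock.patch.dict temporarily overrides entries of a mapping and restores
# them on exit, which is why the test above uses it on os.environ instead of
# replacing the whole mapping. A minimal standalone demo:
import os
from unittest import mock

with mock.patch.dict(os.environ, {'OQ_DISTRIBUTE': 'celery'}):
    print(os.environ['OQ_DISTRIBUTE'])  # 'celery' inside the block
# here the previous value of OQ_DISTRIBUTE (or its absence) is restored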
def test_apply(self):
    res = parallel.apply(
        get_length, (numpy.arange(10),), concurrent_tasks=3).reduce()
    self.assertEqual(res, {'n': 10})  # chunks [4, 4, 2]
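
# A sketch of what reduce() does with its default aggregator: the partial
# dictionaries coming from each task are merged by adding values key by key
# (AccumDict-style semantics in openquake.baselib; plain functools.reduce
# with a hypothetical helper shown here).
import functools

def add_dicts(acc, partial):
    for k, v in partial.items():
        acc[k] = acc.get(k, 0) + v
    return acc

partials = [{'n': 4}, {'n': 4}, {'n': 2}]  # one per chunk [4, 4, 2]
print(functools.reduce(add_dicts, partials, {}))  # -> {'n': 10}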