def build_starmap(self, ssm, sitecol, assetcol, riskmodel, imts,
                  trunc_level, correl_model, min_iml, monitor):
    """
    :param ssm: CompositeSourceModel containing a single source model
    :param sitecol: a SiteCollection instance
    :param assetcol: an AssetCollection instance
    :param riskmodel: a RiskModel instance
    :param imts: a list of Intensity Measure Types
    :param trunc_level: truncation level
    :param correl_model: correlation model
    :param min_iml: vector of minimum intensities, one per IMT
    :param monitor: a Monitor instance
    :returns: a pair (starmap, attributes dictionary)
    """
    ruptures_by_grp = AccumDict()
    num_ruptures = 0
    num_events = 0
    allargs = []
    grp_trt = {}
    # collect the sources
    maxweight = ssm.get_maxweight(self.oqparam.concurrent_tasks)
    logging.info('Using a maxweight of %d', maxweight)
    for src_group in ssm.src_groups:
        grp_trt[src_group.id] = trt = src_group.trt
        gsims = ssm.gsim_lt.values[trt]
        for block in block_splitter(src_group, maxweight, getweight):
            allargs.append((block, self.sitecol, gsims, monitor))
    # collect the ruptures
    for dic in parallel.starmap(self.compute_ruptures, allargs):
        ruptures_by_grp += dic
        [rupts] = dic.values()
        num_ruptures += len(rupts)
        num_events += dic.num_events
    ruptures_by_grp.num_events = num_events
    save_ruptures(self, ruptures_by_grp)
    # determine the realizations; default to an empty list (not 0, which
    # would break len) for groups without ruptures
    rlzs_assoc = ssm.info.get_rlzs_assoc(
        count_ruptures=lambda grp: len(ruptures_by_grp.get(grp.id, [])))
    allargs = []
    # prepare the risk inputs
    ruptures_per_block = self.oqparam.ruptures_per_block
    for src_group in ssm.src_groups:
        for rupts in block_splitter(
                ruptures_by_grp[src_group.id], ruptures_per_block):
            trt = grp_trt[rupts[0].grp_id]
            ri = riskinput.RiskInputFromRuptures(
                trt, imts, sitecol, rupts, trunc_level,
                correl_model, min_iml)
            allargs.append((ri, riskmodel, rlzs_assoc, assetcol, monitor))
    taskname = '%s#%d' % (losses_by_taxonomy.__name__, ssm.sm_id + 1)
    smap = starmap(losses_by_taxonomy, allargs, name=taskname)
    attrs = dict(num_ruptures={
        sg_id: len(rupts) for sg_id, rupts in ruptures_by_grp.items()},
                 num_events=num_events,
                 num_rlzs=len(rlzs_assoc.realizations),
                 sm_id=ssm.sm_id)
    return smap, attrs
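# A minimal sketch (not from the source) of how the pair returned by
# build_starmap might be consumed: run the starmap, fold the partial
# results into an accumulator, and persist the attributes. `agg` and
# `datastore` are hypothetical stand-ins for the caller's aggregator
# and storage object.
def run_starmap(smap, attrs, agg, datastore):
    acc = smap.reduce(agg)  # execute the tasks and combine their outputs
    for name, value in attrs.items():
        datastore[name] = value  # hypothetical persistence of the metadata
    return acc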
def execute(self):
    """
    Split the computation by tiles which are run in parallel.
    """
    monitor = self.monitor(self.core_func.__name__)
    monitor.oqparam = oq = self.oqparam
    self.tiles = split_in_blocks(
        self.sitecol, self.oqparam.concurrent_tasks or 1)
    oq.concurrent_tasks = 0
    calculator = ClassicalCalculator(
        self.oqparam, monitor, persistent=False)
    calculator.csm = self.csm
    rlzs_assoc = self.csm.get_rlzs_assoc()
    self.rlzs_assoc = calculator.rlzs_assoc = rlzs_assoc
    # parallelization
    all_args = []
    position = 0
    for (i, tile) in enumerate(self.tiles):
        all_args.append(
            (calculator, SiteCollection(tile), position, i, monitor))
        position += len(tile)
    acc = {trt_gsim: zero_curves(len(self.sitecol), oq.imtls)
           for trt_gsim in calculator.rlzs_assoc}
    acc['calc_times'] = []
    return parallel.starmap(classical_tiling, all_args).reduce(
        agg_curves_by_trt_gsim, acc)
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the ruptures according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    self.sesruptures = []
    if self.precalc:  # the ruptures are already in memory
        for grp_id, sesruptures in self.precalc.result.items():
            for sr in sesruptures:
                self.sesruptures.append(sr)
    else:  # read the ruptures from the datastore
        for serial in self.datastore['sescollection']:
            sr = self.datastore['sescollection/' + serial]
            self.sesruptures.append(sr)
        self.sesruptures.sort(key=operator.attrgetter('serial'))
    if self.oqparam.ground_motion_fields:
        calc.check_overflow(self)
    L = len(oq.imtls.array)
    res = parallel.starmap(
        self.core_task.__func__, self.gen_args(self.sesruptures)
    ).submit_all()
    acc = functools.reduce(self.combine_pmaps_and_save_gmfs, res, {
        rlz.ordinal: ProbabilityMap(L, 1)
        for rlz in self.rlzs_assoc.realizations})
    self.save_data_transfer(res)
    return acc
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    monitor = self.monitor.new(
        self.core_task.__name__,
        truncation_level=oq.truncation_level,
        imtls=oq.imtls,
        maximum_distance=oq.maximum_distance,
        poes_disagg=oq.poes_disagg,
        ses_per_logic_tree_path=oq.ses_per_logic_tree_path,
        seed=oq.random_seed)
    with self.monitor('managing sources', autoflush=True):
        src_groups = list(self.csm.src_groups)
        iterargs = saving_sources_by_task(
            self.gen_args(src_groups, oq, monitor), self.datastore)
        res = parallel.starmap(
            self.core_task.__func__, iterargs).submit_all()
    acc = reduce(self.agg_dicts, res, self.zerodict())
    self.save_data_transfer(res)
    with self.monitor('store source_info', autoflush=True):
        self.store_source_info(self.infos)
    self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
        partial(self.count_eff_ruptures, acc))
    self.datastore['csm_info'] = self.csm.info
    return acc
def test_spawn(self):
    all_data = [("a", list(range(10))), ("b", list(range(20))),
                ("c", list(range(15)))]
    res = {key: parallel.starmap(get_length, [(data,)])
           for key, data in all_data}
    for key, val in res.items():
        res[key] = val.reduce()
    parallel.TaskManager.restart()
    self.assertEqual(res, {"a": {"n": 10}, "c": {"n": 15}, "b": {"n": 20}})
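# `get_length` is not defined in this section; given the assertions above
# it presumably just counts its input. A minimal sketch consistent with
# the expected results (the optional `monitor` argument is an assumption,
# since the framework may pass a monitor to each task):
def get_length(data, monitor=None):
    return {"n": len(data)}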
def execute(self):
    """
    Split the computation by tiles which are run in parallel.
    """
    acc = AccumDict(
        {trt_gsim: zero_curves(len(self.sitecol), self.oqparam.imtls)
         for trt_gsim in self.rlzs_assoc})
    acc.calc_times = []
    acc.n = len(self.sitecol)
    hint = math.ceil(acc.n / self.oqparam.sites_per_tile)
    tiles = self.sitecol.split_in_tiles(hint)
    logging.info('Generating %d tiles of %d sites each',
                 len(tiles), len(tiles[0]))
    sources = self.csm.get_sources()
    rlzs_assoc = self.csm.get_rlzs_assoc()
    ctasks = self.oqparam.concurrent_tasks or 1
    maxweight = math.ceil(self.csm.weight / ctasks)
    siteidx = 0
    tmanagers = []
    maximum_distance = self.oqparam.maximum_distance
    # try to produce more tasks than self.oqparam.concurrent_tasks
    num_blocks = math.ceil(self.MORE_TASKS * ctasks / len(tiles))
    splitmap = {}
    for i, tile in enumerate(tiles, 1):
        monitor = self.monitor.new()
        monitor.oqparam = self.oqparam
        with self.monitor('filtering sources per tile', autoflush=True):
            filtered_sources = [
                src for src in sources
                if src.filter_sites_by_distance_to_source(
                    maximum_distance, tile) is not None]
            if not filtered_sources:
                continue
        # split the sources that survived the filtering, not all of them
        blocks = split_in_blocks(
            split_sources(filtered_sources, maxweight, splitmap),
            num_blocks,
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('trt_model_id'))
        tm = parallel.starmap(
            classical,
            ((blk, tile, siteidx, rlzs_assoc, monitor) for blk in blocks),
            name='tile_%d/%d' % (i, len(tiles)))
        tmanagers.append(tm)
        siteidx += len(tile)
    logging.info('Total number of tasks submitted: %d',
                 sum(len(tm.results) for tm in tmanagers))
    for tm in tmanagers:
        tm.reduce(self.agg_dicts, acc)
    self.rlzs_assoc = self.csm.get_rlzs_assoc(
        partial(is_effective_trt_model, acc))
    return acc
def execute(self):
    """
    Parallelize on the riskinputs and return a dictionary of results.
    Requires a `.core_task` to be defined with signature
    (riskinputs, riskmodel, rlzs_assoc, monitor).
    """
    self.monitor.oqparam = self.oqparam
    rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
    if rlz_ids:
        self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)
    all_args = ((riskinput, self.riskmodel, self.rlzs_assoc) +
                self.extra_args + (self.monitor,)
                for riskinput in self.riskinputs)
    res = starmap(self.core_task.__func__, all_args).reduce()
    return res
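# A minimal sketch of a `core_task` with the signature the docstring above
# requires; everything in the body is a hypothetical placeholder, not the
# real RiskModel API.
def example_core_task(riskinput, riskmodel, rlzs_assoc, monitor):
    with monitor('computing losses'):  # Monitor used as context manager
        losses = riskmodel.compute(riskinput, rlzs_assoc)  # hypothetical
    return {riskinput.sid: losses}  # a dict, so .reduce() can merge results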
def send_sources(self):
    """
    Filter/split and send the sources to the workers.

    :returns: a :class:`openquake.commonlib.parallel.TaskManager`
    """
    oq = self.oqparam
    tiles = [self.sitecol]
    self.num_tiles = 1
    if self.is_tiling():
        hint = math.ceil(len(self.sitecol) / oq.sites_per_tile)
        tiles = self.sitecol.split_in_tiles(hint)
        self.num_tiles = len(tiles)
        logging.info('Generating %d tiles of %d sites each',
                     self.num_tiles, len(tiles[0]))
    manager = source.SourceManager(
        self.csm, oq.maximum_distance, self.datastore,
        self.monitor.new(oqparam=oq), self.random_seed,
        oq.filter_sources, num_tiles=self.num_tiles)
    tm = starmap(self.core_task.__func__, manager.gen_args(tiles))
    manager.store_source_info(self.datastore)
    return tm
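# Hedged usage sketch: the TaskManager returned by send_sources is meant
# to be reduced by the caller, mirroring the
# reduce(self.agg_dicts, res, self.zerodict()) pattern used elsewhere in
# this section:
#
#     tm = self.send_sources()
#     acc = tm.reduce(self.agg_dicts, self.zerodict())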
def execute(self):
    """
    Builds hcurves and stats from the stored PoEs.
    """
    if 'poes' not in self.datastore:  # for short report
        return
    oq = self.oqparam
    rlzs = self.rlzs_assoc.realizations
    # initialize datasets
    N = len(self.sitecol)
    L = len(oq.imtls.array)
    attrs = dict(
        __pyclass__='openquake.hazardlib.probability_map.ProbabilityMap',
        sids=numpy.arange(N, dtype=numpy.uint32))
    if oq.individual_curves:
        for rlz in rlzs:
            self.datastore.create_dset(
                'hcurves/rlz-%03d' % rlz.ordinal, F32, (N, L, 1),
                attrs=attrs)
    if oq.mean_hazard_curves:
        self.datastore.create_dset(
            'hcurves/mean', F32, (N, L, 1), attrs=attrs)
    for q in oq.quantile_hazard_curves:
        self.datastore.create_dset(
            'hcurves/quantile-%s' % q, F32, (N, L, 1), attrs=attrs)
    self.datastore.flush()
    logging.info('Building hazard curves')
    with self.monitor('submitting poes', autoflush=True):
        pmap_by_grp = {
            int(group_id): self.datastore['poes/' + group_id]
            for group_id in self.datastore['poes']}
        res = parallel.starmap(
            build_hcurves_and_stats,
            list(self.gen_args(pmap_by_grp))).submit_all()
    with self.monitor('saving hcurves and stats', autoflush=True):
        nbytes = reduce(self.save_hcurves, res, AccumDict())
        self.save_data_transfer(res)
        return nbytes
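# Self-contained toy illustration (not from the source) of the
# submit_all()/reduce contract used above: each task result is folded
# into an accumulator by a callable with signature agg(acc, result) -> acc,
# just as self.save_hcurves is folded over `res` with an AccumDict.
import functools

def merge_nbytes(acc, result):
    # stand-in for self.save_hcurves: merge per-key byte counts
    for key, nbytes in result.items():
        acc[key] = acc.get(key, 0) + nbytes
    return acc

results = [{'hcurves/mean': 100}, {'hcurves/mean': 50}]
assert functools.reduce(merge_nbytes, results, {}) == {'hcurves/mean': 150}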
def execute(self):
    """
    Run the event_based_risk calculator and aggregate the results.
    """
    oq = self.oqparam
    correl_model = oq.get_correl_model()
    self.N = len(self.assetcol)
    self.E = sum(len(v) for v in self.datastore['events'].values())
    logging.info('Populating the risk inputs')
    all_ruptures = []
    preprecalc = getattr(self.precalc, 'precalc', None)
    if preprecalc:  # the ruptures are already in memory
        for grp_id, sesruptures in preprecalc.result.items():
            for sr in sesruptures:
                all_ruptures.append(sr)
    else:  # read the ruptures from the datastore
        for serial in self.datastore['sescollection']:
            rup = self.datastore['sescollection/' + serial]
            all_ruptures.append(rup)
    all_ruptures.sort(key=operator.attrgetter('serial'))
    if not self.riskmodel.covs:
        # do not generate epsilons
        eps = None
    else:
        eps = riskinput.make_eps(
            self.assets_by_site, self.E, oq.master_seed,
            oq.asset_correlation)
        logging.info('Generated %s epsilons', eps.shape)
    # preparing empty datasets
    loss_types = self.riskmodel.loss_types
    self.C = self.oqparam.loss_curve_resolution
    self.L = L = len(loss_types)
    self.R = R = len(self.rlzs_assoc.realizations)
    self.I = self.oqparam.insured_losses
    # ugly: attaching attributes needed in the task function
    mon = self.monitor
    mon.num_assets = self.count_assets()
    mon.avg_losses = self.oqparam.avg_losses
    mon.asset_loss_table = self.oqparam.asset_loss_table
    mon.insured_losses = self.I
    mon.ses_ratio = (
        oq.risk_investigation_time or oq.investigation_time) / (
            oq.investigation_time * oq.ses_per_logic_tree_path)
    self.N = N = len(self.assetcol)
    self.E = sum(len(v) for v in self.datastore['events'].values())
    # average losses, stored in a composite array of shape N, R
    self.avg_losses = numpy.zeros((N, R), oq.loss_dt())
    self.ass_loss_table = square(L, R, lambda: None)
    self.agg_loss_table = square(L, R, lambda: None)
    self.ela_dt, self.elt_dt = mon.ela_dt, mon.elt_dt = build_el_dtypes(
        self.I)
    for (l, r) in itertools.product(range(L), range(R)):
        lt = loss_types[l]
        if self.oqparam.asset_loss_table:
            self.ass_loss_table[l, r] = self.datastore.create_dset(
                'ass_loss_table/rlz-%03d/%s' % (r, lt), self.ela_dt)
        self.agg_loss_table[l, r] = self.datastore.create_dset(
            'agg_loss_table/rlz-%03d/%s' % (r, lt), self.elt_dt)
    self.saved = collections.Counter()  # nbytes per HDF5 key
    self.ass_bytes = 0
    self.agg_bytes = 0
    self.gmfbytes = 0
    rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
    if rlz_ids:
        self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)
    if not oq.minimum_intensity:
        # infer it from the risk models if not directly set in job.ini
        oq.minimum_intensity = self.riskmodel.get_min_iml()
    min_iml = calc.fix_minimum_intensity(
        oq.minimum_intensity, oq.imtls)
    if min_iml.sum() == 0:
        logging.warn('The GMFs are not filtered: '
                     'you may want to set a minimum_intensity')
    else:
        logging.info('minimum_intensity=%s', oq.minimum_intensity)
    csm_info = self.datastore['csm_info']
    grp_trt = {sg.id: sg.trt for sm in csm_info.source_models
               for sg in sm.src_groups}
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = self.riskmodel.build_inputs_from_ruptures(
            grp_trt, list(oq.imtls), self.sitecol.complete, all_ruptures,
            oq.truncation_level, correl_model, min_iml, eps,
            oq.concurrent_tasks or 1)
    # NB: I am using generators so that the tasks are submitted one at
    # the time, without keeping all of the arguments in memory
    res = starmap(
        self.core_task.__func__,
        ((riskinput, self.riskmodel, self.rlzs_assoc,
          self.assetcol, self.monitor.new('task'))
         for riskinput in riskinputs)).submit_all()
    acc = functools.reduce(self.agg, res, AccumDict())
    self.save_data_transfer(res)
    return acc
def execute(self):
    """
    Run the event_based_risk calculator and aggregate the results.
    """
    oq = self.oqparam
    correl_model = readinput.get_correl_model(oq)
    self.N = len(self.assetcol)
    self.E = len(self.etags)
    logging.info('Populating the risk inputs')
    rlzs_by_tr_id = self.rlzs_assoc.get_rlzs_by_trt_id()
    num_rlzs = {t: len(rlzs) for t, rlzs in rlzs_by_tr_id.items()}
    num_assets = {sid: len(self.assets_by_site[sid])
                  for sid in self.sitecol.sids}
    all_ruptures = []
    for serial in self.datastore['sescollection']:
        rup = self.datastore['sescollection/' + serial]
        rup.set_weight(num_rlzs, num_assets)
        all_ruptures.append(rup)
    all_ruptures.sort(key=operator.attrgetter('serial'))
    if not self.riskmodel.covs:
        # do not generate epsilons
        eps = None
    else:
        eps = riskinput.make_eps(
            self.assets_by_site, self.E, oq.master_seed,
            oq.asset_correlation)
        logging.info('Generated %s epsilons', eps.shape)
    # preparing empty datasets
    loss_types = self.riskmodel.loss_types
    self.C = self.oqparam.loss_curve_resolution
    self.L = L = len(loss_types)
    self.R = R = len(self.rlzs_assoc.realizations)
    self.I = self.oqparam.insured_losses
    # ugly: attaching attributes needed in the task function
    mon = self.monitor
    mon.num_assets = self.count_assets()
    mon.avg_losses = self.oqparam.avg_losses
    mon.asset_loss_table = self.oqparam.asset_loss_table
    mon.insured_losses = self.I
    mon.ses_ratio = (
        oq.risk_investigation_time or oq.investigation_time) / (
            oq.investigation_time * oq.ses_per_logic_tree_path)
    self.N = N = len(self.assetcol)
    self.E = len(self.datastore['etags'])
    # average losses, stored in a composite array of shape N, R
    multi_avg_dt = self.riskmodel.loss_type_dt(insured=self.I)
    self.avg_losses = numpy.zeros((N, R), multi_avg_dt)
    self.ass_loss_table = square(L, R, lambda: None)
    self.agg_loss_table = square(L, R, lambda: None)
    self.ela_dt, self.elt_dt = mon.ela_dt, mon.elt_dt = build_el_dtypes(
        self.I)
    for (l, r) in itertools.product(range(L), range(R)):
        lt = loss_types[l]
        if self.oqparam.asset_loss_table:
            self.ass_loss_table[l, r] = self.datastore.create_dset(
                'ass_loss_table/rlz-%03d/%s' % (r, lt), self.ela_dt)
        self.agg_loss_table[l, r] = self.datastore.create_dset(
            'agg_loss_table/rlz-%03d/%s' % (r, lt), self.elt_dt)
    self.saved = collections.Counter()  # nbytes per HDF5 key
    self.ass_bytes = 0
    self.agg_bytes = 0
    self.gmfbytes = 0
    rlz_ids = getattr(self.oqparam, 'rlz_ids', ())
    if rlz_ids:
        self.rlzs_assoc = self.rlzs_assoc.extract(rlz_ids)
    if not oq.minimum_intensity:
        # infer it from the risk models if not directly set in job.ini
        oq.minimum_intensity = self.riskmodel.get_min_iml()
    min_iml = calc.fix_minimum_intensity(
        oq.minimum_intensity, oq.imtls)
    if min_iml.sum() == 0:
        logging.warn('The GMFs are not filtered: '
                     'you may want to set a minimum_intensity')
    else:
        logging.info('minimum_intensity=%s', oq.minimum_intensity)
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = self.riskmodel.build_inputs_from_ruptures(
            self.sitecol.complete, all_ruptures, oq.truncation_level,
            correl_model, min_iml, eps, oq.concurrent_tasks or 1)
    # NB: I am using generators so that the tasks are submitted one at
    # the time, without keeping all of the arguments in memory
    tm = starmap(
        self.core_task.__func__,
        ((riskinput, self.riskmodel, self.rlzs_assoc,
          self.assetcol, self.monitor.new('task'))
         for riskinput in riskinputs))
    return tm.reduce(agg=self.agg, posthook=self.save_data_transfer)
def full_disaggregation(self, curves_by_trt_gsim):
    """
    Run the disaggregation phase after hazard curve finalization.
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    sitecol = self.sitecol
    mag_bin_width = self.oqparam.mag_bin_width
    eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
    logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                 min(eps_edges), max(eps_edges))
    self.bin_edges = {}
    curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
    all_args = []
    num_trts = sum(len(sm.trt_models) for sm in self.csm.source_models)
    nblocks = math.ceil(oq.concurrent_tasks / num_trts)
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        trt_names = tuple(mod.trt for mod in smodel.trt_models)
        max_mag = max(mod.max_mag for mod in smodel.trt_models)
        min_mag = min(mod.min_mag for mod in smodel.trt_models)
        mag_edges = mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / mag_bin_width)),
            int(numpy.ceil(max_mag / mag_bin_width) + 1))
        logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                     min_mag, max_mag)
        for trt_model in smodel.trt_models:
            for site in sitecol:
                curves = curves_dict[site.id]
                if not curves:
                    continue  # skip zero-valued hazard curves
                bb = curves_by_trt_gsim.bb_dict[sm_id, site.id]
                if not bb:
                    logging.info(
                        'location %s was too far, skipping disaggregation',
                        site.location)
                    continue
                dist_edges, lon_edges, lat_edges = bb.bins_edges(
                    oq.distance_bin_width, oq.coordinate_bin_width)
                logging.info(
                    '%d dist bins from %s to %s', len(dist_edges) - 1,
                    min(dist_edges), max(dist_edges))
                logging.info(
                    '%d lon bins from %s to %s', len(lon_edges) - 1,
                    bb.west, bb.east)
                # count the latitude bins with lat_edges, not lon_edges
                logging.info(
                    '%d lat bins from %s to %s', len(lat_edges) - 1,
                    bb.south, bb.north)
                self.bin_edges[sm_id, site.id] = (
                    mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
            bin_edges = {}
            for site in sitecol:
                if (sm_id, site.id) in self.bin_edges:
                    bin_edges[site.id] = self.bin_edges[sm_id, site.id]
            for srcs in split_in_blocks(trt_model, nblocks):
                all_args.append(
                    (sitecol, srcs, trt_model.id, self.rlzs_assoc,
                     trt_names, curves_dict, bin_edges, oq, self.monitor))
    results = parallel.starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    self.save_disagg_results(results)
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    monitor = self.monitor.new(self.core_task.__name__)
    monitor.oqparam = oq = self.oqparam
    ucerf_source = self.src_group.sources[0]
    max_dist = oq.maximum_distance[DEFAULT_TRT]
    acc = AccumDict({
        grp_id: ProbabilityMap(len(oq.imtls.array), len(gsims))
        for grp_id, gsims in self.rlzs_assoc.gsims_by_grp_id.items()})
    acc.calc_times = []
    acc.eff_ruptures = AccumDict()  # grp_id -> eff_ruptures
    acc.bb_dict = {}
    if len(self.csm) > 1:
        # when there are multiple branches, parallelise by branch
        branches = [br.value for br in self.smlt.branches.values()]
        rup_res = parallel.starmap(
            ucerf_classical_hazard_by_branch,
            self.gen_args(branches, ucerf_source, monitor)).submit_all()
    else:
        # single branch
        gsims = self.rlzs_assoc.gsims_by_grp_id[0]
        [(branch_id, branch)] = self.smlt.branches.items()
        branchname = branch.value
        ucerf_source.src_group_id = 0
        ucerf_source.weight = 1
        ucerf_source.nsites = len(self.sitecol)
        self.infos[0, ucerf_source.source_id] = source.SourceInfo(
            ucerf_source)
        logging.info('Getting the background point sources')
        with self.monitor('getting background sources', autoflush=True):
            ucerf_source.build_idx_set()
            background_sids = ucerf_source.get_background_sids(
                self.sitecol, max_dist)
            bckgnd_sources = ucerf_source.get_background_sources(
                background_sids)
        # parallelize on the background sources, small tasks
        args = (bckgnd_sources, self.sitecol, oq.imtls, gsims,
                self.oqparam.truncation_level, 'SourceSitesFilter',
                max_dist, (), monitor)
        bg_res = parallel.apply(
            pmap_from_grp, args,
            concurrent_tasks=self.oqparam.concurrent_tasks).submit_all()
        # parallelize by rupture subsets
        tasks = self.oqparam.concurrent_tasks * 2  # they are big tasks
        rup_sets = ucerf_source.get_rupture_indices(branchname)
        rup_res = parallel.apply(
            ucerf_classical_hazard_by_rupture_set,
            (rup_sets, branchname, ucerf_source, self.src_group.id,
             self.sitecol, gsims, monitor),
            concurrent_tasks=tasks).submit_all()
        # compose probabilities from background sources
        for pmap in bg_res:
            acc[0] |= pmap
        self.save_data_transfer(bg_res)
    pmap_by_grp_id = functools.reduce(self.agg_dicts, rup_res, acc)
    with self.monitor('store source_info', autoflush=True):
        self.store_source_info(self.infos)
    self.save_data_transfer(rup_res)
    self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
        functools.partial(self.count_eff_ruptures, pmap_by_grp_id))
    # save csm_info once, after get_rlzs_assoc has updated it
    self.datastore['csm_info'] = self.csm.info
    return pmap_by_grp_id
def execute(self):
    num_rlzs = len(self.rlzs_assoc.realizations)
    allres = parallel.starmap(compute_losses, self.gen_args()).submit_all()
    num_events = self.save_results(allres, num_rlzs)
    self.save_data_transfer(allres)
    return num_events
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    bb_dict = self.datastore['bb_dict']
    sitecol = self.sitecol
    mag_bin_width = self.oqparam.mag_bin_width
    eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
    logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                 min(eps_edges), max(eps_edges))
    self.bin_edges = {}
    curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
    all_args = []
    num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
    nblocks = math.ceil(oq.concurrent_tasks / num_trts)
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        trt_names = tuple(mod.trt for mod in smodel.src_groups)
        max_mag = max(mod.max_mag for mod in smodel.src_groups)
        min_mag = min(mod.min_mag for mod in smodel.src_groups)
        mag_edges = mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / mag_bin_width)),
            int(numpy.ceil(max_mag / mag_bin_width) + 1))
        logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                     min_mag, max_mag)
        for src_group in smodel.src_groups:
            if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                continue  # the group has been filtered away
            for sid, site in zip(sitecol.sids, sitecol):
                curves = curves_dict[sid]
                if not curves:
                    continue  # skip zero-valued hazard curves
                bb = bb_dict[sm_id, sid]
                if not bb:
                    logging.info(
                        'location %s was too far, skipping disaggregation',
                        site.location)
                    continue
                dist_edges, lon_edges, lat_edges = bb.bins_edges(
                    oq.distance_bin_width, oq.coordinate_bin_width)
                logging.info('%d dist bins from %s to %s',
                             len(dist_edges) - 1, min(dist_edges),
                             max(dist_edges))
                logging.info('%d lon bins from %s to %s',
                             len(lon_edges) - 1, bb.west, bb.east)
                # count the latitude bins with lat_edges, not lon_edges
                logging.info('%d lat bins from %s to %s',
                             len(lat_edges) - 1, bb.south, bb.north)
                self.bin_edges[sm_id, sid] = (
                    mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
            bin_edges = {}
            for sid, site in zip(sitecol.sids, sitecol):
                if (sm_id, sid) in self.bin_edges:
                    bin_edges[sid] = self.bin_edges[sm_id, sid]
            ss_filter = SourceSitesFilter(oq.maximum_distance)
            split_sources = []
            for src in src_group:
                for split, _sites in ss_filter(
                        sourceconverter.split_source(src), sitecol):
                    split_sources.append(split)
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append(
                    (sitecol, srcs, src_group.id, self.rlzs_assoc,
                     trt_names, curves_dict, bin_edges, oq, self.monitor))
    results = parallel.starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    self.save_disagg_results(results)