def test_split_in_blocks(self):
    """Check how nine weighted letters are split for hints of 1 and 2."""
    weights = {'a': 11, 'b': 10, 'c': 100, 'd': 15, 'e': 20,
               'f': 5, 'g': 30, 'h': 17, 'i': 25}
    letters = 'abcdefghi'

    # with a hint of 1 a single block is expected
    single = list(split_in_blocks(letters, 1, weights.get))
    self.assertEqual(len(single), 1)

    # with a hint of 2 three blocks are expected, with this exact repr
    blocks = list(split_in_blocks(letters, 2, weights.get))
    self.assertEqual(len(blocks), 3)
    expected = ("[<WeightedSequence ['a', 'b'], weight=21>, "
                "<WeightedSequence ['c', 'd'], weight=115>, "
                "<WeightedSequence ['e', 'f', 'g', 'h', 'i'], weight=97>]")
    self.assertEqual(repr(blocks), expected)
def test_split_in_blocks(self):
    """Verify the number and the repr of the blocks built from nine letters."""
    letters = "abcdefghi"
    weights = dict(zip(letters, (11, 10, 100, 15, 20, 5, 30, 17, 25)))

    def split(hint):
        # the weight of each letter is looked up in the dictionary above
        return list(split_in_blocks(letters, hint, weights.get))

    self.assertEqual(len(split(1)), 1)

    blocks = split(2)
    self.assertEqual(len(blocks), 3)
    expected_repr = (
        "[<WeightedSequence ['a', 'b'], weight=21>, "
        "<WeightedSequence ['c', 'd'], weight=115>, "
        "<WeightedSequence ['e', 'f', 'g', 'h', 'i'], weight=97>]"
    )
    self.assertEqual(repr(blocks), expected_repr)
def get_data_transfer(dstore):
    """
    Estimate how much data travels from the controller node to the
    workers and back in a classical calculation.

    The sources are split in blocks by weight and `trt_model_id`; for each
    block the pickled size of the task arguments is added to the forward
    total, and the size of the expected result (8 bytes per float) is
    added to the backward total.

    :param dstore: a :class:`openquake.commonlib.datastore.DataStore` instance
    :returns: a triple (block_info, to_send_forward, to_send_back), where
        block_info is an array of dtype `block_dt` with one
        (len(block), block.weight) record per block
    """
    oqparam = OqParam.from_(dstore.attrs)
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    info = dstore['job_info']
    sources = dstore['composite_source_model'].get_sources()

    def count_rows(group):
        return sum(1 for _ in group)

    num_gsims_by_trt = groupby(
        rlzs_assoc, operator.itemgetter(0), count_rows)
    gsims_assoc = rlzs_assoc.gsims_by_trt_id
    by_weight = operator.attrgetter('weight')
    by_trt_model = operator.attrgetter('trt_model_id')

    forward = 0
    backward = 0
    records = []
    blocks = split_in_blocks(
        sources, oqparam.concurrent_tasks or 1, by_weight, by_trt_model)
    for block in blocks:
        # a missing trt_model_id counts as zero GSIMs
        num_gsims = num_gsims_by_trt.get(block[0].trt_model_id, 0)
        num_floats = (info['n_sites'] * info['n_levels'] * info['n_imts'] *
                      num_gsims)
        backward += num_floats * 8  # 8 bytes per float
        task_args = (block, sitecol, gsims_assoc, PerformanceMonitor(''))
        pickled = parallel.pickle_sequence(task_args)
        forward += sum(len(p) for p in pickled)
        records.append((len(block), block.weight))
    return numpy.array(records, block_dt), forward, backward
def build_inputs_from_ruptures(
    self, grp_trt, imts, sitecol, all_ruptures, trunc_level, correl_model, min_iml, eps, hint
):
    """
    Yield one :class:`RiskInputFromRuptures` instance per block of ruptures.

    The ruptures are split by `grp_id` and weight; each block receives the
    slice of epsilon columns matching its events, which are numbered
    consecutively across blocks.

    :param grp_trt: a mapping indexed by grp_id; the value found for the
        block is passed to RiskInputFromRuptures
    :param imts: list of intensity measure type strings
    :param sitecol: a SiteCollection instance
    :param all_ruptures: the complete list of EBRupture instances
    :param trunc_level: the truncation level (or None)
    :param correl_model: the correlation model (or None)
    :param min_iml: an array of minimum IMLs per IMT
    :param eps: a matrix of epsilons of shape (N, E) or None
    :param hint: hint for how many blocks to generate
    """
    blocks = split_in_blocks(
        all_ruptures,
        hint or 1,
        key=operator.attrgetter("grp_id"),
        weight=operator.attrgetter("weight"),
    )
    stop = 0  # running count of the events seen so far
    for block in blocks:
        grp_id = block[0].grp_id
        begin = stop
        stop = begin + sum(rup.multiplicity for rup in block)
        idxs = numpy.arange(begin, stop)
        trt = grp_trt[grp_id]
        if eps is None:
            block_eps = None
        else:
            block_eps = eps[:, idxs]
        yield RiskInputFromRuptures(
            trt, imts, sitecol, block, trunc_level, correl_model,
            min_iml, block_eps)
def gen_args(self, ebruptures):
    """
    Split the ruptures by `grp_id` and build a GmfGetter for each block.

    :param ebruptures: a list of EBRupture objects to be split
    :yields: the arguments for compute_gmfs_and_curves, i.e. triples
        (getter, realizations of the group, monitor)
    """
    oq = self.oqparam
    monitor = self.monitor(self.core_task.__name__)
    monitor.oqparam = oq
    imts = list(oq.imtls)
    min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts)

    # tectonic region type of each source group, keyed by group id
    grp_trt = {}
    for src_model in self.csm.info.source_models:
        for src_group in src_model.src_groups:
            grp_trt[src_group.id] = src_group.trt

    rlzs_by_grp = self.rlzs_assoc.get_rlzs_by_grp_id()
    correl_model = oq.get_correl_model()
    blocks = split_in_blocks(
        ebruptures, oq.concurrent_tasks or 1,
        key=operator.attrgetter('grp_id'))
    for block in blocks:
        # the group id is read from the first rupture of the block
        grp_id = block[0].grp_id
        trt = grp_trt[grp_id]
        gsims = [gsim_by_trt[trt]
                 for gsim_by_trt in self.rlzs_assoc.gsim_by_trt]
        samples = self.rlzs_assoc.samples[grp_id]
        getter = GmfGetter(
            gsims, block, self.sitecol, imts, min_iml,
            oq.truncation_level, correl_model, samples)
        yield getter, rlzs_by_grp[grp_id], monitor
def execute(self):
    """
    Split the site collection in tiles and run one classical_tiling task
    per tile, aggregating the results with agg_curves_by_trt_gsim.

    :returns: the accumulator produced by the reduction, initialized with
        zero curves for each key of the realizations association plus an
        empty 'calc_times' list
    """
    monitor = self.monitor(self.core_func.__name__)
    oq = self.oqparam
    monitor.oqparam = oq
    self.tiles = split_in_blocks(
        self.sitecol, self.oqparam.concurrent_tasks or 1)
    # concurrent_tasks is reset to 0 before building the inner calculator
    oq.concurrent_tasks = 0
    calculator = ClassicalCalculator(
        self.oqparam, monitor, persistent=False)
    calculator.csm = self.csm
    rlzs_assoc = self.csm.get_rlzs_assoc()
    self.rlzs_assoc = rlzs_assoc
    calculator.rlzs_assoc = rlzs_assoc

    # parallelization: each tile carries the position of its first site
    all_args = []
    offset = 0
    for tile_no, tile in enumerate(self.tiles):
        task_args = (calculator, SiteCollection(tile), offset, tile_no,
                     monitor)
        all_args.append(task_args)
        offset += len(tile)

    acc = {}
    for trt_gsim in calculator.rlzs_assoc:
        acc[trt_gsim] = zero_curves(len(self.sitecol), oq.imtls)
    acc['calc_times'] = []
    tasks = parallel.starmap(classical_tiling, all_args)
    return tasks.reduce(agg_curves_by_trt_gsim, acc)
def apply(cls, task, task_args,
          concurrent_tasks=executor.num_tasks_hint,
          maxweight=None,
          weight=lambda item: 1,
          key=lambda item: 'Unspecified',
          name=None):
    r"""
    Apply a task to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).

    One argument tuple (chunk, \*other_args) is built for each chunk and
    the result of `cls.starmap` over those tuples is returned; no
    aggregation is performed here.

    :param task: a task to run in parallel
    :param task_args: the arguments to be passed to the task function
    :param concurrent_tasks: hint about how many tasks to generate
        (a false value is treated as 1); ignored if maxweight is given
    :param maxweight: if not None, used to split the tasks
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    :param name: passed unchanged to `cls.starmap`
    :returns: whatever `cls.starmap` returns
    """
    arg0 = task_args[0]  # this is assumed to be a sequence
    args = task_args[1:]
    if maxweight:
        # split by maximum weight per chunk
        chunks = block_splitter(arg0, maxweight, weight, key)
    else:
        # split according to the requested number of tasks
        chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
    return cls.starmap(task, [(chunk,) + args for chunk in chunks], name)
def apply_reduce(task, task_args, agg=operator.add, acc=None,
                 concurrent_tasks=CONCURRENT_TASKS,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified',
                 name=None):
    """
    Run a task over the chunks of some data and aggregate the results.

    The task arguments have the form (job_id, data, *args): the data are
    split in blocks, the task is submitted once per block and the results
    are reduced with the aggregation function. If the data are empty the
    accumulator is returned untouched; if there is a single item, or no
    concurrency is requested, the task function is called directly.

    :param task: an oqtask
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator (an AccumDict if None)
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in data
    :param key: function to extract the kind of an item in data
    :param name: passed unchanged to starmap
    """
    if acc is None:
        acc = AccumDict()
    job_id = task_args[0]
    data = task_args[1]
    extra = task_args[2:]
    if not data:
        return acc
    if len(data) == 1 or not concurrent_tasks:
        # nothing to parallelize: call the underlying function directly
        result = task.task_func(job_id, data, *extra)
        return agg(acc, result)
    all_args = []
    for block in split_in_blocks(data, concurrent_tasks, weight, key):
        all_args.append((job_id, block) + extra)
    tasks = starmap(task, all_args, logs.LOG.progress, name)
    return tasks.reduce(agg, acc)
def build_inputs_from_ruptures(
        self, sitecol, all_ruptures, trunc_level, correl_model,
        min_iml, eps, hint):
    """
    Yield one :class:`RiskInputFromRuptures` instance per block of
    ruptures, the blocks being built by `trt_id` and weight.

    :param sitecol: a SiteCollection instance
    :param all_ruptures: the complete list of EBRupture instances
    :param trunc_level: the truncation level (or None)
    :param correl_model: the correlation model (or None)
    :param min_iml: an array of minimum IMLs per IMT
    :param eps: a matrix of epsilons of shape (N, E) or None
    :param hint: hint for how many blocks to generate
    """
    imt_taxonomies = self.get_imt_taxonomies()
    blocks = split_in_blocks(
        all_ruptures, hint or 1,
        key=operator.attrgetter('trt_id'),
        weight=operator.attrgetter('weight'))
    for block in blocks:
        # event IDs of all the ruptures in the block, in order
        eids = [eid for rup in block for eid in rup.events['eid']]
        if eps is None:
            block_eps = None
        else:
            block_eps = eps[:, eids]
        yield RiskInputFromRuptures(
            imt_taxonomies, sitecol, block, trunc_level, correl_model,
            min_iml, block_eps, eids)
def apply(cls, task, task_args,
          concurrent_tasks=executor.num_tasks_hint,
          maxweight=None,
          weight=lambda item: 1,
          key=lambda item: 'Unspecified',
          name=None):
    r"""
    Apply a task to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).

    One argument tuple (chunk, \*other_args) is built for each chunk and
    an instance of `cls` wrapping the task and those tuples is returned.

    :param task: a task to run in parallel
    :param task_args: the arguments to be passed to the task function
    :param concurrent_tasks: hint about how many tasks to generate
        (a false value is treated as 1); ignored if maxweight is given
    :param maxweight: if not None, used to split the tasks
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    :param name: passed unchanged to the `cls` constructor
    :returns: an instance of `cls`
    """
    arg0 = task_args[0]  # this is assumed to be a sequence
    args = task_args[1:]
    if maxweight:
        # split by maximum weight per chunk
        chunks = block_splitter(arg0, maxweight, weight, key)
    else:
        # split according to the requested number of tasks
        chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
    return cls(task, [(chunk,) + args for chunk in chunks], name)
def data_transfer(calc):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.

    The sources are split in blocks by weight and `trt_model_id`; for each
    block the pickled size of the task arguments is added to the forward
    total and the size of the expected result to the backward total.

    :param calc: a calculator providing oqparam, job_info, monitor,
        sitecol, composite_source_model and rlzs_assoc
    :returns: a triple (num_tasks, to_send_forward, to_send_back)
    """
    oqparam = calc.oqparam
    info = calc.job_info
    calc.monitor.oqparam = oqparam
    sources = calc.composite_source_model.get_sources()
    num_gsims_by_trt = groupby(calc.rlzs_assoc, operator.itemgetter(0),
                               lambda group: sum(1 for row in group))
    gsims_assoc = calc.rlzs_assoc.get_gsims_by_trt_id()
    to_send_forward = 0
    to_send_back = 0
    n_tasks = 0
    # `or 1` protects against concurrent_tasks being 0 or None, which
    # would otherwise be passed as the splitting hint
    for block in split_in_blocks(sources, oqparam.concurrent_tasks or 1,
                                 operator.attrgetter('weight'),
                                 operator.attrgetter('trt_model_id')):
        num_gsims = num_gsims_by_trt[block[0].trt_model_id]
        back = info['n_sites'] * info['n_levels'] * info['n_imts'] * num_gsims
        to_send_back += back * 8  # 8 bytes per float
        args = (block, calc.sitecol, gsims_assoc, calc.monitor)
        logging.info('Pickling task args #%d', n_tasks)
        to_send_forward += sum(len(p) for p in parallel.pickle_sequence(args))
        n_tasks += 1
    return n_tasks, to_send_forward, to_send_back
def gen_args(self, ruptures_by_grp):
    """
    Build a GmfGetter for each block of ruptures of each source group.

    As a side effect `self.grp_trt` is set from the source model info.
    Groups without ruptures are skipped.

    :param ruptures_by_grp: a dictionary of EBRupture objects
    :yields: the arguments for compute_gmfs_and_curves, i.e. triples
        (getter, realizations of the group, monitor)
    """
    oq = self.oqparam
    monitor = self.monitor(self.core_task.__name__)
    monitor.oqparam = oq
    imts = list(oq.imtls)
    min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts)
    self.grp_trt = self.csm.info.grp_trt()
    rlzs_by_grp = self.rlzs_assoc.get_rlzs_by_grp_id()
    correl_model = oq.get_correl_model()
    for grp_id in ruptures_by_grp:
        ruptures = ruptures_by_grp[grp_id]
        if ruptures:
            blocks = split_in_blocks(ruptures, oq.concurrent_tasks or 1)
            for block in blocks:
                trt = self.grp_trt[grp_id]
                gsims = [gsim_by_trt[trt]
                         for gsim_by_trt in self.rlzs_assoc.gsim_by_trt]
                samples = self.rlzs_assoc.samples[grp_id]
                getter = GmfGetter(
                    gsims, block, self.sitecol, imts, min_iml,
                    oq.truncation_level, correl_model, samples)
                yield getter, rlzs_by_grp[grp_id], monitor
def split_in_tiles(self, hint):
    """
    Split a SiteCollection into a set of tiles (SiteCollection instances).

    The site indices are split in blocks and each tile gets the rows of
    `self.array` selected by one block.

    :param hint: hint for how many tiles to generate (a false value
        is treated as 1)
    :returns: a list of SiteCollection instances
    """
    def make_tile(seq):
        # bypass __init__ and set only the array of the tile
        tile = SiteCollection.__new__(SiteCollection)
        tile.array = self.array[numpy.array(seq, int)]
        return tile

    blocks = split_in_blocks(range(len(self)), hint or 1)
    return [make_tile(seq) for seq in blocks]
def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)):
    """
    Build the risk inputs by splitting the sites in blocks weighted by
    their number of assets and by regrouping the hazard by IMT.

    :param hazards_by_key:
        a dictionary key -> IMT -> array of length num_sites
    :param eps:
        a matrix of epsilons (possibly empty)
    :returns:
        a list of RiskInputs objects, sorted by IMT.
    :raises ValueError:
        if the risk models and the hazard have no IMT in common
    """
    self.check_poes(hazards_by_key)
    imtls = self.oqparam.imtls
    common_imts = set(self.oqparam.risk_imtls) & set(imtls)
    if not common_imts:
        rsk = ', '.join(self.oqparam.risk_imtls)
        haz = ', '.join(imtls)
        raise ValueError(
            'The IMTs in the risk models (%s) are disjoint '
            "from the IMTs in the hazard (%s)" % (rsk, haz))
    # each block produces one risk input per IMT, so the hint is divided
    # by the number of IMTs
    num_tasks = math.ceil(
        (self.oqparam.concurrent_tasks or 1) / len(imtls))
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        idx_weight_pairs = []
        for site_idx, site_assets in enumerate(self.assets_by_site):
            idx_weight_pairs.append((site_idx, len(site_assets)))
        by_weight = operator.itemgetter(1)
        for block in general.split_in_blocks(
                idx_weight_pairs, num_tasks, weight=by_weight):
            indices = numpy.array([idx for idx, _weight in block])
            reduced_assets = self.assets_by_site[indices]

            # dictionary of epsilons for the reduced assets
            reduced_eps = collections.defaultdict(F32)
            if len(eps):
                for site_assets in reduced_assets:
                    for asset in site_assets:
                        reduced_eps[asset.ordinal] = eps[asset.ordinal]

            # collect the hazards by key into hazards by imt
            hdata = collections.defaultdict(lambda: [{} for _ in indices])
            for key, hazards_by_imt in hazards_by_key.items():
                for imt in imtls:
                    site_hazards = hazards_by_imt[imt][indices]
                    for pos, haz in enumerate(site_hazards):
                        hdata[imt][pos][key] = haz

            # build the riskinputs, discarding the ones without weight
            for imt, hazards in hdata.items():
                ri = self.riskmodel.build_input(
                    imt, hazards, reduced_assets, reduced_eps)
                if ri.weight > 0:
                    riskinputs.append(ri)
        assert riskinputs
        logging.info('Built %d risk inputs', len(riskinputs))
        return sorted(riskinputs, key=self.riskinput_key)
def export_asset_loss_table(ekey, dstore):
    """
    Export in parallel the asset loss table from the datastore.

    NB1: for large calculation this may run out of memory
    NB2: due to an heisenbug in the parallel reading of .hdf5 files this works
    reliably only if the datastore has been created by a different process

    The recommendation is: *do not use this exporter*: rather, study its source
    code and write what you need. Every postprocessing is different.

    :param ekey: a pair (key, fmt), also used to build the export file name
    :param dstore: the datastore to read from; it is closed by this function
    :returns: a list with the name of the exported HDF5 file
    """
    key, fmt = ekey
    oq = dstore['oqparam']
    assetcol = dstore['assetcol']
    arefs = dstore['asset_refs'].value
    avals = assetcol.values()
    loss_types = dstore.get_attr('all_loss_ratios', 'loss_types').split()
    # one F32 field per loss type, plus one '_ins' field per loss type
    # when insured losses are enabled
    dtlist = [(lt, F32) for lt in loss_types]
    if oq.insured_losses:
        for lt in loss_types:
            dtlist.append((lt + '_ins', F32))
    lrs_dt = numpy.dtype([('rlzi', U16), ('losses', dtlist)])
    fname = dstore.export_path('%s.%s' % ekey)
    monitor = performance.Monitor(key, fname)
    lrgetter = riskinput.LossRatiosGetter(dstore)
    aids = range(len(assetcol))
    # one task per block of asset IDs
    allargs = [(lrgetter, list(block), monitor)
               for block in split_in_blocks(aids, oq.concurrent_tasks)]
    dstore.close()  # avoid OSError: Can't read data (Wrong b-tree signature)
    L = len(loss_types)
    with hdf5.File(fname, 'w') as f:
        nbytes = 0
        total = numpy.zeros(len(dtlist), F32)
        for pairs in parallel.Starmap(get_loss_ratios, allargs):
            for aid, data in pairs:
                asset = assetcol[aid]
                avalue = avals[aid]
                # scale the ratios in place by the asset value of each
                # loss type; column l + L * i is scaled for i in 0 (and 1
                # when insured_losses is 1)
                for l, lt in enumerate(loss_types):
                    aval = avalue[lt]
                    for i in range(oq.insured_losses + 1):
                        data['ratios'][:, l + L * i] *= aval
                aref = arefs[asset.idx]
                # one dataset per asset, named after the asset reference
                f[b'asset_loss_table/' + aref] = data.view(lrs_dt)
                total += data['ratios'].sum(axis=0)
                nbytes += data.nbytes
        # summary attributes stored on the group
        f['asset_loss_table'].attrs['loss_types'] = ' '.join(loss_types)
        f['asset_loss_table'].attrs['total'] = total
        f['asset_loss_table'].attrs['nbytes'] = nbytes
    return [fname]
def apply_reduce(cls, task, task_args, agg=operator.add, acc=None,
                 concurrent_tasks=executor._max_workers,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified',
                 name=None,
                 posthook=None):
    r"""
    Apply a task to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).
    Then reduce the results with an aggregation function.
    The chunks which are generated internally can be seen directly (
    useful for debugging purposes) by looking at the attribute `._chunks`,
    right after the `apply_reduce` function has been called (the attribute
    is not updated when the sequence is empty).

    :param task: a task to run in parallel
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator (default empty AccumDict)
    :param concurrent_tasks: hint about how many tasks to generate; a
        false value means that the chunks are processed in the current
        process
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    :param name: passed unchanged to `cls.starmap`
    :param posthook: passed unchanged to the final `reduce` call; not
        used when the chunks are processed in the current process
    :returns: the aggregated accumulator
    """
    arg0 = task_args[0]  # this is assumed to be a sequence
    args = task_args[1:]
    task_func = getattr(task, 'task_func', task)
    if acc is None:
        acc = AccumDict()
    # len() is used on purpose: arg0 may be any sized sequence
    if len(arg0) == 0:  # nothing to do
        return acc
    chunks = list(split_in_blocks(
        arg0, concurrent_tasks or 1, weight, key))
    cls.apply_reduce.__func__._chunks = chunks
    if not concurrent_tasks or no_distribute() or len(chunks) == 1:
        # apply the function in the master process
        for i, chunk in enumerate(chunks):
            if args and hasattr(args[-1], 'flush'):  # is monitor
                args[-1].task_no = i
            acc = agg(acc, task_func(chunk, *args))
        return acc
    logging.info('Starting %d tasks', len(chunks))
    self = cls.starmap(task, [(chunk, ) + args for chunk in chunks], name)
    return self.reduce(agg, acc, posthook)
def build_riskinputs(self, kind, hazards_by_rlz, eps=numpy.zeros(0)):
    """
    Build one risk input per block of sites, the sites being weighted
    by their number of assets; inputs without weight are discarded.

    :param kind:
        kind of hazard getter, can be 'poe' or 'gmf'
    :param hazards_by_rlz:
        a dictionary rlz -> IMT -> array of length num_sites
    :param eps:
        a matrix of epsilons (possibly empty)
    :returns:
        a list of RiskInputs objects, in the order of the blocks
    :raises ValueError:
        if the risk models and the hazard have no IMT in common
    """
    self.check_poes(hazards_by_rlz)
    imtls = self.oqparam.imtls
    common_imts = set(self.oqparam.risk_imtls) & set(imtls)
    if not common_imts:
        rsk = ', '.join(self.oqparam.risk_imtls)
        haz = ', '.join(imtls)
        raise ValueError(
            'The IMTs in the risk models (%s) are disjoint '
            "from the IMTs in the hazard (%s)" % (rsk, haz))
    num_tasks = self.oqparam.concurrent_tasks or 1
    rlzs = range(len(hazards_by_rlz))
    assets_by_site = self.assetcol.assets_by_site()
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        idx_weight_pairs = []
        for site_idx, site_assets in enumerate(assets_by_site):
            idx_weight_pairs.append((site_idx, len(site_assets)))
        by_weight = operator.itemgetter(1)
        for block in general.split_in_blocks(
                idx_weight_pairs, num_tasks, weight=by_weight):
            indices = numpy.array([idx for idx, _weight in block])
            reduced_assets = assets_by_site[indices]

            # dictionary of epsilons for the reduced assets
            reduced_eps = collections.defaultdict(F32)
            if len(eps):
                for site_assets in reduced_assets:
                    for asset in site_assets:
                        reduced_eps[asset.ordinal] = eps[asset.ordinal]

            # build the riskinput for the block
            getter = riskinput.HazardGetter(
                kind, 0, {None: rlzs}, hazards_by_rlz, indices,
                list(imtls))
            ri = riskinput.RiskInput(getter, reduced_assets, reduced_eps)
            if ri.weight > 0:
                riskinputs.append(ri)
        assert riskinputs
        logging.info('Built %d risk inputs', len(riskinputs))
        return riskinputs
def _gen_riskinputs(self, kind, eps, num_events):
    """
    Yield one RiskInput per block of sites with assets.

    For kind 'poe' the weight of a site is its number of assets;
    otherwise it is the number of assets times the number of GMFs read
    from 'gmf_data/indices' (or 1 if there are none).

    :param kind: 'poe' to use a PmapGetter, anything else to use a
        GmfDataGetter
    :param eps: a container of epsilons indexed by asset ordinal,
        possibly None or empty
    :param num_events: not used in this method
    """
    num_tasks = self.oqparam.concurrent_tasks or 1
    assets_by_site = self.assetcol.assets_by_site()
    indices = (None if kind == 'poe'
               else self.datastore['gmf_data/indices'].value)
    dstore = self.can_read_parent() or self.datastore

    sid_weight = []
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        if indices is None:
            weight = len(assets)
        else:
            idx = indices[sid]
            if indices.dtype.names:  # engine < 3.2
                num_gmfs = sum(stop - start for start, stop in idx)
            else:  # engine >= 3.2
                num_gmfs = (idx[1] - idx[0]).sum()
            weight = len(assets) * (num_gmfs or 1)
        sid_weight.append((sid, weight))

    blocks = general.split_in_blocks(
        sid_weight, num_tasks, weight=operator.itemgetter(1))
    for block in blocks:
        sids = numpy.array([sid for sid, _weight in block])
        reduced_assets = assets_by_site[sids]

        # dictionary of epsilons for the reduced assets
        reduced_eps = {}
        for assets in reduced_assets:
            for asset in assets:
                if eps is None or not len(eps):
                    continue
                reduced_eps[asset.ordinal] = eps[asset.ordinal]

        # build the getter
        if kind == 'poe':  # hcurves, shape (R, N)
            getter = PmapGetter(dstore, self.rlzs_assoc, sids)
            getter.num_rlzs = self.R
        else:  # gmf
            getter = GmfDataGetter(
                dstore, sids, self.R, self.oqparam.imtls)

        if dstore is not self.datastore:
            # the datastore must be closed to avoid the HDF5 fork bug
            assert dstore.hdf5 == (), '%s is not closed!' % dstore
        else:
            # read the hazard data in the controller node
            getter.init()

        ri = riskinput.RiskInput(getter, reduced_assets, reduced_eps)
        ri.weight = block.weight
        yield ri
def split_in_tiles(self, hint):
    """
    Split a SiteCollection into a set of tiles (SiteCollection instances).

    Each tile gets the rows of `self.array` selected by one block of site
    indices, `indices` set to None and an `offset` equal to the number of
    sites contained in the previous tiles.

    :param hint: hint for how many tiles to generate (a false value
        is treated as 1)
    :returns: a list of SiteCollection instances
    """
    tiles = []
    num_previous = 0  # sites already assigned to a tile
    blocks = split_in_blocks(range(len(self)), hint or 1)
    for seq in blocks:
        # bypass __init__ and set the attributes by hand
        tile = SiteCollection.__new__(SiteCollection)
        tile.indices = None
        tile.array = self.array[numpy.array(seq, int)]
        tile.offset = num_previous
        tiles.append(tile)
        num_previous += len(seq)
    return tiles
def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)):
    """
    Build the risk inputs by splitting the sites in blocks weighted by
    their number of assets and by regrouping the hazard by IMT.

    :param hazards_by_key:
        a dictionary key -> IMT -> array of length num_sites
    :param eps:
        a matrix of epsilons (possibly empty)
    :returns:
        a list of RiskInputs objects, sorted by IMT.
    """
    # add asset.idx as side effect
    riskinput.build_asset_collection(
        self.assets_by_site, self.oqparam.time_event)
    imtls = self.oqparam.imtls
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        idx_weight_pairs = []
        for site_idx, site_assets in enumerate(self.assets_by_site):
            idx_weight_pairs.append((site_idx, len(site_assets)))
        by_weight = operator.itemgetter(1)
        for block in general.split_in_blocks(
                idx_weight_pairs, self.oqparam.concurrent_tasks or 1,
                weight=by_weight):
            indices = numpy.array([idx for idx, _weight in block])
            reduced_assets = self.assets_by_site[indices]

            # epsilons for the assets belonging to the indices
            reduced_eps = {}
            if len(eps):
                for site_assets in reduced_assets:
                    for asset in site_assets:
                        reduced_eps[asset.idx] = eps[asset.idx]

            # collect the hazards by key into hazards by imt
            hdata = collections.defaultdict(lambda: [{} for _ in indices])
            for key, hazards_by_imt in hazards_by_key.items():
                for imt in imtls:
                    site_hazards = hazards_by_imt[imt][indices]
                    for pos, haz in enumerate(site_hazards):
                        hdata[imt][pos][key] = haz

            # build the riskinputs, discarding the ones without weight
            for imt, hazards in hdata.items():
                ri = self.riskmodel.build_input(
                    imt, hazards, reduced_assets, reduced_eps)
                if ri.weight > 0:
                    riskinputs.append(ri)
        logging.info('Built %d risk inputs', len(riskinputs))
        return sorted(riskinputs, key=self.riskinput_key)
def execute(self):
    """
    Build the loss maps (and, with more than one realization, the
    datasets for the statistical maps and curves) in parallel, one task
    per block of asset IDs.

    Nothing is done unless 'all_loss_ratios' is in the datastore and
    conditional_loss_poes is set.
    """
    oq = self.oqparam
    R = len(self.loss_builder.weights)
    # build loss maps
    if 'all_loss_ratios' in self.datastore and oq.conditional_loss_poes:
        assetcol = self.datastore['assetcol']
        stats = oq.risk_stats()
        builder = self.loss_builder
        # A assets, S statistics, P return periods
        A = len(assetcol)
        S = len(stats)
        P = len(builder.return_periods)
        # create loss_maps datasets
        self.datastore.create_dset(
            'loss_maps-rlzs', self.loss_maps_dt, (A, R), fillvalue=None)
        if R > 1:
            self.datastore.create_dset(
                'loss_maps-stats', self.loss_maps_dt, (A, S),
                fillvalue=None)
            self.datastore.set_attrs(
                'loss_maps-stats',
                stats=[encode(name) for (name, func) in stats])
            self.datastore.create_dset(
                'curves-stats', oq.loss_dt(), (A, S, P), fillvalue=None)
            self.datastore.set_attrs(
                'curves-stats', return_periods=builder.return_periods,
                stats=[encode(name) for (name, func) in stats])
        mon = self.monitor('loss maps')
        # lazy: the loss ratios are in the parent datastore and the
        # parent can be read
        lazy = ('all_loss_ratios' in self.datastore.parent and
                self.can_read_parent())
        logging.info('Instantiating LossRatiosGetters')
        with self.monitor('building lrgetters', measuremem=True,
                          autoflush=True):
            allargs = []
            for aids in split_in_blocks(range(A), oq.concurrent_tasks):
                dstore = self.datastore.parent if lazy else self.datastore
                getter = getters.LossRatiosGetter(dstore, aids, lazy)
                # a lazy getter will read the loss_ratios from the workers
                # an eager getter reads the loss_ratios upfront
                allargs.append((assetcol.values(aids), builder, getter,
                                stats, oq.conditional_loss_poes, mon))
        if lazy:
            # avoid OSError: Can't read data (Wrong b-tree signature)
            self.datastore.parent.close()
        parallel.Starmap(build_curves_maps, allargs).reduce(
            self.save_curves_maps)
        if lazy:  # the parent was closed, reopen it
            self.datastore.parent.open()
def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)):
    """
    Split the sites in blocks (weighted by number of assets) and build,
    for each block and each IMT, a risk input with positive weight.

    :param hazards_by_key:
        a dictionary key -> IMT -> array of length num_sites
    :param eps:
        a matrix of epsilons (possibly empty)
    :returns:
        a list of RiskInputs objects, sorted by IMT.
    """
    assets_by_site = self.assets_by_site
    # add asset.idx as side effect
    riskinput.build_asset_collection(assets_by_site, self.oqparam.time_event)
    imtls = self.oqparam.imtls
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        idx_weight_pairs = [
            (site_no, len(assets))
            for site_no, assets in enumerate(self.assets_by_site)]
        hint = self.oqparam.concurrent_tasks or 1
        blocks = general.split_in_blocks(
            idx_weight_pairs, hint, weight=operator.itemgetter(1))
        for block in blocks:
            indices = numpy.array([idx for idx, _weight in block])
            reduced_assets = self.assets_by_site[indices]

            reduced_eps = {}  # for the assets belonging to the indices
            if len(eps):
                for assets in reduced_assets:
                    for asset in assets:
                        reduced_eps[asset.idx] = eps[asset.idx]

            # collect the hazards by key into hazards by imt
            hdata = collections.defaultdict(lambda: [{} for _ in indices])
            for key, hazards_by_imt in hazards_by_key.items():
                for imt in imtls:
                    hazards_by_site = hazards_by_imt[imt]
                    for pos, haz in enumerate(hazards_by_site[indices]):
                        hdata[imt][pos][key] = haz

            # build the riskinputs
            for imt in hdata:
                ri = self.riskmodel.build_input(
                    imt, hdata[imt], reduced_assets, reduced_eps)
                if not ri.weight > 0:
                    continue
                riskinputs.append(ri)
        logging.info('Built %d risk inputs', len(riskinputs))
        return sorted(riskinputs, key=self.riskinput_key)
def pre_execute(self):
    """
    Read the full source model and sites and build the needed tiles.

    The number of tiles requested is the total weight (sites times
    levels times maximum number of realizations) divided by the
    `maximum_tile_weight` parameter, rounded up.
    """
    self.oqparam = self.job.get_oqparam()
    source_model_lt = readinput.get_source_model_lt(self.oqparam)
    source_models = list(
        readinput.get_source_models(self.oqparam, source_model_lt))
    self.parse_risk_model()
    self.initialize_site_collection()
    info = readinput.get_job_info(
        self.oqparam, source_models, self.site_collection)
    self.imtls = self.oqparam.imtls

    total_weight = (
        info['n_sites'] * info['n_levels'] * info['max_realizations'])
    max_weight = self.oqparam.maximum_tile_weight
    nblocks = math.ceil(total_weight / max_weight)
    self.tiles = list(split_in_blocks(self.site_collection, nblocks))
    self.num_tiles = len(self.tiles)
def pre_execute(self):
    """
    Read the full source model and the sites, then split the site
    collection in tiles, storing them in `self.tiles` and their number
    in `self.num_tiles`.
    """
    self.oqparam = self.job.get_oqparam()
    source_model_lt = readinput.get_source_model_lt(self.oqparam)
    models = readinput.get_source_models(self.oqparam, source_model_lt)
    source_models = list(models)
    self.parse_risk_model()
    self.initialize_site_collection()
    info = readinput.get_job_info(
        self.oqparam, source_models, self.site_collection)
    self.imtls = self.oqparam.imtls
    # sites * levels * maximum number of realizations
    weight = info['n_sites'] * info['n_levels'] * info['max_realizations']
    # enough tiles to keep each one below the maximum tile weight
    nblocks = math.ceil(weight / self.oqparam.maximum_tile_weight)
    tiles = split_in_blocks(self.site_collection, nblocks)
    self.tiles = list(tiles)
    self.num_tiles = len(self.tiles)
def test_split_with_kind(self):
    """Split weighted sources by typology with both splitting functions."""
    Source = namedtuple("Source", "typology, weight")
    sources = [
        Source("point", 1),
        Source("point", 1),
        Source("area", 2),
        Source("area", 4),
        Source("area", 4),
    ]
    by_weight = attrgetter("weight")
    by_typology = attrgetter("typology")

    # splitting with a maximum weight of 6
    blocks = list(block_splitter(
        sources, max_weight=6, weight=by_weight, kind=by_typology))
    self.assertEqual([len(b) for b in blocks], [2, 2, 1])
    self.assertEqual([b.weight for b in blocks], [2, 6, 4])

    # splitting with a hint of 6
    blocks = list(split_in_blocks(
        sources, hint=6, weight=by_weight, key=by_typology))
    self.assertEqual([len(b) for b in blocks], [2, 1, 1, 1])
    self.assertEqual([b.weight for b in blocks], [2, 2, 4, 4])
def gen_rgetters(dstore, slc=slice(None)):
    """
    Group the ruptures in the datastore by `grp_id` and build a
    RuptureGetter for each block of each group.

    :param dstore: a datastore containing 'full_lt' and 'ruptures'
    :param slc: slice of the 'ruptures' dataset to consider
    :yields: unfiltered RuptureGetters
    """
    full_lt = dstore['full_lt']
    trt_by_grp = full_lt.trt_by_grp
    samples = full_lt.get_samples_by_grp()
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    nr = len(dstore['ruptures'])
    groups = general.group_array(rup_array, 'grp_id')
    for grp_id, arr in groups.items():
        if rlzs_by_gsim.get(grp_id, []):
            # the hint is the fraction of ruptures in this group
            for block in general.split_in_blocks(arr, len(arr) / nr):
                proxies = [RuptureProxy(rec) for rec in block]
                yield RuptureGetter(
                    proxies, dstore.filename, grp_id,
                    trt_by_grp[grp_id], samples[grp_id],
                    rlzs_by_gsim[grp_id])
        # otherwise the model has no sources and the group is skipped
def apply_reduce(task_func, task_args,
                 agg=operator.add,
                 acc=None,
                 concurrent_tasks=executor._max_workers,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified',
                 name=None):
    r"""
    Apply a function to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).
    Then reduce the results with an aggregation function.
    Here is an example:

    >>> apply_reduce(sum, ([1, 2, 3, 4, 5],), lambda acc, x: acc + x,
    ...              acc=0, concurrent_tasks=2)
    15

    The chunks which are generated internally can be seen directly (
    useful for debugging purposes) by looking at the attribute `._chunks`,
    right after the `apply_reduce` function has been called:

    >>> apply_reduce._chunks
    [<WeightedSequence [1, 2, 3], weight=3>, <WeightedSequence [4, 5], weight=2>]

    The attribute is updated only when the sequence is actually split,
    i.e. not for an empty sequence, a sequence of length one or when
    `concurrent_tasks` is false.

    :param task_func: a function to run in parallel
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator (default empty AccumDict)
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    :param name: passed unchanged to starmap
    :returns: the aggregated accumulator
    """
    arg0 = task_args[0]
    args = task_args[1:]
    if acc is None:
        acc = AccumDict()
    if not arg0:
        return acc
    elif len(arg0) == 1 or not concurrent_tasks:
        # nothing to split: call the function directly
        return agg(acc, task_func(arg0, *args))
    chunks = list(split_in_blocks(arg0, concurrent_tasks, weight, key))
    tm = starmap(task_func, [(chunk,) + args for chunk in chunks],
                 logging.info, name)
    # keep the chunks around for debugging purposes
    apply_reduce._chunks = chunks
    return tm.reduce(agg, acc)
def execute(self): oq = self.oqparam # build loss maps if 'all_loss_ratios' in self.datastore and oq.conditional_loss_poes: assetcol = self.assetcol stats = oq.risk_stats() builder = self.riskmodel.curve_builder A = len(assetcol) weights = self.datastore['realizations']['weight'] R = len(weights) # create loss_maps datasets self.datastore.create_dset('loss_maps-rlzs', self.loss_maps_dt, (A, R), fillvalue=None) if R > 1: self.datastore.create_dset('loss_maps-stats', self.loss_maps_dt, (A, len(stats)), fillvalue=None) mon = self.monitor('loss maps') lazy = (oq.hazard_calculation_id and 'all_loss_ratios' in self.datastore.parent) logging.info('Instantiating LossRatiosGetters') with self.monitor('building lrgetters', measuremem=True, autoflush=True): allargs = [] for aids in split_in_blocks(range(A), oq.concurrent_tasks): dstore = self.datastore.parent if lazy else self.datastore getter = riskinput.LossRatiosGetter(dstore, aids, lazy) # a lazy getter will read the loss_ratios from the workers # an eager getter reads the loss_ratios upfront allargs.append((assetcol.values(aids), builder, getter, weights, stats, mon)) if lazy: # avoid OSError: Can't read data (Wrong b-tree signature) self.datastore.parent.close() parallel.Starmap(build_loss_maps, allargs).reduce(self.save_loss_maps) if lazy: # the parent was closed, reopen it self.datastore.parent.open() # build an aggregate loss curve per realization if 'agg_loss_table' in self.datastore: self.build_agg_curve()
def apply_reduce(cls, task, task_args, agg=operator.add, acc=None,
                 concurrent_tasks=executor._max_workers,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified',
                 name=None,
                 posthook=None):
    r"""
    Apply a task to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).
    Then reduce the results with an aggregation function.
    The chunks which are generated internally can be seen directly (
    useful for debugging purposes) by looking at the attribute `._chunks`,
    right after the `apply_reduce` function has been called (the attribute
    is not updated when the sequence is empty).

    :param task: a task to run in parallel
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator (default empty AccumDict)
    :param concurrent_tasks: hint about how many tasks to generate; a
        false value means that the chunks are processed in the current
        process
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    :param name: passed unchanged to `cls.starmap`
    :param posthook: passed unchanged to the final `reduce` call; not
        used when the chunks are processed in the current process
    :returns: the aggregated accumulator
    """
    arg0 = task_args[0]  # this is assumed to be a sequence
    args = task_args[1:]
    # a task may wrap the real function in a `task_func` attribute
    task_func = getattr(task, 'task_func', task)
    if acc is None:
        acc = AccumDict()
    # len() is used on purpose: arg0 may be any sized sequence
    if len(arg0) == 0:  # nothing to do
        return acc
    chunks = list(split_in_blocks(
        arg0, concurrent_tasks or 1, weight, key))
    cls.apply_reduce.__func__._chunks = chunks
    if not concurrent_tasks or no_distribute() or len(chunks) == 1:
        # apply the function in the master process
        for i, chunk in enumerate(chunks):
            if args and hasattr(args[-1], 'flush'):  # is monitor
                args[-1].task_no = i
            acc = agg(acc, task_func(chunk, *args))
        return acc
    logging.info('Starting %d tasks', len(chunks))
    self = cls.starmap(task, [(chunk,) + args for chunk in chunks], name)
    return self.reduce(agg, acc, posthook)
def test_split_with_kind(self):
    """
    Check the sizes and weights of the blocks produced by
    `block_splitter` and `split_in_blocks` when the sources are
    keyed by typology.
    """
    Source = namedtuple('Source', 'typology, weight')
    sources = [
        Source(typology, weight) for typology, weight in (
            ('point', 1), ('point', 1),
            ('area', 2), ('area', 4), ('area', 4))]
    by_weight = attrgetter('weight')
    by_typology = attrgetter('typology')

    # split by passing an explicit maximum weight
    blocks = list(block_splitter(
        list(sources), max_weight=6, weight=by_weight, key=by_typology))
    self.assertEqual([len(blk) for blk in blocks], [2, 2, 1])
    self.assertEqual([blk.weight for blk in blocks], [2, 6, 4])

    # split by passing a hint
    blocks = list(split_in_blocks(
        list(sources), hint=6, weight=by_weight, key=by_typology))
    self.assertEqual([len(blk) for blk in blocks], [1, 1, 1, 2])
    self.assertEqual([blk.weight for blk in blocks], [2, 4, 4, 2])
def gen_rgetters(dstore, slc=slice(None)):
    """
    Generate one RuptureGetter per block of ruptures, group by group.

    :param dstore: a datastore with 'csm_info', 'ruptures' and 'oqparam'
    :param slc: slice applied to the 'ruptures' dataset
    :yields: unfiltered RuptureGetters
    """
    info = dstore['csm_info']
    trt_by_grp = info.grp_by("trt")
    samples_by_grp = info.get_samples_by_grp()
    gsims_by_grp = info.get_rlzs_by_gsim_grp()
    ruptures = dstore['ruptures'][slc]
    ct = dstore['oqparam'].concurrent_tasks or 1
    nr = len(dstore['ruptures'])
    by_grp = general.group_array(ruptures, 'grp_id')
    for grp_id, arr in by_grp.items():
        rlzs_by_gsim = gsims_by_grp[grp_id]
        if rlzs_by_gsim:  # otherwise the model has no sources
            # the hint is proportional to the fraction of ruptures
            # belonging to the group
            hint = len(arr) / nr * ct
            for block in general.split_in_blocks(arr, hint):
                proxies = [RuptureProxy(rec) for rec in block]
                yield RuptureGetter(
                    proxies, dstore.filename, grp_id, trt_by_grp[grp_id],
                    samples_by_grp[grp_id], rlzs_by_gsim)
def test_split_with_kind(self):
    """
    Check the sizes and weights of the blocks produced by
    `block_splitter` (keyed via `kind`) and `split_in_blocks`
    (keyed via `key`) when the sources are grouped by typology.
    """
    Source = namedtuple('Source', 'typology, weight')
    sources = [
        Source(typology, weight) for typology, weight in (
            ('point', 1), ('point', 1),
            ('area', 2), ('area', 4), ('area', 4))]
    by_weight = attrgetter('weight')
    by_typology = attrgetter('typology')

    # split by passing an explicit maximum weight
    blocks = list(block_splitter(
        list(sources), max_weight=6, weight=by_weight, kind=by_typology))
    self.assertEqual([len(blk) for blk in blocks], [2, 2, 1])
    self.assertEqual([blk.weight for blk in blocks], [2, 6, 4])

    # split by passing a hint
    blocks = list(split_in_blocks(
        list(sources), hint=6, weight=by_weight, key=by_typology))
    self.assertEqual([len(blk) for blk in blocks], [1, 1, 1, 2])
    self.assertEqual([blk.weight for blk in blocks], [2, 4, 4, 2])
def split_in_tiles(self, hint):
    """
    Split a SiteCollection into a set of tiles (SiteCollection instances).

    :param hint: hint for how many tiles to generate (a false value
                 is treated as 1)
    :returns: a list of SiteCollection instances
    """
    indexed = ('sids', 'lons', 'lats')
    extracted = ('_vs30', '_vs30measured', '_z1pt0', '_z2pt5', '_backarc')

    def make_tile(indices):
        # build the tile without calling __init__ and fill it by hand
        tile = SiteCollection.__new__(SiteCollection)
        tile.complete = tile
        tile.total_sites = len(indices)
        for name in indexed:
            setattr(tile, name, getattr(self, name)[indices])
        for name in extracted:
            setattr(tile, name, _extract(getattr(self, name), indices))
        return tile

    return [make_tile(numpy.array(block, int))
            for block in split_in_blocks(range(len(self)), hint or 1)]
def build_riskinputs(self, hazards_by_imt):
    """
    :param hazards_by_imt:
        a dictionary IMT -> array of length equal to the number of sites
    :returns:
        a list of RiskInputs objects, sorted by IMT.
    """
    riskinputs = []
    # each site index is weighted with the number of assets on the site
    idx_weight_pairs = [
        (i, len(assets)) for i, assets in enumerate(self.assets_by_site)]
    blocks = general.split_in_blocks(
        idx_weight_pairs,
        self.oqparam.concurrent_tasks or 1,
        weight=operator.itemgetter(1))
    for block in blocks:
        idx = numpy.array([i for i, _weight in block])
        # .items() instead of .iteritems(), which does not exist on
        # Python 3 dictionaries
        for imt, hazards_by_site in hazards_by_imt.items():
            ri = self.riskmodel.build_input(
                imt, hazards_by_site[idx], self.assets_by_site[idx])
            if ri.weight > 0:  # discard the risk inputs without weight
                riskinputs.append(ri)
    logging.info('Built %d risk inputs', len(riskinputs))
    return sorted(riskinputs, key=get_imt)
def apply(cls, task, args, concurrent_tasks=cpu_count * 3,
          maxweight=None, weight=lambda item: 1,
          key=lambda item: 'Unspecified',
          distribute=None, progress=logging.info):
    r"""
    Apply a task to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).

    :param task: a task to run in parallel
    :param args: the arguments to be passed to the task function; the
        first one is the sequence to split and the last one is passed
        to the constructor in the monitor position
    :param concurrent_tasks: hint about how many tasks to generate
    :param maxweight: if true, used to split the tasks with block_splitter
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    :param distribute: if not given, inferred from OQ_DISTRIBUTE
    :param progress: logging function to use (default logging.info)
    :returns: an :class:`IterResult` object
    """
    # NB: the docstring is a raw string because of the `\*` above, which
    # is an invalid escape sequence in a regular string literal
    arg0 = args[0]  # this is assumed to be a sequence
    args = args[1:]
    mon = args[-1]
    if maxweight:  # block_splitter is lazy
        task_args = ((blk,) + args for blk in block_splitter(
            arg0, maxweight, weight, key))
    else:  # split_in_blocks is eager
        task_args = [(blk,) + args for blk in split_in_blocks(
            arg0, concurrent_tasks or 1, weight, key)]
    return cls(task, task_args, mon, distribute, progress).submit_all()
def get_rupture_getters(dstore, slc=slice(None), split=0, hdf5cache=None):
    """
    Build the RuptureGetters for the ruptures stored in the datastore.

    :param dstore: a datastore with 'csm_info' and 'ruptures'
    :param slc: slice applied to the 'ruptures' dataset
    :param split: hint passed to `split_in_blocks`
    :param hdf5cache: if true, used instead of `dstore.hdf5path`
    :returns: a list of RuptureGetters
    """
    info = dstore['csm_info']
    trt_by_grp = info.grp_by("trt")
    samples_by_grp = info.get_samples_by_grp()
    gsims_by_grp = info.get_rlzs_by_gsim_grp()
    ruptures = dstore['ruptures'][slc]
    code2cls = get_code2cls(dstore.get_attrs('ruptures'))
    grp_key = operator.itemgetter(2)  # serial, srcidx, grp_id

    getters = []
    for block in general.split_in_blocks(ruptures, split, key=grp_key):
        rups = numpy.array(block)
        grp_id = rups[0]['grp_id']
        rlzs_by_gsim = gsims_by_grp[grp_id]
        if rlzs_by_gsim:
            getters.append(RuptureGetter(
                hdf5cache or dstore.hdf5path, code2cls, rups,
                trt_by_grp[grp_id], samples_by_grp[grp_id], rlzs_by_gsim))
        # otherwise the block is skipped: this may happen if a source
        # model has no sources, like in event_based_risk/case_3
    return getters
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.

    For each source model and source group the bin edges are computed
    and stored in `self.bin_edges`, keyed by (sm_id, sid); then the
    sources are split, filtered and grouped in blocks, one task per
    block. The results are reduced with `self.agg_result` and passed
    to `self.save_disagg_results`.
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    bb_dict = self.datastore["bb_dict"]
    sitecol = self.sitecol
    mag_bin_width = self.oqparam.mag_bin_width
    eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
    logging.info("%d epsilon bins from %s to %s", len(eps_edges) - 1,
                 min(eps_edges), max(eps_edges))

    self.bin_edges = {}
    curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
    all_args = []
    num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
    nblocks = math.ceil(oq.concurrent_tasks / num_trts)
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        trt_names = tuple(mod.trt for mod in smodel.src_groups)
        max_mag = max(mod.max_mag for mod in smodel.src_groups)
        min_mag = min(mod.min_mag for mod in smodel.src_groups)
        mag_edges = mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / mag_bin_width)),
            int(numpy.ceil(max_mag / mag_bin_width) + 1))
        logging.info("%d mag bins from %s to %s", len(mag_edges) - 1,
                     min_mag, max_mag)
        for src_group in smodel.src_groups:
            if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                continue  # the group has been filtered away
            for sid, site in zip(sitecol.sids, sitecol):
                curves = curves_dict[sid]
                if not curves:
                    continue  # skip zero-valued hazard curves
                bb = bb_dict[sm_id, sid]
                if not bb:
                    logging.info(
                        "location %s was too far, skipping disaggregation",
                        site.location)
                    continue

                dist_edges, lon_edges, lat_edges = bb.bins_edges(
                    oq.distance_bin_width, oq.coordinate_bin_width)
                logging.info("%d dist bins from %s to %s",
                             len(dist_edges) - 1,
                             min(dist_edges), max(dist_edges))
                logging.info("%d lon bins from %s to %s",
                             len(lon_edges) - 1, bb.west, bb.east)
                # the number of lat bins comes from lat_edges
                logging.info("%d lat bins from %s to %s",
                             len(lat_edges) - 1, bb.south, bb.north)

                self.bin_edges[sm_id, sid] = (
                    mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

            # bin edges of the current source model, keyed by site ID only
            bin_edges = {}
            for sid, site in zip(sitecol.sids, sitecol):
                if (sm_id, sid) in self.bin_edges:
                    bin_edges[sid] = self.bin_edges[sm_id, sid]

            ss_filter = SourceSitesFilter(oq.maximum_distance)
            split_sources = []
            for src in src_group:
                for split, _sites in ss_filter(
                        sourceconverter.split_source(src), sitecol):
                    split_sources.append(split)
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append(
                    (sitecol, srcs, src_group.id, self.rlzs_assoc,
                     trt_names, curves_dict, bin_edges, oq, self.monitor))

    results = parallel.starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    self.save_disagg_results(results)
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.

    The bin edges are computed per site and stored in `self.bin_edges`;
    then the requested `poes_disagg` are checked against the maximum
    poes found in the hazard curves, raising a ValueError if one of
    them is too big. Finally the sources are split, filtered and
    grouped in blocks, one task per block; the results are reduced with
    `self.agg_result` and passed to `self.save_disagg_results`.
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    sitecol = self.sitecol

    self.bin_edges = {}
    curves = [self.get_curves(sid) for sid in sitecol.sids]
    # determine the number of effective source groups
    sg_data = self.datastore['csm_info/sg_data']
    num_grps = sum(1 for effrup in sg_data['effrup'] if effrup > 0)
    nblocks = math.ceil(oq.concurrent_tasks / num_grps)
    src_filter = SourceFilter(sitecol, oq.maximum_distance)
    R = len(self.rlzs_assoc.realizations)
    max_poe = numpy.zeros(R, oq.imt_dt())

    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in self.csm.source_models
                            for sg in smodel.src_groups)))

    # build mag_edges
    min_mag = min(sg.min_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    logging.info('dist = %s...%s', min(dist_edges), max(dist_edges))

    # build eps_edges (computed only once, here)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges, lat_edges = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
        logging.info('site %d, lon = %s...%s', sid,
                     min(lon_edges), max(lon_edges))
        logging.info('site %d, lat = %s...%s', sid,
                     min(lat_edges), max(lat_edges))
        self.bin_edges[sid] = bs = (
            mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
        shape = [len(edges) - 1 for edges in bs] + [len(trts)]
        logging.info('%s for sid %d', shape, sid)

    # check poes
    # NOTE(review): the nesting of this section was reconstructed from
    # flattened text; the poes_disagg check is assumed to run once per
    # source model, after max_poe has been populated for all the sites
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        for i, _site in enumerate(sitecol):
            # populate max_poe array; nothing to do for empty curves
            for rlzi, poes in curves[i].items():
                for imt in oq.imtls:
                    max_poe[rlzi][imt] = max(
                        max_poe[rlzi][imt], poes[imt].max())

        # check for too big poes_disagg
        for poe in oq.poes_disagg:
            for rlz in self.rlzs_assoc.rlzs_by_smodel[sm_id]:
                rlzi = rlz.ordinal
                for imt in oq.imtls:
                    min_poe = max_poe[rlzi][imt]
                    if poe > min_poe:
                        raise ValueError(self.POE_TOO_BIG % (
                            poe, sm_id, smodel.name, min_poe, rlzi, imt))

    # build all_args
    all_args = []
    for smodel in self.csm.source_models:
        for sg in smodel.src_groups:
            split_sources = []
            for src in sg:
                for split, _sites in src_filter(
                        sourceconverter.split_source(src), sitecol):
                    split_sources.append(split)
            if not split_sources:
                continue
            mon = self.monitor('disaggregation')
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(
                sg.trt, smodel.ordinal)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance)
            imls = [disagg.make_imldict(
                rlzs_by_gsim, oq.imtls, oq.iml_disagg, oq.poes_disagg,
                curve) for curve in curves]
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append(
                    (src_filter, srcs, cmaker, imls, trts,
                     self.bin_edges, oq, mon))

    self.cache_info = numpy.zeros(2)  # operations, cache_hits
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    ops, hits = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    self.save_disagg_results(results)
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.

    For each source model and source group the bin edges are computed
    and stored in `self.bin_edges`, keyed by (sm_id, sid); then the
    sources are split, filtered and grouped in blocks, one task per
    block. The results are reduced with `self.agg_result` and passed
    to `self.save_disagg_results`.
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    bb_dict = self.datastore['bb_dict']
    sitecol = self.sitecol
    mag_bin_width = self.oqparam.mag_bin_width
    eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
    logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                 min(eps_edges), max(eps_edges))

    self.bin_edges = {}
    curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
    all_args = []
    num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
    nblocks = math.ceil(oq.concurrent_tasks / num_trts)
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        trt_names = tuple(mod.trt for mod in smodel.src_groups)
        max_mag = max(mod.max_mag for mod in smodel.src_groups)
        min_mag = min(mod.min_mag for mod in smodel.src_groups)
        mag_edges = mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / mag_bin_width)),
            int(numpy.ceil(max_mag / mag_bin_width) + 1))
        logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                     min_mag, max_mag)
        for src_group in smodel.src_groups:
            if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                continue  # the group has been filtered away
            for sid, site in zip(sitecol.sids, sitecol):
                curves = curves_dict[sid]
                if not curves:
                    continue  # skip zero-valued hazard curves
                bb = bb_dict[sm_id, sid]
                if not bb:
                    logging.info(
                        'location %s was too far, skipping disaggregation',
                        site.location)
                    continue

                dist_edges, lon_edges, lat_edges = bb.bins_edges(
                    oq.distance_bin_width, oq.coordinate_bin_width)
                logging.info('%d dist bins from %s to %s',
                             len(dist_edges) - 1,
                             min(dist_edges), max(dist_edges))
                logging.info('%d lon bins from %s to %s',
                             len(lon_edges) - 1, bb.west, bb.east)
                # the number of lat bins comes from lat_edges
                logging.info('%d lat bins from %s to %s',
                             len(lat_edges) - 1, bb.south, bb.north)

                self.bin_edges[sm_id, sid] = (
                    mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

            # bin edges of the current source model, keyed by site ID only
            bin_edges = {}
            for sid, site in zip(sitecol.sids, sitecol):
                if (sm_id, sid) in self.bin_edges:
                    bin_edges[sid] = self.bin_edges[sm_id, sid]

            src_filter = SourceFilter(sitecol, oq.maximum_distance)
            split_sources = []
            for src in src_group:
                for split, _sites in src_filter(
                        sourceconverter.split_source(src), sitecol):
                    split_sources.append(split)
            mon = self.monitor('disaggregation')
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append(
                    (src_filter, srcs, src_group.id, self.rlzs_assoc,
                     trt_names, curves_dict, bin_edges, oq, mon))

    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    self.save_disagg_results(results)
def apply(cls, func, args, concurrent_tasks=executor._max_workers * 5,
          weight=lambda item: 1, key=lambda item: 'Unspecified'):
    """
    Split the first argument in blocks and build an instance of the
    class (or of Sequential when `concurrent_tasks` is zero) from the
    function and the generated arguments.

    :param func: the function to apply to each block
    :param args: a tuple (sequence, other arguments...)
    :param concurrent_tasks: hint about how many blocks to generate
    :param weight: function to extract the weight of an item
    :param key: function to extract the kind of an item
    """
    sequence, extra = args[0], args[1:]
    blocks = split_in_blocks(sequence, concurrent_tasks or 1, weight, key)
    factory = Sequential if concurrent_tasks == 0 else cls
    return factory(func, ((block,) + extra for block in blocks))
def apply(cls, func, args, concurrent_tasks=executor._max_workers * 5,
          weight=lambda item: 1, key=lambda item: 'Unspecified'):
    """
    Split the first argument in blocks and build an instance of the
    class from the function and the generated arguments.

    :param func: the function to apply to each block
    :param args: a tuple (sequence, other arguments...)
    :param concurrent_tasks: hint about how many blocks to generate
    :param weight: function to extract the weight of an item
    :param key: function to extract the kind of an item
    """
    sequence, extra = args[0], args[1:]
    blocks = split_in_blocks(sequence, concurrent_tasks, weight, key)
    return cls(func, ((block,) + extra for block in blocks))
def full_disaggregation(self, curves_by_trt_gsim):
    """
    Run the disaggregation phase after hazard curve finalization.

    For each source model and TRT model the bin edges are computed and
    stored in `self.bin_edges`, keyed by (sm_id, site.id); then the
    sources are grouped in blocks, one task per block. The results are
    reduced with `self.agg_result` and passed to
    `self.save_disagg_results`.

    :param curves_by_trt_gsim:
        an object with a `bb_dict` attribute, keyed by (sm_id, site.id)
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    sitecol = self.sitecol
    mag_bin_width = self.oqparam.mag_bin_width
    eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
    logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                 min(eps_edges), max(eps_edges))

    self.bin_edges = {}
    curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
    all_args = []
    num_trts = sum(len(sm.trt_models) for sm in self.csm.source_models)
    nblocks = math.ceil(oq.concurrent_tasks / num_trts)
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        trt_names = tuple(mod.trt for mod in smodel.trt_models)
        max_mag = max(mod.max_mag for mod in smodel.trt_models)
        min_mag = min(mod.min_mag for mod in smodel.trt_models)
        mag_edges = mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / mag_bin_width)),
            int(numpy.ceil(max_mag / mag_bin_width) + 1))
        logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                     min_mag, max_mag)
        for trt_model in smodel.trt_models:
            for site in sitecol:
                curves = curves_dict[site.id]
                if not curves:
                    continue  # skip zero-valued hazard curves
                bb = curves_by_trt_gsim.bb_dict[sm_id, site.id]
                if not bb:
                    logging.info(
                        'location %s was too far, skipping disaggregation',
                        site.location)
                    continue

                dist_edges, lon_edges, lat_edges = bb.bins_edges(
                    oq.distance_bin_width, oq.coordinate_bin_width)
                logging.info(
                    '%d dist bins from %s to %s', len(dist_edges) - 1,
                    min(dist_edges), max(dist_edges))
                logging.info(
                    '%d lon bins from %s to %s', len(lon_edges) - 1,
                    bb.west, bb.east)
                # the number of lat bins comes from lat_edges
                logging.info(
                    '%d lat bins from %s to %s', len(lat_edges) - 1,
                    bb.south, bb.north)

                self.bin_edges[sm_id, site.id] = (
                    mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

            # bin edges of the current source model, keyed by site ID only
            bin_edges = {}
            for site in sitecol:
                if (sm_id, site.id) in self.bin_edges:
                    bin_edges[site.id] = self.bin_edges[sm_id, site.id]

            for srcs in split_in_blocks(trt_model, nblocks):
                all_args.append(
                    (sitecol, srcs, trt_model.id, self.rlzs_assoc,
                     trt_names, curves_dict, bin_edges, oq, self.monitor))

    results = parallel.starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    self.save_disagg_results(results)
def full_disaggregation(self, curves_by_trt_gsim):
    """
    Run the disaggregation phase after hazard curve finalization.

    For each source model and TRT model the bin edges are computed and
    stored in `self.bin_edges`, keyed by (sm_id, site.id); then the
    sources are grouped in blocks, one task per block. The results are
    reduced with `self.agg_result` and passed to
    `self.save_disagg_results`.

    :param curves_by_trt_gsim:
        an object with a `bb_dict` attribute, keyed by (sm_id, site.id)
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    sitecol = self.sitecol
    mag_bin_width = self.oqparam.mag_bin_width
    eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
    logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                 min(eps_edges), max(eps_edges))

    self.bin_edges = {}
    curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
    all_args = []
    num_trts = sum(len(sm.trt_models) for sm in self.csm.source_models)
    nblocks = math.ceil(oq.concurrent_tasks / num_trts)
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        trt_names = tuple(mod.trt for mod in smodel.trt_models)
        max_mag = max(mod.max_mag for mod in smodel.trt_models)
        min_mag = min(mod.min_mag for mod in smodel.trt_models)
        mag_edges = mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / mag_bin_width)),
            int(numpy.ceil(max_mag / mag_bin_width) + 1))
        logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                     min_mag, max_mag)
        for trt_model in smodel.trt_models:
            for site in sitecol:
                curves = curves_dict[site.id]
                if not curves:
                    continue  # skip zero-valued hazard curves
                bb = curves_by_trt_gsim.bb_dict[sm_id, site.id]
                if not bb:
                    logging.info(
                        'location %s was too far, skipping disaggregation',
                        site.location)
                    continue

                dist_edges, lon_edges, lat_edges = bb.bins_edges(
                    oq.distance_bin_width, oq.coordinate_bin_width)
                logging.info('%d dist bins from %s to %s',
                             len(dist_edges) - 1,
                             min(dist_edges), max(dist_edges))
                logging.info('%d lon bins from %s to %s',
                             len(lon_edges) - 1, bb.west, bb.east)
                # the number of lat bins comes from lat_edges
                logging.info('%d lat bins from %s to %s',
                             len(lat_edges) - 1, bb.south, bb.north)

                self.bin_edges[sm_id, site.id] = (
                    mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

            # bin edges of the current source model, keyed by site ID only
            bin_edges = {}
            for site in sitecol:
                if (sm_id, site.id) in self.bin_edges:
                    bin_edges[site.id] = self.bin_edges[sm_id, site.id]

            for srcs in split_in_blocks(trt_model, nblocks):
                all_args.append(
                    (sitecol, srcs, trt_model.id, self.rlzs_assoc,
                     trt_names, curves_dict, bin_edges, oq, self.monitor))

    results = parallel.starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    self.save_disagg_results(results)