def _gen_riskinputs_gmf(self, dstore):
    if 'gmf_data' not in dstore:  # needed for case_shakemap
        dstore.close()
        dstore = self.datastore
        if 'gmf_data' not in dstore:
            raise InvalidFile('Did you forget gmfs_csv in %s?'
                              % self.oqparam.inputs['job_ini'])
    if len(dstore['gmf_data/gmv_0']) == 0:
        raise RuntimeError(
            'There are no GMFs available: perhaps you set '
            'ground_motion_fields=False or a too large minimum_intensity')
    with self.monitor('reading GMFs'):
        rlzs = dstore['events']['rlz_id']
        gmf_df = dstore.read_df('gmf_data', 'sid')
        by_sid = dict(list(gmf_df.groupby(gmf_df.index)))
    logging.info('Grouped the GMFs by site ID')
    for sid, assets in enumerate(self.assetcol.assets_by_site()):
        if len(assets) == 0:
            continue
        try:
            df = by_sid[sid]
        except KeyError:
            getter = getters.ZeroGetter(sid, rlzs, self.R)
        else:
            df['rlzs'] = rlzs[df.eid.to_numpy()]
            getter = getters.GmfDataGetter(sid, df, len(rlzs), self.R)
        for block in general.block_splitter(
                assets, self.oqparam.assets_per_site_limit):
            yield riskinput.RiskInput(sid, getter, numpy.array(block))
            if len(block) >= TWO16:
                logging.error('There are %d assets on site #%d!',
                              len(block), sid)
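
# A minimal, self-contained sketch (not engine code) of the groupby idiom
# used above: a GMF-like DataFrame indexed by site id is turned into a
# dict, so each site's rows can be fetched in O(1) inside the loop.
# Column names below are illustrative assumptions.
import pandas

def group_by_sid(gmf_df):
    """Return a dict sid -> sub-DataFrame, like ``by_sid`` above."""
    return dict(list(gmf_df.groupby(gmf_df.index)))

df = pandas.DataFrame({'eid': [0, 1, 2, 3], 'gmv_0': [.1, .2, .3, .4]},
                      index=[0, 0, 1, 2])  # the index plays the role of sid
by_sid = group_by_sid(df)
assert len(by_sid[0]) == 2 and len(by_sid[1]) == 1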
def compute_losses(ssm, src_filter, param, riskmodel, monitor):
    """
    Compute the losses for a single source model. The returned List
    carries the losses plus bookkeeping attributes (`.sm_id`,
    `.num_events`, `.rlz_slice`, `.events_by_grp`, `.eff_ruptures`).

    :param ssm: CompositeSourceModel containing a single source model
    :param src_filter: a SourceFilter instance
    :param param: a dictionary of extra parameters
    :param riskmodel: a RiskModel instance
    :param monitor: a Monitor instance
    :returns: a List containing the losses by taxonomy and some attributes
    """
    [grp] = ssm.src_groups
    res = List()
    rlzs_assoc = ssm.info.get_rlzs_assoc()
    rlzs_by_gsim = rlzs_assoc.get_rlzs_by_gsim(DEFAULT_TRT)
    hazard = compute_hazard(grp, src_filter, rlzs_by_gsim, param, monitor)
    [(grp_id, ebruptures)] = hazard['ruptures'].items()
    samples = ssm.info.get_samples_by_grp()
    num_rlzs = len(rlzs_assoc.realizations)
    getter = getters.GmfGetter(
        rlzs_by_gsim, ebruptures, src_filter.sitecol,
        param['oqparam'], param['min_iml'], samples[grp_id])
    ri = riskinput.RiskInput(getter, param['assetcol'].assets_by_site())
    res.append(ucerf_risk(ri, riskmodel, param, monitor))
    res.sm_id = ssm.sm_id
    res.num_events = len(ri.hazard_getter.eids)
    start = res.sm_id * num_rlzs
    res.rlz_slice = slice(start, start + num_rlzs)
    res.events_by_grp = hazard.events_by_grp
    res.eff_ruptures = hazard.eff_ruptures
    return res
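
# A toy sketch (assumed names, not engine code) of the realization-slice
# bookkeeping above: with num_rlzs realizations per source model, the
# model with ordinal sm_id owns the global realization indices
# [sm_id * num_rlzs, (sm_id + 1) * num_rlzs).
def rlz_slice(sm_id, num_rlzs):
    start = sm_id * num_rlzs
    return slice(start, start + num_rlzs)

assert rlz_slice(0, 4) == slice(0, 4)
assert rlz_slice(2, 4) == slice(8, 12)  # third model -> rlzs 8..11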
def _gen_riskinputs(self, kind, eps, num_events):
    assets_by_site = self.assetcol.assets_by_site()
    dstore = self.can_read_parent() or self.datastore
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        # build the riskinputs
        if kind == 'poe':  # hcurves, shape (R, N)
            getter = PmapGetter(dstore, self.rlzs_assoc, [sid])
            getter.num_rlzs = self.R
        else:  # gmf
            getter = GmfDataGetter(dstore, [sid], self.R,
                                   self.oqparam.imtls)
        if dstore is self.datastore:
            # read the hazard data in the controller node
            getter.init()
        else:
            # the datastore must be closed to avoid the HDF5 fork bug
            assert dstore.hdf5 == (), '%s is not closed!' % dstore
        for block in general.block_splitter(assets, 1000):
            # dictionary of epsilons for the reduced assets
            reduced_eps = {ass.ordinal: eps[ass.ordinal] for ass in block
                           if eps is not None and len(eps)}
            yield riskinput.RiskInput(getter, [block], reduced_eps)
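
# Simplified stand-in (an assumption, not the real baselib code) for the
# general.block_splitter call above: split a sequence into consecutive
# blocks of at most `max_items` elements, so no task sees more than a
# bounded number of assets.
def simple_block_splitter(items, max_items):
    block = []
    for item in items:
        block.append(item)
        if len(block) >= max_items:
            yield block
            block = []
    if block:  # emit the trailing, possibly shorter, block
        yield block

assert list(simple_block_splitter(range(5), 2)) == [[0, 1], [2, 3], [4]]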
def _gen_riskinputs(self, kind, eps, num_events):
    rinfo_dt = numpy.dtype([('sid', U16), ('num_assets', U16)])
    rinfo = []
    assets_by_site = self.assetcol.assets_by_site()
    dstore = self.can_read_parent() or self.datastore
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        # build the riskinputs
        if kind == 'poe':  # hcurves, shape (R, N)
            getter = getters.PmapGetter(dstore, self.rlzs_assoc, [sid])
            getter.num_rlzs = self.R
        else:  # gmf
            getter = getters.GmfDataGetter(dstore, [sid], self.R)
        if dstore is self.datastore:
            # read the hazard data in the controller node
            getter.init()
        else:
            # the datastore must be closed to avoid the HDF5 fork bug
            assert dstore.hdf5 == (), '%s is not closed!' % dstore
        for block in general.block_splitter(
                assets, self.oqparam.assets_per_site_limit):
            # dictionary of epsilons for the reduced assets
            reduced_eps = {ass.ordinal: eps[ass.ordinal] for ass in block
                           if eps is not None and len(eps)}
            yield riskinput.RiskInput(getter, [block], reduced_eps)
            rinfo.append((sid, len(block)))
            if len(block) >= TWO16:
                logging.error('There are %d assets on site #%d!',
                              len(block), sid)
    self.datastore['riskinput_info'] = numpy.array(rinfo, rinfo_dt)
def build_riskinputs(self, kind, eps=None, num_events=0):
    """
    :param kind: kind of hazard getter, can be 'poe' or 'gmf'
    :param eps: a matrix of epsilons (or None)
    :param num_events: how many events there are
    :returns: a list of RiskInputs objects, sorted by IMT.
    """
    logging.info('There are %d realizations', self.R)
    imtls = self.oqparam.imtls
    if not set(self.oqparam.risk_imtls) & set(imtls):
        rsk = ', '.join(self.oqparam.risk_imtls)
        haz = ', '.join(imtls)
        raise ValueError('The IMTs in the risk models (%s) are disjoint '
                         'from the IMTs in the hazard (%s)' % (rsk, haz))
    num_tasks = self.oqparam.concurrent_tasks or 1
    if not hasattr(self, 'assetcol'):
        self.assetcol = self.datastore['assetcol']
    self.riskmodel.taxonomy = self.assetcol.tagcol.taxonomy
    assets_by_site = self.assetcol.assets_by_site()
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        sid_weight_pairs = [(sid, len(assets))
                            for sid, assets in enumerate(assets_by_site)]
        blocks = general.split_in_blocks(sid_weight_pairs, num_tasks,
                                         weight=operator.itemgetter(1))
        dstore = self.can_read_parent() or self.datastore
        for block in blocks:
            sids = numpy.array([sid for sid, _weight in block])
            reduced_assets = assets_by_site[sids]
            # dictionary of epsilons for the reduced assets
            reduced_eps = {}
            for assets in reduced_assets:
                for ass in assets:
                    if eps is not None and len(eps):
                        reduced_eps[ass.ordinal] = eps[ass.ordinal]
            # build the riskinputs
            if kind == 'poe':  # hcurves, shape (R, N)
                getter = PmapGetter(dstore, sids, self.rlzs_assoc)
                getter.num_rlzs = self.R
            else:  # gmf
                getter = GmfDataGetter(dstore, sids, self.R, num_events)
            if dstore is self.datastore:
                # read the hazard data in the controller node
                logging.info('Reading hazard')
                getter.init()
            else:
                # the datastore must be closed to avoid the HDF5 fork bug
                assert dstore.hdf5 == (), '%s is not closed!' % dstore
            ri = riskinput.RiskInput(getter, reduced_assets, reduced_eps)
            if ri.weight > 0:
                riskinputs.append(ri)
    assert riskinputs
    logging.info('Built %d risk inputs', len(riskinputs))
    return riskinputs
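
# A minimal sketch (an assumption, not the real general.split_in_blocks
# implementation) of the weighted splitting used above: partition
# (sid, weight) pairs into roughly `hint` blocks of comparable total
# weight, so each task receives a similar number of assets.
def split_by_weight(pairs, hint, weight=lambda pair: pair[1]):
    total = sum(weight(p) for p in pairs)
    max_weight = total / hint  # target weight per block
    block, acc = [], 0
    for pair in pairs:
        block.append(pair)
        acc += weight(pair)
        if acc >= max_weight:
            yield block
            block, acc = [], 0
    if block:
        yield block

pairs = [(0, 5), (1, 1), (2, 1), (3, 5)]
blocks = list(split_by_weight(pairs, hint=2))
assert sum(len(b) for b in blocks) == len(pairs)  # nothing is lost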
def build_riskinputs(self, kind, eps=None, eids=None):
    """
    :param kind: kind of hazard getter, can be 'poe' or 'gmf'
    :param eps: a matrix of epsilons (or None)
    :param eids: an array of event IDs (or None)
    :returns: a list of RiskInputs objects, sorted by IMT.
    """
    imtls = self.oqparam.imtls
    if not set(self.oqparam.risk_imtls) & set(imtls):
        rsk = ', '.join(self.oqparam.risk_imtls)
        haz = ', '.join(imtls)
        raise ValueError('The IMTs in the risk models (%s) are disjoint '
                         'from the IMTs in the hazard (%s)' % (rsk, haz))
    num_tasks = self.oqparam.concurrent_tasks or 1
    assets_by_site = self.assetcol.assets_by_site()
    self.tagmask = self.assetcol.tagmask()
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        sid_weight_pairs = [(sid, len(assets))
                            for sid, assets in enumerate(assets_by_site)]
        blocks = general.split_in_blocks(sid_weight_pairs, num_tasks,
                                         weight=operator.itemgetter(1))
        dstore = self.can_read_parent()
        for block in blocks:
            sids = numpy.array([sid for sid, _weight in block])
            reduced_assets = assets_by_site[sids]
            # dictionary of epsilons for the reduced assets
            reduced_eps = {}
            for assets in reduced_assets:
                for ass in assets:
                    ass.tagmask = self.tagmask[ass.ordinal]
                    if eps is not None and len(eps):
                        reduced_eps[ass.ordinal] = eps[ass.ordinal]
            # build the riskinputs
            if dstore is None:
                dstore = self.datastore
            if kind == 'poe':  # hcurves, shape (R, N)
                getter = calc.PmapGetter(dstore, sids)
                getter.num_rlzs = self.R
            else:  # gmf
                getter = riskinput.GmfDataGetter(dstore, sids, self.R, eids)
            if dstore is self.datastore:
                # read the hazard data in the controller node
                logging.info('Reading hazard')
                getter.init()
            ri = riskinput.RiskInput(getter, reduced_assets, reduced_eps)
            if ri.weight > 0:
                riskinputs.append(ri)
    assert riskinputs
    logging.info('Built %d risk inputs', len(riskinputs))
    return riskinputs
def start_tasks(self, sm_id, ruptures_by_grp, sitecol, assetcol, riskmodel,
                imtls, trunc_level, correl_model, min_iml, monitor):
    """
    :param sm_id: source model ordinal
    :param ruptures_by_grp: dictionary of ruptures by src_group_id
    :param sitecol: a SiteCollection instance
    :param assetcol: an AssetCollection instance
    :param riskmodel: a RiskModel instance
    :param imtls: Intensity Measure Types and Levels
    :param trunc_level: truncation level
    :param correl_model: correlation model
    :param min_iml: vector of minimum intensities, one per IMT
    :param monitor: a Monitor instance
    :returns: an IterResult instance
    """
    csm_info = self.csm_info.get_info(sm_id)
    grp_ids = sorted(csm_info.get_sm_by_grp())
    rlzs_assoc = csm_info.get_rlzs_assoc()
    # prepare the risk inputs
    allargs = []
    ruptures_per_block = self.oqparam.ruptures_per_block
    try:
        csm_info = self.csm.info
    except AttributeError:  # there is no .csm if --hc was given
        csm_info = self.datastore['csm_info']
    samples_by_grp = csm_info.get_samples_by_grp()
    num_events = 0
    for grp_id in grp_ids:
        rlzs_by_gsim = rlzs_assoc.get_rlzs_by_gsim(grp_id)
        samples = samples_by_grp[grp_id]
        for rupts in block_splitter(ruptures_by_grp.get(grp_id, []),
                                    ruptures_per_block):
            n_events = sum(ebr.multiplicity for ebr in rupts)
            eps = self.get_eps(self.start, self.start + n_events)
            num_events += n_events
            self.start += n_events
            getter = riskinput.GmfGetter(
                rlzs_by_gsim, rupts, sitecol, imtls, min_iml,
                self.oqparam.maximum_distance, trunc_level, correl_model,
                samples)
            ri = riskinput.RiskInput(getter, self.assets_by_site, eps)
            allargs.append((ri, riskmodel, assetcol, monitor))
    self.vals = self.assetcol.values()
    taskname = '%s#%d' % (event_based_risk.__name__, sm_id + 1)
    ires = parallel.Starmap(
        event_based_risk, allargs, name=taskname).submit_all()
    ires.num_ruptures = {
        sg_id: len(rupts) for sg_id, rupts in ruptures_by_grp.items()}
    ires.num_events = num_events
    ires.num_rlzs = len(rlzs_assoc.realizations)
    ires.sm_id = sm_id
    return ires
def _gen_riskinputs_poe(self, dstore):
    assets_by_site = self.assetcol.assets_by_site()
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        # hcurves, shape (R, N)
        ws = [rlz.weight for rlz in self.realizations]
        getter = getters.PmapGetter(dstore, ws, [sid], self.oqparam.imtls)
        for block in general.block_splitter(
                assets, self.oqparam.assets_per_site_limit):
            yield riskinput.RiskInput(sid, getter, numpy.array(block))
            if len(block) >= TWO16:
                logging.error('There are %d assets on site #%d!',
                              len(block), sid)
def build_riskinputs(self, kind, hazards, eps=numpy.zeros(0), eids=None):
    """
    :param kind: kind of hazard getter, can be 'poe' or 'gmf'
    :param hazards: a (composite) array of shape (R, N, ...)
    :param eps: a matrix of epsilons (possibly empty)
    :param eids: an array of event IDs (or None)
    :returns: a list of RiskInputs objects, sorted by IMT.
    """
    self.check_poes(hazards)
    imtls = self.oqparam.imtls
    if not set(self.oqparam.risk_imtls) & set(imtls):
        rsk = ', '.join(self.oqparam.risk_imtls)
        haz = ', '.join(imtls)
        raise ValueError('The IMTs in the risk models (%s) are disjoint '
                         'from the IMTs in the hazard (%s)' % (rsk, haz))
    num_tasks = self.oqparam.concurrent_tasks or 1
    assets_by_site = self.assetcol.assets_by_site()
    self.tagmask = self.assetcol.tagmask()
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        sid_weight_pairs = [(i, len(assets))
                            for i, assets in enumerate(assets_by_site)]
        blocks = general.split_in_blocks(sid_weight_pairs, num_tasks,
                                         weight=operator.itemgetter(1))
        for block in blocks:
            sids = numpy.array([sid for sid, _weight in block])
            reduced_assets = assets_by_site[sids]
            # dictionary of epsilons for the reduced assets
            reduced_eps = collections.defaultdict(F32)
            if len(eps):
                for assets in reduced_assets:
                    for asset in assets:
                        reduced_eps[asset.ordinal] = eps[asset.ordinal]
            reduced_mask = self.tagmask[_get_aids(reduced_assets)]
            # build the riskinputs
            ri = riskinput.RiskInput(
                riskinput.HazardGetter(kind, hazards[:, sids], imtls, eids),
                reduced_assets, reduced_mask, reduced_eps)
            if ri.weight > 0:
                riskinputs.append(ri)
    assert riskinputs
    logging.info('Built %d risk inputs', len(riskinputs))
    return riskinputs
def _gen_riskinputs(self, kind, eps, num_events):
    num_tasks = self.oqparam.concurrent_tasks or 1
    assets_by_site = self.assetcol.assets_by_site()
    if kind == 'poe':
        indices = None
    else:
        indices = self.datastore['gmf_data/indices'].value
    dstore = self.can_read_parent() or self.datastore
    sid_weight = []
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        elif indices is None:
            weight = len(assets)
        else:
            idx = indices[sid]
            if indices.dtype.names:  # engine < 3.2
                num_gmfs = sum(stop - start for start, stop in idx)
            else:  # engine >= 3.2
                num_gmfs = (idx[1] - idx[0]).sum()
            weight = len(assets) * (num_gmfs or 1)
        sid_weight.append((sid, weight))
    for block in general.split_in_blocks(sid_weight, num_tasks,
                                         weight=operator.itemgetter(1)):
        sids = numpy.array([sid for sid, _weight in block])
        reduced_assets = assets_by_site[sids]
        # dictionary of epsilons for the reduced assets
        reduced_eps = {}
        for assets in reduced_assets:
            for ass in assets:
                if eps is not None and len(eps):
                    reduced_eps[ass.ordinal] = eps[ass.ordinal]
        # build the riskinputs
        if kind == 'poe':  # hcurves, shape (R, N)
            getter = PmapGetter(dstore, self.rlzs_assoc, sids)
            getter.num_rlzs = self.R
        else:  # gmf
            getter = GmfDataGetter(dstore, sids, self.R, num_events,
                                   self.oqparam.imtls)
        if dstore is self.datastore:
            # read the hazard data in the controller node
            getter.init()
        else:
            # the datastore must be closed to avoid the HDF5 fork bug
            assert dstore.hdf5 == (), '%s is not closed!' % dstore
        ri = riskinput.RiskInput(getter, reduced_assets, reduced_eps)
        ri.weight = block.weight
        yield ri
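
# Hedged numpy sketch of the two index layouts handled above (the sample
# values are illustrative). Before engine 3.2 each site's entry is a list
# of (start, stop) pairs; from 3.2 on it is a pair of arrays
# (starts, stops). Both encodings count the same number of GMF rows.
import numpy

old_idx = [(0, 3), (7, 9)]               # engine < 3.2 style
new_idx = numpy.array([[0, 7], [3, 9]])  # engine >= 3.2 style
assert sum(stop - start for start, stop in old_idx) == 5
assert (new_idx[1] - new_idx[0]).sum() == 5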
def build_riskinputs(self, kind, hazards_by_rlz, eps=numpy.zeros(0)):
    """
    :param kind: kind of hazard getter, can be 'poe' or 'gmf'
    :param hazards_by_rlz:
        a dictionary rlz -> IMT -> array of length num_sites
    :param eps: a matrix of epsilons (possibly empty)
    :returns: a list of RiskInputs objects, sorted by IMT.
    """
    self.check_poes(hazards_by_rlz)
    imtls = self.oqparam.imtls
    if not set(self.oqparam.risk_imtls) & set(imtls):
        rsk = ', '.join(self.oqparam.risk_imtls)
        haz = ', '.join(imtls)
        raise ValueError('The IMTs in the risk models (%s) are disjoint '
                         'from the IMTs in the hazard (%s)' % (rsk, haz))
    num_tasks = self.oqparam.concurrent_tasks or 1
    rlzs = range(len(hazards_by_rlz))
    assets_by_site = self.assetcol.assets_by_site()
    with self.monitor('building riskinputs', autoflush=True):
        riskinputs = []
        idx_weight_pairs = [(i, len(assets))
                            for i, assets in enumerate(assets_by_site)]
        blocks = general.split_in_blocks(idx_weight_pairs, num_tasks,
                                         weight=operator.itemgetter(1))
        for block in blocks:
            indices = numpy.array([idx for idx, _weight in block])
            reduced_assets = assets_by_site[indices]
            # dictionary of epsilons for the reduced assets
            reduced_eps = collections.defaultdict(F32)
            if len(eps):
                for assets in reduced_assets:
                    for asset in assets:
                        reduced_eps[asset.ordinal] = eps[asset.ordinal]
            # build the riskinputs
            ri = riskinput.RiskInput(
                riskinput.HazardGetter(kind, 0, {None: rlzs},
                                       hazards_by_rlz, indices,
                                       list(imtls)),
                reduced_assets, reduced_eps)
            if ri.weight > 0:
                riskinputs.append(ri)
    assert riskinputs
    logging.info('Built %d risk inputs', len(riskinputs))
    return riskinputs
def _gen_riskinputs(self, kind):
    rinfo_dt = numpy.dtype([('sid', U16), ('num_assets', U16)])
    rinfo = []
    assets_by_site = self.assetcol.assets_by_site()
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        getter = self.get_getter(kind, sid)
        for block in general.block_splitter(
                assets, self.oqparam.assets_per_site_limit):
            yield riskinput.RiskInput(getter, numpy.array(block))
            rinfo.append((sid, len(block)))
            if len(block) >= TWO16:
                logging.error('There are %d assets on site #%d!',
                              len(block), sid)
    self.datastore['riskinput_info'] = numpy.array(rinfo, rinfo_dt)
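
# Small sketch of the `riskinput_info` record array built above: one
# (site id, number of assets) row per generated block, stored with a
# compact uint16 dtype (the engine aliases numpy.uint16 as U16).
import numpy

rinfo_dt = numpy.dtype([('sid', numpy.uint16), ('num_assets', numpy.uint16)])
rinfo = numpy.array([(0, 12), (3, 7)], rinfo_dt)
assert rinfo['num_assets'].sum() == 19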
def _gen_riskinputs(self, kind, eps, num_events):
    rinfo_dt = numpy.dtype([('sid', U16), ('num_assets', U16)])
    rinfo = []
    assets_by_site = self.assetcol.assets_by_site()
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        getter = self.get_getter(kind, sid)
        for block in general.block_splitter(
                assets, self.oqparam.assets_per_site_limit):
            # dictionary of epsilons for the reduced assets
            reduced_eps = {ass.ordinal: eps[ass.ordinal] for ass in block
                           if eps is not None and len(eps)}
            yield riskinput.RiskInput(getter, [block], reduced_eps)
            rinfo.append((sid, len(block)))
            if len(block) >= TWO16:
                logging.error('There are %d assets on site #%d!',
                              len(block), sid)
    self.datastore['riskinput_info'] = numpy.array(rinfo, rinfo_dt)
def compute_losses(ssm, src_filter, param, riskmodel,
                   imts, trunc_level, correl_model, min_iml, monitor):
    """
    Compute the losses for a single source model. The returned List
    carries the losses plus bookkeeping attributes (`.sm_id`,
    `.num_events`, `.rlz_slice`, `.events_by_grp`, `.eff_ruptures`).

    :param ssm: CompositeSourceModel containing a single source model
    :param src_filter: a SourceFilter instance
    :param param: a dictionary of parameters
    :param riskmodel: a RiskModel instance
    :param imts: a list of Intensity Measure Types
    :param trunc_level: truncation level
    :param correl_model: correlation model
    :param min_iml: vector of minimum intensities, one per IMT
    :param monitor: a Monitor instance
    :returns: a List containing the losses by taxonomy and some attributes
    """
    [grp] = ssm.src_groups
    res = List()
    gsims = ssm.gsim_lt.values[DEFAULT_TRT]
    ruptures_by_grp = compute_ruptures(
        grp, src_filter, gsims, param, monitor)
    [(grp_id, ebruptures)] = ruptures_by_grp.items()
    rlzs_assoc = ssm.info.get_rlzs_assoc()
    samples = ssm.info.get_samples_by_grp()
    num_rlzs = len(rlzs_assoc.realizations)
    rlzs_by_gsim = rlzs_assoc.get_rlzs_by_gsim(DEFAULT_TRT)
    getter = getters.GmfGetter(
        rlzs_by_gsim, ebruptures, src_filter.sitecol, imts, min_iml,
        src_filter.integration_distance, trunc_level, correl_model,
        samples[grp_id])
    ri = riskinput.RiskInput(getter, param['assetcol'].assets_by_site())
    res.append(event_based_risk(ri, riskmodel, param, monitor))
    res.sm_id = ssm.sm_id
    res.num_events = len(ri.hazard_getter.eids)
    start = res.sm_id * num_rlzs
    res.rlz_slice = slice(start, start + num_rlzs)
    res.events_by_grp = ruptures_by_grp.events_by_grp
    res.eff_ruptures = ruptures_by_grp.eff_ruptures
    return res
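
# Illustrative sketch (assumed names and values) of the `min_iml` idea
# passed to the getter above: events whose ground-motion values fall below
# a per-IMT minimum intensity on every IMT carry no risk signal and can be
# dropped, which is the effect of a large minimum_intensity setting.
import numpy

gmvs = numpy.array([[0.001, 0.02],   # event 0: below both minima
                    [0.050, 0.01]])  # event 1: PGA above its minimum
min_iml = numpy.array([0.01, 0.05])  # one minimum per IMT
mask = (gmvs >= min_iml).any(axis=1)  # keep events relevant for some IMT
assert mask.tolist() == [False, True]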
def _gen_riskinputs(self, kind):
    hazard = ('gmf_data' in self.datastore or 'poes' in self.datastore
              or 'multi_peril' in self.datastore)
    if not hazard:
        raise InvalidFile('Did you forget gmfs_csv|hazard_curves_csv|'
                          'multi_peril_csv in %s?'
                          % self.oqparam.inputs['job_ini'])
    rinfo_dt = numpy.dtype([('sid', U16), ('num_assets', U16)])
    rinfo = []
    assets_by_site = self.assetcol.assets_by_site()
    for sid, assets in enumerate(assets_by_site):
        if len(assets) == 0:
            continue
        getter = self.get_getter(kind, sid)
        for block in general.block_splitter(
                assets, self.oqparam.assets_per_site_limit):
            yield riskinput.RiskInput(sid, getter, numpy.array(block))
            rinfo.append((sid, len(block)))
            if len(block) >= TWO16:
                logging.error('There are %d assets on site #%d!',
                              len(block), sid)
    self.datastore['riskinput_info'] = numpy.array(rinfo, rinfo_dt)
def start_tasks(self, sm_id, sitecol, assetcol, riskmodel, imtls,
                trunc_level, correl_model, min_iml):
    """
    :param sm_id: source model ordinal
    :param sitecol: a SiteCollection instance
    :param assetcol: an AssetCollection instance
    :param riskmodel: a RiskModel instance
    :param imtls: Intensity Measure Types and Levels
    :param trunc_level: truncation level
    :param correl_model: correlation model
    :param min_iml: vector of minimum intensities, one per IMT
    :returns: an IterResult instance
    """
    sm_info = self.csm_info.get_info(sm_id)
    grp_ids = sorted(sm_info.get_sm_by_grp())
    rlzs_assoc = sm_info.get_rlzs_assoc()
    # prepare the risk inputs
    allargs = []
    ruptures_per_block = self.oqparam.ruptures_per_block
    try:
        csm_info = self.csm.info
    except AttributeError:  # there is no .csm if --hc was given
        csm_info = self.datastore['csm_info']
    samples_by_grp = csm_info.get_samples_by_grp()
    num_events = 0
    num_ruptures = {}
    taskname = '%s#%d' % (event_based_risk.__name__, sm_id + 1)
    monitor = self.monitor(taskname)
    for grp_id in grp_ids:
        ruptures = self.ruptures_by_grp.get(grp_id, [])
        rlzs_by_gsim = rlzs_assoc.get_rlzs_by_gsim(grp_id)
        samples = samples_by_grp[grp_id]
        num_ruptures[grp_id] = len(ruptures)
        from_parent = hasattr(ruptures, 'split')
        if from_parent:  # read the ruptures from the parent datastore
            logging.info('Reading ruptures group #%d', grp_id)
            with self.monitor('reading ruptures', measuremem=True):
                blocks = ruptures.split(ruptures_per_block)
        else:  # the ruptures are already in memory
            blocks = block_splitter(ruptures, ruptures_per_block)
        for rupts in blocks:
            n_events = (rupts.n_events if from_parent
                        else sum(ebr.multiplicity for ebr in rupts))
            eps = self.get_eps(self.start, self.start + n_events)
            num_events += n_events
            self.start += n_events
            getter = getters.GmfGetter(
                rlzs_by_gsim, rupts, sitecol, imtls, min_iml,
                self.oqparam.maximum_distance, trunc_level, correl_model,
                self.oqparam.filter_distance, samples)
            ri = riskinput.RiskInput(getter, self.assets_by_site, eps)
            allargs.append((ri, riskmodel, assetcol, monitor))
    if self.datastore.parent:  # avoid hdf5 fork issues
        self.datastore.parent.close()
    ires = parallel.Starmap(
        event_based_risk, allargs, name=taskname).submit_all()
    ires.num_ruptures = num_ruptures
    ires.num_events = num_events
    ires.num_rlzs = len(rlzs_assoc.realizations)
    ires.sm_id = sm_id
    return ires
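
# Toy sketch (assumed names, not engine code) of the event bookkeeping in
# start_tasks above: each rupture block consumes sum(multiplicity) epsilon
# rows, taken from a running [start, start + n_events) window so that the
# slices handed to the tasks never overlap.
class EBR:
    def __init__(self, multiplicity):
        self.multiplicity = multiplicity

blocks = [[EBR(2), EBR(3)], [EBR(1)]]
start, slices = 0, []
for rupts in blocks:
    n_events = sum(ebr.multiplicity for ebr in rupts)
    slices.append((start, start + n_events))
    start += n_events
assert slices == [(0, 5), (5, 6)]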