def __call__(self, rlz):
    gsim = self.gsims[rlz.ordinal]
    gmfdict = collections.defaultdict(dict)
    for computer in self.computers:
        rup = computer.rupture
        if self.samples > 1:
            eids = get_array(rup.events, sample=rlz.sampleid)['eid']
        else:
            eids = rup.events['eid']
        array = computer.compute(gsim, len(eids))  # (i, n, e)
        for imti, imt in enumerate(self.imts):
            min_gmv = self.min_iml[imti]
            for eid, gmf in zip(eids, array[imti].T):
                for sid, gmv in zip(computer.sites.sids, gmf):
                    if gmv > min_gmv:
                        dic = gmfdict[sid]
                        if imt in dic:
                            dic[imt].append((gmv, eid))
                        else:
                            dic[imt] = [(gmv, eid)]
    dicts = []  # a list of dictionaries imt -> array(gmv, eid)
    for sid in self.sids:
        dic = gmfdict[sid]
        for imt in dic:
            dic[imt] = arr = numpy.array(dic[imt], self.dt)
            self.gmfbytes += arr.nbytes
        dicts.append(dic)
    return dicts

def _aggregate(outputs, compositemodel, agg, ass, idx, result, monitor):
    # update the result dictionary and the agg array with each output
    lrs = set()
    for out in outputs:
        l, r = out.lr
        lrs.add(out.lr)
        loss_type = compositemodel.loss_types[l]
        indices = numpy.array([idx[eid] for eid in out.eids])
        agglr = agg[l, r]
        for i, asset in enumerate(out.assets):
            aid = asset.ordinal
            loss_ratios = out.loss_ratios[i]
            losses = loss_ratios * asset.value(loss_type)
            # average losses
            if monitor.avg_losses:
                result['avglosses'][l, r][aid] += (
                    loss_ratios.sum(axis=0) * monitor.ses_ratio)
            # asset losses
            if monitor.loss_ratios:
                data = [(eid, aid, loss)
                        for eid, loss in zip(out.eids, loss_ratios)
                        if loss.sum() > 0]
                if data:
                    ass[l, r].append(numpy.array(data, monitor.ela_dt))
            # agglosses
            agglr[indices] += losses
    return sorted(lrs)

def __init__(self, oqparam, rmdict, retrodict):
    self.damage_states = []
    self._riskmodels = {}
    if getattr(oqparam, 'limit_states', []):
        # classical_damage/scenario_damage calculator
        if oqparam.calculation_mode in ('classical', 'scenario'):
            # case when the risk files are in the job_hazard.ini file
            oqparam.calculation_mode += '_damage'
        self.damage_states = ['no_damage'] + oqparam.limit_states
        delattr(oqparam, 'limit_states')
        for taxonomy, ffs_by_lt in rmdict.items():
            self._riskmodels[taxonomy] = riskmodels.get_riskmodel(
                taxonomy, oqparam, fragility_functions=ffs_by_lt)
    elif oqparam.calculation_mode.endswith('_bcr'):
        # classical_bcr calculator
        for (taxonomy, vf_orig), (taxonomy_, vf_retro) in \
                zip(rmdict.items(), retrodict.items()):
            assert taxonomy == taxonomy_  # same imt and taxonomy
            self._riskmodels[taxonomy] = riskmodels.get_riskmodel(
                taxonomy, oqparam,
                vulnerability_functions_orig=vf_orig,
                vulnerability_functions_retro=vf_retro)
    else:
        # classical, event based and scenario calculators
        for taxonomy, vfs in rmdict.items():
            for vf in vfs.values():
                # set the seed; this is important for the case of
                # VulnerabilityFunctionWithPMF
                vf.seed = oqparam.random_seed
            self._riskmodels[taxonomy] = riskmodels.get_riskmodel(
                taxonomy, oqparam, vulnerability_functions=vfs)
    self.init(oqparam)

def save(self, eid, imti, rlz, gmf, sids):
    for gmv, sid in zip(gmf, sids):
        key = '%s/%s/%s' % (sid, self.imts[imti], rlz.ordinal)
        glist, elist = self.dic[key]
        glist.append(gmv)
        elist.append(eid)
    self.nbytes += gmf.nbytes * 2

def generate_event_set(self, background_sids):
    """
    Generates the event set corresponding to a particular branch
    """
    # get rates from file
    with h5py.File(self.source_file, 'r') as hdf5:
        rates = hdf5[self.idx_set["rate_idx"]].value
        occurrences = self.tom.sample_number_of_occurrences(rates)
        indices = numpy.where(occurrences)[0]
        logging.debug(
            'Considering "%s", %d ruptures', self.branch_id, len(indices))
        # get ruptures from the indices
        ruptures = []
        rupture_occ = []
        for idx, n_occ in zip(indices, occurrences[indices]):
            ucerf_rup, _ = get_ucerf_rupture(
                hdf5, idx, self.idx_set, self.tom, self.sites,
                self.integration_distance, self.mesh_spacing,
                self.tectonic_region_type)
            if ucerf_rup:
                ruptures.append(ucerf_rup)
                rupture_occ.append(n_occ)
        # sample background sources
        background_ruptures, background_n_occ = sample_background_model(
            hdf5, self.idx_set["grid_key"], self.tom, background_sids,
            self.min_mag, self.npd, self.hdd, self.usd, self.lsd, self.msr,
            self.aspect, self.tectonic_region_type)
        ruptures.extend(background_ruptures)
        rupture_occ.extend(background_n_occ)
    return ruptures, rupture_occ

def build_ruptures(sources, src_filter, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = []
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    res.trt = DEFAULT_TRT
    background_sids = src.get_background_sids(src_filter)
    samples = getattr(src, 'samples', 1)
    n_occ = AccumDict(accum=0)
    t0 = time.time()
    with sampl_mon:
        for sam_idx in range(samples):
            for ses_idx, ses_seed in param['ses_seeds']:
                seed = sam_idx * TWO16 + ses_seed
                rups, occs = generate_event_set(
                    src, background_sids, src_filter, ses_idx, seed)
                for rup, occ in zip(rups, occs):
                    n_occ[rup] += occ
    tot_occ = sum(n_occ.values())
    dic = {'eff_ruptures': {src.src_group_id: src.num_ruptures}}
    eb_ruptures = [EBRupture(rup, src.id, src.src_group_id, n, samples)
                   for rup, n in n_occ.items()]
    dic['rup_array'] = stochastic.get_rup_array(eb_ruptures, src_filter)
    dt = time.time() - t0
    dic['calc_times'] = {src.id: numpy.array([tot_occ, dt], F32)}
    return dic

def _collect_all_data(self):
    # called only if 'rcurves-rlzs' in dstore; return a list of outputs
    all_data = []
    assets = self.datastore['asset_refs'].value[self.assetcol.array['idx']]
    rlzs = self.rlzs_assoc.realizations
    insured = self.oqparam.insured_losses
    if self.oqparam.avg_losses:
        avg_losses = self.datastore['avg_losses-rlzs'].value
    else:
        avg_losses = self.avg_losses
    r_curves = self.datastore['rcurves-rlzs'].value
    for loss_type, cbuilder in zip(
            self.riskmodel.loss_types, self.riskmodel.curve_builders):
        rcurves = r_curves[loss_type]
        asset_values = self.vals[loss_type]
        data = []
        for rlz in rlzs:
            average_losses = avg_losses[loss_type][:, rlz.ordinal]
            average_insured_losses = (
                avg_losses[loss_type + '_ins'][:, rlz.ordinal]
                if insured else None)
            loss_curves = _old_loss_curves(
                asset_values, rcurves[:, rlz.ordinal, 0], cbuilder.ratios)
            insured_curves = _old_loss_curves(
                asset_values, rcurves[:, rlz.ordinal, 1],
                cbuilder.ratios) if insured else None
            out = scientific.Output(
                assets, loss_type, rlz.ordinal, rlz.weight,
                loss_curves=loss_curves, insured_curves=insured_curves,
                average_losses=average_losses,
                average_insured_losses=average_insured_losses)
            data.append(out)
        all_data.append(data)
    return all_data

def calcgmfs(self, seed, events, rlzs_by_gsim, min_iml=None):
    """
    Yield the ground motion field for each seismic event.

    :param seed: seed for the numpy random number generator
    :param events: composite array of seismic events (eid, ses, occ, samples)
    :param rlzs_by_gsim: a dictionary {gsim instance: realizations}
    :yields: tuples (eid, imti, rlz, gmf_sids)
    """
    sids = self.sites.sids
    imt_range = range(len(self.imts))
    for i, gsim in enumerate(self.gsims):
        for j, rlz in enumerate(rlzs_by_gsim[gsim]):
            if self.samples > 1:
                eids = get_array(events, sample=rlz.sampleid)['eid']
            else:
                eids = events['eid']
            arr = self.compute(seed + j, gsim, len(eids)).transpose(
                0, 2, 1)  # array of shape (I, E, S)
            for imti in imt_range:
                for eid, gmf in zip(eids, arr[imti]):
                    if min_iml is not None:  # is an array
                        ok = gmf >= min_iml[imti]
                        gmf_sids = (gmf[ok], sids[ok])
                    else:
                        gmf_sids = (gmf, sids)
                    if len(gmf):
                        yield eid, imti, rlz, gmf_sids

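# Illustrative sketch (not part of the engine): the min_iml filtering above
# is a plain numpy boolean-mask selection applied in parallel to the GMF
# values and the site IDs. The arrays below are hypothetical.
import numpy

_gmf = numpy.array([0.001, 0.2, 0.05])
_sids = numpy.array([0, 1, 2])
_ok = _gmf >= 0.01  # same test as `gmf >= min_iml[imti]`
assert _gmf[_ok].tolist() == [0.2, 0.05] and _sids[_ok].tolist() == [1, 2]
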
def __call__(self, fname, sm, apply_uncertainties, investigation_time):
    """
    :param fname: the full pathname of a source model file
    :param sm: the original source model
    :param apply_uncertainties: a function modifying the sources (or None)
    :param investigation_time: the investigation_time in the job.ini
    :returns: a copy of the original source model with changed sources, if any
    """
    check_nonparametric_sources(fname, sm, investigation_time)
    newsm = nrml.SourceModel(
        [], sm.name, sm.investigation_time, sm.start_time)
    for group in sm:
        newgroup = apply_uncertainties(group)
        newsm.src_groups.append(newgroup)
        if hasattr(newgroup, 'changed') and newgroup.changed.any():
            self.changes += newgroup.changed.sum()
            for src, changed in zip(newgroup, newgroup.changed):
                # redoing count_ruptures can be slow
                if changed:
                    src.num_ruptures = src.count_ruptures()
    self.fname_hits[fname] += 1
    return newsm

def __fromh5__(self, dic, attrs):
    # rebuild the map from sids and probs arrays
    array = dic['array']
    sids = dic['sids']
    self.shape_y = array.shape[1]
    self.shape_z = array.shape[2]
    for sid, prob in zip(sids, array):
        self[sid] = ProbabilityCurve(prob)

def get_mesh(oqparam):
    """
    Extract the mesh of points to compute from the sites, the sites_csv,
    or the region.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    global pmap, exposure, gmfs, eids
    if 'exposure' in oqparam.inputs and exposure is None:
        # read it only once
        exposure = get_exposure(oqparam)
    if oqparam.sites:
        return geo.Mesh.from_coords(oqparam.sites)
    elif 'sites' in oqparam.inputs:
        csv_data = open(oqparam.inputs['sites'], 'U').readlines()
        has_header = csv_data[0].startswith('site_id')
        if has_header:  # strip site_id
            data = []
            for i, line in enumerate(csv_data[1:]):
                row = line.replace(',', ' ').split()
                sid = row[0]
                if sid != str(i):
                    raise InvalidFile('%s: expected site_id=%d, got %s' % (
                        oqparam.inputs['sites'], i, sid))
                data.append(' '.join(row[1:]))
        elif 'gmfs' in oqparam.inputs:
            raise InvalidFile('Missing header in %(sites)s' % oqparam.inputs)
        else:
            data = [line.replace(',', ' ') for line in csv_data]
        coords = valid.coordinates(','.join(data))
        start, stop = oqparam.sites_slice
        c = coords[start:stop] if has_header else sorted(coords[start:stop])
        # TODO: sort=True below would break a lot of tests :-(
        return geo.Mesh.from_coords(c, sort=False)
    elif 'hazard_curves' in oqparam.inputs:
        fname = oqparam.inputs['hazard_curves']
        if fname.endswith('.csv'):
            mesh, pmap = get_pmap_from_csv(oqparam, fname)
        elif fname.endswith('.xml'):
            mesh, pmap = get_pmap_from_nrml(oqparam, fname)
        else:
            raise NotImplementedError('Reading from %s' % fname)
        return mesh
    elif 'gmfs' in oqparam.inputs:
        eids, gmfs = _get_gmfs(oqparam)  # sets oqparam.sites
        return geo.Mesh.from_coords(oqparam.sites)
    elif oqparam.region and oqparam.region_grid_spacing:
        poly = geo.Polygon.from_wkt(oqparam.region)
        try:
            mesh = poly.discretize(oqparam.region_grid_spacing)
            return geo.Mesh.from_coords(zip(mesh.lons, mesh.lats))
        except Exception:
            raise ValueError(
                'Could not discretize region %(region)s with grid spacing '
                '%(region_grid_spacing)s' % vars(oqparam))
    elif 'exposure' in oqparam.inputs:
        return exposure.mesh

def get_sitecol_assetcol(oqparam, haz_sitecol=None, cost_types=()):
    """
    :param oqparam: calculation parameters
    :param haz_sitecol: the hazard site collection
    :param cost_types: the expected cost types
    :returns: (site collection, asset collection, discarded)
    """
    global exposure
    asset_hazard_distance = oqparam.asset_hazard_distance['default']
    if exposure is None:
        # haz_sitecol not extracted from the exposure
        exposure = get_exposure(oqparam)
    if haz_sitecol is None:
        haz_sitecol = get_site_collection(oqparam)
    if oqparam.region_grid_spacing:
        haz_distance = oqparam.region_grid_spacing * 1.414
        if haz_distance != asset_hazard_distance:
            logging.debug(
                'Using asset_hazard_distance=%d km instead of %d km',
                haz_distance, asset_hazard_distance)
    else:
        haz_distance = asset_hazard_distance
    if haz_sitecol.mesh != exposure.mesh:
        # associate the assets to the hazard sites
        sitecol, assets_by, discarded = geo.utils.assoc(
            exposure.assets_by_site, haz_sitecol, haz_distance, 'filter')
        assets_by_site = [[] for _ in sitecol.complete.sids]
        num_assets = 0
        for sid, assets in zip(sitecol.sids, assets_by):
            assets_by_site[sid] = assets
            num_assets += len(assets)
        logging.info('Associated {:_d} assets to {:_d} sites'.format(
            num_assets, len(sitecol)))
    else:
        # asset sites and hazard sites are the same
        sitecol = haz_sitecol
        assets_by_site = exposure.assets_by_site
        discarded = []
        logging.info('Read %d sites and %d assets from the exposure',
                     len(sitecol), sum(len(a) for a in assets_by_site))
    assetcol = asset.AssetCollection(
        exposure, assets_by_site, oqparam.time_event, oqparam.aggregate_by)
    if assetcol.occupancy_periods:
        missing = set(cost_types) - set(exposure.cost_types['name']) - set(
            ['occupants'])
    else:
        missing = set(cost_types) - set(exposure.cost_types['name'])
    if missing and not oqparam.calculation_mode.endswith('damage'):
        raise InvalidFile('The exposure %s is missing %s' %
                          (oqparam.inputs['exposure'], missing))
    if (not oqparam.hazard_calculation_id and 'gmfs' not in oqparam.inputs
            and 'hazard_curves' not in oqparam.inputs
            and sitecol is not sitecol.complete):
        # the site collection cannot be reduced for predefined hazard; it
        # can in the other cases, typically with a grid which is mostly
        # empty (i.e. there are many hazard sites with no assets)
        assetcol.reduce_also(sitecol)
    return sitecol, assetcol, discarded

def build_loss_tables(dstore):
    """
    Compute the total losses by rupture and losses by rlzi.
    """
    oq = dstore['oqparam']
    L = len(oq.loss_dt().names)
    R = dstore['csm_info'].get_num_rlzs()
    events = dstore['events']
    serials = dstore['ruptures']['serial']
    rup_by_eid = dict(zip(events['eid'], events['rup_id']))
    idx_by_ser = dict(zip(serials, range(len(serials))))
    tbl = numpy.zeros((len(serials), L), F32)
    lbr = numpy.zeros((R, L), F32)  # losses by rlz
    for rec in dstore['losses_by_event'].value:  # call .value for speed
        rupid = rup_by_eid[rec['eid']]
        tbl[idx_by_ser[rupid]] += rec['loss']
        lbr[rec['rlzi']] += rec['loss']
    return tbl, lbr

def compute_ruptures(sources, src_filter, gsims, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param gsims: a list of GSIMs
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = AccumDict()
    serial = 1
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    filt_mon = monitor('filtering ruptures', measuremem=False)
    res.trt = DEFAULT_TRT
    t0 = time.time()
    ebruptures = []
    background_sids = src.get_background_sids(src_filter)
    sitecol = src_filter.sitecol
    idist = src_filter.integration_distance
    for sample in range(param['samples']):
        for ses_idx, ses_seed in param['ses_seeds']:
            seed = sample * event_based.TWO16 + ses_seed
            with sampl_mon:
                rups, n_occs = src.generate_event_set(
                    background_sids, src_filter, seed)
            with filt_mon:
                for rup, n_occ in zip(rups, n_occs):
                    rup.seed = seed
                    try:
                        r_sites, rrup = idist.get_closest(sitecol, rup)
                    except FarAwayRupture:
                        continue
                    indices = (numpy.arange(len(r_sites))
                               if r_sites.indices is None
                               else r_sites.indices)
                    events = []
                    for _ in range(n_occ):
                        events.append((0, src.src_group_id, ses_idx, sample))
                    if events:
                        evs = numpy.array(events, calc.event_dt)
                        ebruptures.append(
                            EBRupture(rup, indices, evs, serial))
                        serial += 1
    res.num_events = event_based.set_eids(ebruptures)
    res[src.src_group_id] = ebruptures
    res.calc_times[src.src_group_id] = {
        src.source_id: numpy.array(
            [src.weight, len(sitecol), time.time() - t0, 1])}
    if not param['save_ruptures']:
        res.events_by_grp = {
            grp_id: event_based.get_events(res[grp_id]) for grp_id in res}
    res.eff_ruptures = {src.src_group_id: src.num_ruptures}
    return res

def _save_maps(self, dic, aids):
    for key in ('loss_maps-rlzs', 'loss_maps-stats'):
        array = dic.get(key)  # shape (A, S)
        if array is not None:
            loss_maps = numpy.zeros(array.shape[:2], self.loss_maps_dt)
            for lti, lt in enumerate(self.loss_maps_dt.names):
                loss_maps[lt] = array[:, :, :, lti]
            for aid, arr in zip(aids, loss_maps):
                self.datastore[key][aid] = arr

def poe_map(src, s_sites, imtls, cmaker, trunclevel, bbs, ctx_mon, pne_mon,
            disagg_mon):
    """
    Compute the ProbabilityMap generated by the given source. Also,
    store some information in the monitors and optionally in the
    bounding boxes.
    """
    pmap = ProbabilityMap.build(
        len(imtls.array), len(cmaker.gsims), s_sites.sids, initvalue=1.)
    try:
        for rup in src.iter_ruptures():
            with ctx_mon:  # compute distances
                try:
                    sctx, rctx, dctx = cmaker.make_contexts(s_sites, rup)
                except FarAwayRupture:
                    continue
            with pne_mon:  # compute probabilities and update the pmap
                pnes = get_probability_no_exceedance(
                    rup, sctx, rctx, dctx, imtls, cmaker.gsims, trunclevel)
                for sid, pne in zip(sctx.sites.sids, pnes):
                    pmap[sid].array *= pne
            # add optional disaggregation information (bounding boxes)
            if bbs:
                with disagg_mon:
                    sids = set(sctx.sites.sids)
                    jb_dists = dctx.rjb
                    closest_points = rup.surface.get_closest_points(
                        sctx.sites.mesh)
                    bs = [bb for bb in bbs if bb.site_id in sids]
                    # NB: the assert below is always true; we are
                    # protecting against possible refactoring errors
                    assert len(bs) == len(jb_dists) == len(closest_points)
                    for bb, dist, p in zip(bs, jb_dists, closest_points):
                        bb.update([dist], [p.longitude], [p.latitude])
    except Exception as err:
        etype, err, tb = sys.exc_info()
        msg = 'An error occurred with source id=%s. Error: %s'
        msg %= (src.source_id, str(err))
        raise_(etype, msg, tb)
    return ~pmap

def ucerf_risk(riskinput, riskmodel, param, monitor):
    """
    :param riskinput: a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel: a :class:`openquake.risklib.riskinput.CompositeRiskModel`
        instance
    :param param: a dictionary of parameters
    :param monitor: :class:`openquake.baselib.performance.Monitor` instance
    :returns: a dictionary of numpy arrays of shape (L, R)
    """
    with monitor('getting hazard'):
        riskinput.hazard_getter.init()
        hazard = riskinput.hazard_getter.get_hazard()
    eids = riskinput.hazard_getter.eids
    A = len(riskinput.aids)
    E = len(eids)
    assert not param['insured_losses']
    L = len(riskmodel.lti)
    R = riskinput.hazard_getter.num_rlzs
    param['lrs_dt'] = numpy.dtype([('rlzi', U16), ('ratios', (F32, L))])
    agg = numpy.zeros((E, R, L), F32)
    avg = numpy.zeros((A, R, L), F32)
    result = dict(aids=riskinput.aids, avglosses=avg)
    # update the result dictionary and the agg array with each output
    for out in riskmodel.gen_outputs(riskinput, monitor, hazard):
        if len(out.eids) == 0:  # this happens for sites with no events
            continue
        r = out.rlzi
        idx = riskinput.hazard_getter.eid2idx
        for l, loss_ratios in enumerate(out):
            if loss_ratios is None:  # for GMFs below the minimum_intensity
                continue
            loss_type = riskmodel.loss_types[l]
            indices = numpy.array([idx[eid] for eid in out.eids])
            for a, asset in enumerate(out.assets):
                ratios = loss_ratios[a]  # shape (E, 1)
                aid = asset.ordinal
                losses = ratios * asset.value(loss_type)
                # average losses
                if param['avg_losses']:
                    avg[aid, :, :] = losses.sum(axis=0) * param['ses_ratio']
                # this is the critical loop: it is important to keep it
                # vectorized in terms of the event indices
                agg[indices, r, l] += losses[:, 0]  # 0 == no insured
    it = ((eid, r, losses)
          for eid, all_losses in zip(eids, agg)
          for r, losses in enumerate(all_losses) if losses.sum())
    result['agglosses'] = numpy.fromiter(it, param['elt_dt'])
    # store info about the GMFs, must be done at the end
    result['gmdata'] = riskinput.gmdata
    return result

def get_sitecol_assetcol(oqparam, haz_sitecol):
    """
    :param oqparam: calculation parameters
    :param haz_sitecol: the hazard site collection
    :returns: (site collection, asset collection) instances
    """
    global exposure
    if exposure is None:
        # haz_sitecol not extracted from the exposure
        exposure = get_exposure(oqparam)
    if oqparam.region_grid_spacing and not oqparam.region:
        # extract the hazard grid from the exposure
        exposure.mesh = exposure.mesh.get_convex_hull().dilate(
            oqparam.region_grid_spacing).discretize(
                oqparam.region_grid_spacing)
        haz_sitecol = get_site_collection(oqparam)
        haz_distance = oqparam.region_grid_spacing
        if haz_distance != oqparam.asset_hazard_distance:
            logging.info('Using asset_hazard_distance=%d km instead of %d km',
                         haz_distance, oqparam.asset_hazard_distance)
    else:
        haz_distance = oqparam.asset_hazard_distance
    if haz_sitecol.mesh != exposure.mesh:
        # associate the assets to the hazard sites
        tot_assets = sum(len(assets) for assets in exposure.assets_by_site)
        mode = 'strict' if oqparam.region_grid_spacing else 'filter'
        sitecol, assets_by = geo.utils.assoc(
            exposure.assets_by_site, haz_sitecol, haz_distance, mode)
        assets_by_site = [[] for _ in sitecol.complete.sids]
        num_assets = 0
        for sid, assets in zip(sitecol.sids, assets_by):
            assets_by_site[sid] = assets
            num_assets += len(assets)
        logging.info('Associated %d assets to %d sites',
                     num_assets, len(sitecol))
        if num_assets < tot_assets:
            logging.warn('Discarded %d assets outside the '
                         'asset_hazard_distance of %d km',
                         tot_assets - num_assets, haz_distance)
    else:
        # asset sites and hazard sites are the same
        sitecol = haz_sitecol
        assets_by_site = exposure.assets_by_site
    asset_refs = [exposure.asset_refs[asset.ordinal]
                  for assets in assets_by_site for asset in assets]
    assetcol = asset.AssetCollection(
        asset_refs, assets_by_site, exposure.tagcol,
        exposure.cost_calculator, oqparam.time_event,
        exposure.occupancy_periods)
    return sitecol, assetcol

def compute_hazard(sources, src_filter, rlzs_by_gsim, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param rlzs_by_gsim: a dictionary gsim -> rlzs
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = []
    serial = 1
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    filt_mon = monitor('filtering ruptures', measuremem=False)
    res.trt = DEFAULT_TRT
    ebruptures = []
    background_sids = src.get_background_sids(src_filter)
    sitecol = src_filter.sitecol
    cmaker = ContextMaker(rlzs_by_gsim, src_filter.integration_distance)
    for sample in range(param['samples']):
        for ses_idx, ses_seed in param['ses_seeds']:
            seed = sample * TWO16 + ses_seed
            with sampl_mon:
                rups, n_occs = generate_event_set(
                    src, background_sids, src_filter, seed)
            with filt_mon:
                for rup, n_occ in zip(rups, n_occs):
                    rup.serial = serial
                    try:
                        rup.sctx, rup.dctx = cmaker.make_contexts(sitecol, rup)
                        indices = rup.sctx.sids
                    except FarAwayRupture:
                        continue
                    events = []
                    for _ in range(n_occ):
                        events.append((0, src.src_group_id, ses_idx, sample))
                    if events:
                        evs = numpy.array(events, stochastic.event_dt)
                        ebruptures.append(EBRupture(rup, src.id, indices, evs))
                        serial += 1
    res.num_events = len(stochastic.set_eids(ebruptures))
    res['ruptures'] = {src.src_group_id: ebruptures}
    if param['save_ruptures']:
        res.ruptures_by_grp = {src.src_group_id: ebruptures}
    else:
        res.events_by_grp = {
            src.src_group_id: event_based.get_events(ebruptures)}
    res.eff_ruptures = {src.src_group_id: src.num_ruptures}
    if param.get('gmf'):
        getter = getters.GmfGetter(
            rlzs_by_gsim, ebruptures, sitecol, param['oqparam'],
            param['min_iml'], param['samples'])
        res.update(getter.compute_gmfs_curves(monitor))
    return res

def get_sitecol_assetcol(oqparam, haz_sitecol=None, cost_types=()):
    """
    :param oqparam: calculation parameters
    :param haz_sitecol: the hazard site collection
    :param cost_types: the expected cost types
    :returns: (site collection, asset collection, discarded)
    """
    global exposure
    asset_hazard_distance = oqparam.asset_hazard_distance['default']
    if exposure is None:
        # haz_sitecol not extracted from the exposure
        exposure = get_exposure(oqparam)
    if haz_sitecol is None:
        haz_sitecol = get_site_collection(oqparam)
    if oqparam.region_grid_spacing:
        haz_distance = oqparam.region_grid_spacing * 1.414
        if haz_distance != asset_hazard_distance:
            logging.info('Using asset_hazard_distance=%d km instead of %d km',
                         haz_distance, asset_hazard_distance)
    else:
        haz_distance = asset_hazard_distance
    if haz_sitecol.mesh != exposure.mesh:
        # associate the assets to the hazard sites
        sitecol, assets_by, discarded = geo.utils.assoc(
            exposure.assets_by_site, haz_sitecol, haz_distance, 'filter',
            exposure.asset_refs)
        assets_by_site = [[] for _ in sitecol.complete.sids]
        num_assets = 0
        for sid, assets in zip(sitecol.sids, assets_by):
            assets_by_site[sid] = assets
            num_assets += len(assets)
        logging.info('Associated %d assets to %d sites',
                     num_assets, len(sitecol))
    else:
        # asset sites and hazard sites are the same
        sitecol = haz_sitecol
        assets_by_site = exposure.assets_by_site
        discarded = []
        logging.info('Read %d sites and %d assets from the exposure',
                     len(sitecol), sum(len(a) for a in assets_by_site))
    assetcol = asset.AssetCollection(
        exposure, assets_by_site, oqparam.time_event,
        oqparam.loss_dt().names)
    if assetcol.occupancy_periods:
        missing = set(cost_types) - set(exposure.cost_types['name']) - set(
            ['occupants'])
    else:
        missing = set(cost_types) - set(exposure.cost_types['name'])
    if missing and not oqparam.calculation_mode.endswith('damage'):
        raise InvalidFile('The exposure %s is missing %s' %
                          (oqparam.inputs['exposure'], missing))
    if (not oqparam.hazard_calculation_id and 'gmfs' not in oqparam.inputs
            and 'hazard_curves' not in oqparam.inputs
            and sitecol is not sitecol.complete):
        assetcol.reduce_also(sitecol)
    return sitecol, assetcol, discarded

def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    if getattr(group, 'src_interdep', None) == 'mutex':
        mutex_weight = {
            src.source_id: weight
            for src, weight in zip(group.sources, group.srcs_weights)}
    else:
        mutex_weight = None
    grp_ids = set()
    for src in group:
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    imtls = param['imtls']
    trunclevel = param.get('truncation_level')
    cmaker = ContextMaker(gsims, maxdist, param['filter_distance'], monitor)
    pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array), len(gsims))
                      for grp_id in grp_ids})
    # AccumDict of arrays with 4 elements weight, nsites, calc_time, split
    pmap.calc_times = AccumDict(accum=numpy.zeros(4))
    pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
    for src, s_sites in src_filter(group):  # filter now
        t0 = time.time()
        indep = group.rup_interdep == 'indep' if mutex_weight else True
        poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, indep)
        if mutex_weight:  # mutex sources
            weight = mutex_weight[src.source_id]
            for sid in poemap:
                pcurve = pmap[group.id].setdefault(sid, 0)
                pcurve += poemap[sid] * weight
        elif poemap:
            for grp_id in src.src_group_ids:
                pmap[grp_id] |= poemap
        src_id = src.source_id.split(':', 1)[0]
        pmap.calc_times[src_id] += numpy.array(
            [src.weight, len(s_sites), time.time() - t0, 1])
        # storing the number of contributing ruptures too
        pmap.eff_ruptures += {grp_id: getattr(poemap, 'eff_ruptures', 0)
                              for grp_id in src.src_group_ids}
    if mutex_weight and group.grp_probability is not None:
        pmap[group.id] *= group.grp_probability
    return pmap

def rupture_weight_pairs(src):
    """
    Generator yielding (rupture, weight) for each rupture in the source
    """
    if hasattr(src, 'weights'):
        for pair in zip(src.iter_ruptures(), src.weights):
            yield pair
        return  # do not fall through to the uniform weights below
    weight = 1. / (src.num_ruptures or src.count_ruptures())
    for rup in src.iter_ruptures():
        yield rup, weight

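# Illustrative sketch (not part of the engine): for a hypothetical source
# without explicit weights, the uniform weights yielded above sum to 1 over
# all of its ruptures. `_FakeSource` is a stand-in, not an engine class.
class _FakeSource:
    num_ruptures = 4

    def iter_ruptures(self):
        return iter(['rup%d' % i for i in range(self.num_ruptures)])

assert abs(sum(w for _, w in rupture_weight_pairs(_FakeSource())) - 1) < 1e-9
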
def poe_map(src, s_sites, imtls, cmaker, trunclevel, bbs, rup_indep,
            ctx_mon, pne_mon, disagg_mon):
    """
    Compute the ProbabilityMap generated by the given source. Also,
    store some information in the monitors and optionally in the
    bounding boxes.
    """
    pmap = ProbabilityMap.build(
        len(imtls.array), len(cmaker.gsims), s_sites.sids,
        initvalue=rup_indep)
    try:
        for rup, weight in rupture_weight_pairs(src):
            with ctx_mon:  # compute distances
                try:
                    sctx, rctx, dctx = cmaker.make_contexts(s_sites, rup)
                except FarAwayRupture:
                    continue
            with pne_mon:  # compute probabilities and update the pmap
                pnes = get_probability_no_exceedance(
                    rup, sctx, rctx, dctx, imtls, cmaker.gsims, trunclevel)
                for sid, pne in zip(sctx.sites.sids, pnes):
                    if rup_indep:
                        pmap[sid].array *= pne
                    else:
                        pmap[sid].array += pne * weight
            # add optional disaggregation information (bounding boxes)
            if bbs:
                with disagg_mon:
                    sids = set(sctx.sites.sids)
                    jb_dists = dctx.rjb
                    closest_points = rup.surface.get_closest_points(
                        sctx.sites.mesh)
                    bs = [bb for bb in bbs if bb.site_id in sids]
                    # NB: the assert below is always true; we are
                    # protecting against possible refactoring errors
                    assert len(bs) == len(jb_dists) == len(closest_points)
                    for bb, dist, p in zip(bs, jb_dists, closest_points):
                        bb.update([dist], [p.longitude], [p.latitude])
    except Exception as err:
        etype, err, tb = sys.exc_info()
        msg = 'An error occurred with source id=%s. Error: %s'
        msg %= (src.source_id, str(err))
        raise_(etype, msg, tb)
    return ~pmap

def compute_gmfs_and_curves(getters, oq, monitor):
    """
    :param getters: a list of GmfGetter instances
    :param oq: an OqParam instance
    :param monitor: a Monitor instance
    :returns: a list of dictionaries with keys gmfcoll and hcurves
    """
    results = []
    for getter in getters:
        with monitor('GmfGetter.init', measuremem=True):
            getter.init()
        hcurves = {}  # key -> poes
        if oq.hazard_curves_from_gmfs:
            hc_mon = monitor('building hazard curves', measuremem=False)
            duration = oq.investigation_time * oq.ses_per_logic_tree_path
            with monitor('building hazard', measuremem=True):
                gmfdata = numpy.fromiter(getter.gen_gmv(), getter.gmf_data_dt)
                hazard = getter.get_hazard(data=gmfdata)
            for sid, hazardr in zip(getter.sids, hazard):
                for rlzi, array in hazardr.items():
                    if len(array) == 0:  # no data
                        continue
                    with hc_mon:
                        gmvs = array['gmv']
                        for imti, imt in enumerate(getter.imtls):
                            poes = calc._gmvs_to_haz_curve(
                                gmvs[:, imti], oq.imtls[imt],
                                oq.investigation_time, duration)
                            hcurves[rsi2str(rlzi, sid, imt)] = poes
        else:  # fast lane
            with monitor('building hazard', measuremem=True):
                gmfdata = numpy.fromiter(getter.gen_gmv(), getter.gmf_data_dt)
        indices = []
        if oq.ground_motion_fields:
            gmfdata.sort(order=('sid', 'rlzi', 'eid'))
            start = stop = 0
            for sid, rows in itertools.groupby(gmfdata['sid']):
                for row in rows:
                    stop += 1
                indices.append((sid, start, stop))
                start = stop
        else:
            gmfdata = None
        res = dict(gmfdata=gmfdata, hcurves=hcurves, gmdata=getter.gmdata,
                   taskno=monitor.task_no,
                   indices=numpy.array(indices, (U32, 3)))
        if len(getter.gmdata):
            results.append(res)
    return results

def get_mesh_csvdata(csvfile, imts, num_values, validvalues):
    """
    Read CSV data in the format `IMT lon lat value1 ... valueN`.

    :param csvfile: a file or file-like object with the CSV data
    :param imts: a list of intensity measure types
    :param num_values: a list with the number of expected values per IMT
    :param validvalues: validation function for the values
    :returns: the mesh of points and the data as a dictionary
              imt -> array of curves for each site
    """
    number_of_values = dict(zip(imts, num_values))
    lon_lats = {imt: set() for imt in imts}
    data = AccumDict()  # imt -> list of arrays
    check_imt = valid.Choice(*imts)
    for line, row in enumerate(csv.reader(csvfile, delimiter=' '), 1):
        try:
            imt = check_imt(row[0])
            lon_lat = valid.longitude(row[1]), valid.latitude(row[2])
            if lon_lat in lon_lats[imt]:
                raise DuplicatedPoint(lon_lat)
            lon_lats[imt].add(lon_lat)
            values = validvalues(' '.join(row[3:]))
            if len(values) != number_of_values[imt]:
                raise ValueError('Found %d values, expected %d' %
                                 (len(values), number_of_values[imt]))
        except (ValueError, DuplicatedPoint) as err:
            raise err.__class__('%s: file %s, line %d' % (err, csvfile, line))
        data += {imt: [numpy.array(values)]}
    points = lon_lats.pop(imts[0])
    for other_imt, other_points in lon_lats.items():
        if points != other_points:
            raise ValueError('Inconsistent locations between %s and %s' %
                             (imts[0], other_imt))
    lons, lats = zip(*sorted(points))
    mesh = geo.Mesh(numpy.array(lons), numpy.array(lats))
    return mesh, {imt: numpy.array(lst) for imt, lst in data.items()}

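# Hedged usage sketch (assuming the module-level imports used by
# get_mesh_csvdata, i.e. csv, numpy, valid, geo, AccumDict, are available):
# parse a tiny in-memory file in the documented `IMT lon lat value1 ... valueN`
# format. The simple float parser below is an illustrative stand-in for the
# engine validators normally passed as `validvalues`.
from io import StringIO

def _parse_floats(text):
    return numpy.array([float(x) for x in text.split()])

_fake_csv = StringIO('PGA 10.0 45.0 0.1 0.2 0.3\nPGA 10.5 45.0 0.4 0.5 0.6\n')
_mesh, _data = get_mesh_csvdata(_fake_csv, ['PGA'], [3], _parse_floats)
# _data['PGA'] has shape (2, 3): one row of values per site
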
def assets_by_site(self):
    """
    :returns: numpy array of lists with the assets by each site
    """
    assetcol = self.array
    site_ids = sorted(set(assetcol['site_id']))
    assets_by_site = [[] for sid in site_ids]
    index = dict(zip(site_ids, range(len(site_ids))))
    for i, ass in enumerate(assetcol):
        assets_by_site[index[ass['site_id']]].append(self[i])
    return numpy.array(assets_by_site)

def set_counts(dstore, dsetname):
    """
    :param dstore: a DataStore instance
    :param dsetname: name of dataset with a field `grp_id`
    :returns: a dictionary grp_id -> counts
    """
    groups = dstore[dsetname]['grp_id']
    unique, counts = numpy.unique(groups, return_counts=True)
    dic = dict(zip(unique, counts))
    dstore.set_attrs(dsetname, by_grp=sorted(dic.items()))
    return dic

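# Minimal illustration of the counting idiom used above, with no datastore
# involved (hypothetical grp_id values): numpy.unique with return_counts
# yields the mapping grp_id -> number of rows with that grp_id.
import numpy

_grp = numpy.array([0, 0, 1, 2, 2, 2])
_uniq, _cnt = numpy.unique(_grp, return_counts=True)
assert dict(zip(_uniq, _cnt)) == {0: 2, 1: 1, 2: 3}
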
def get_all(self, aids):
    """
    :param aids: a list of A asset ordinals
    :returns: a list of A composite arrays of dtype `lrs_dt`
    """
    data = self.dstore['all_loss_ratios/data']
    indices = self.dstore['all_loss_ratios/indices'][aids]  # (A, T, 2)
    loss_ratio_data = []
    for aid, idxs in zip(aids, indices):
        arr = numpy.concatenate([data[idx[0]:idx[1]] for idx in idxs])
        loss_ratio_data.append(arr)
    return loss_ratio_data

def set_random_years(dstore, name, investigation_time):
    """
    Set year labels on the `events` dataset, sensitive to the SES ordinal
    and the investigation time.
    """
    events = dstore[name].value
    years = numpy.random.choice(investigation_time, len(events)) + 1
    year_of = dict(zip(numpy.sort(events['eid']), years))  # eid -> year
    for event in events:
        idx = event['ses'] - 1  # starts from 0
        event['year'] = idx * investigation_time + year_of[event['eid']]
    dstore[name] = events

def reduce_sm(paths, source_ids):
    """
    :param paths: list of source_model.xml files
    :param source_ids: dictionary src_id -> array[src_id, code]
    :returns: dictionary with keys good, total, model, path, xmlns

    NB: duplicate sources are not removed from the XML
    """
    if isinstance(source_ids, dict):  # in oq reduce_sm
        def ok(src_node):
            code = tag2code[re.search(r'\}(\w\w)', src_node.tag).group(1)]
            arr = source_ids.get(src_node['id'])
            if arr is None:
                return False
            return (arr['code'] == code).any()
    else:  # list of source IDs, in extract_source
        def ok(src_node):
            return src_node['id'] in source_ids
    for path in paths:
        good = 0
        total = 0
        logging.info('Reading %s', path)
        root = nrml.read(path)
        model = Node('sourceModel', root[0].attrib)
        origmodel = root[0]
        if root['xmlns'] == 'http://openquake.org/xmlns/nrml/0.4':
            for src_node in origmodel:
                total += 1
                if ok(src_node):
                    good += 1
                    model.nodes.append(src_node)
        else:  # nrml/0.5
            for src_group in origmodel:
                sg = copy.copy(src_group)
                sg.nodes = []
                weights = src_group.get('srcs_weights')
                if weights:
                    assert len(weights) == len(src_group.nodes)
                else:
                    weights = [1] * len(src_group.nodes)
                reduced_weights = []
                for src_node, weight in zip(src_group, weights):
                    total += 1
                    if ok(src_node):
                        good += 1
                        sg.nodes.append(src_node)
                        reduced_weights.append(weight)
                        src_node.attrib.pop('tectonicRegion', None)
                src_group['srcs_weights'] = reduced_weights
                if sg.nodes:
                    model.nodes.append(sg)
        yield dict(good=good, total=total, model=model, path=path,
                   xmlns=root['xmlns'])

def scenario_risk(riskinput, riskmodel, param, monitor):
    """
    Core function for a scenario computation.

    :param riskinput:
        a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param param:
        dictionary of extra parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        a dictionary
        {'agg': array of shape (E, L, R, 2),
         'avg': list of tuples (lt_idx, rlz_idx, asset_idx, statistics)}
        where E is the number of simulated events, L the number of loss
        types, R the number of realizations and statistics is an array of
        shape (n, R, 4), with n the number of assets in the current
        riskinput object
    """
    E = param['number_of_ground_motion_fields']
    L = len(riskmodel.loss_types)
    T = riskinput.tagmask.shape[1]
    R = riskinput.hazard_getter.num_rlzs
    I = param['insured_losses'] + 1
    asset_loss_table = param['asset_loss_table']
    lbt = numpy.zeros((T, R, L * I), F32)
    result = dict(agg=numpy.zeros((E, R, L * I), F32), avg=[],
                  losses_by_tag=lbt, all_losses=AccumDict(accum={}))
    for outputs in riskmodel.gen_outputs(riskinput, monitor):
        r = outputs.r
        assets = outputs.assets
        for l, losses in enumerate(outputs):
            if losses is None:  # this may happen
                continue
            stats = numpy.zeros((len(assets), I), stat_dt)  # mean, stddev
            for a, asset in enumerate(assets):
                stats['mean'][a] = losses[a].mean()
                stats['stddev'][a] = losses[a].std(ddof=1)
                result['avg'].append((l, r, asset.ordinal, stats[a]))
                t = riskinput.tagmask[a]
                for i in range(I):
                    lbt[t, r, l + L * i] += losses[a].sum()
            agglosses = losses.sum(axis=0)  # shape E, I
            for i in range(I):
                result['agg'][:, r, l + L * i] += agglosses[:, i]
            if asset_loss_table:
                aids = [asset.ordinal for asset in outputs.assets]
                result['all_losses'][l, r] += AccumDict(zip(aids, losses))
    return result

def event_based_risk(riskinput, riskmodel, rlzs_assoc, assetcol, monitor):
    """
    :param riskinput: a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel: a :class:`openquake.risklib.riskinput.CompositeRiskModel`
        instance
    :param rlzs_assoc: a :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param assetcol: AssetCollection instance
    :param monitor: :class:`openquake.baselib.performance.Monitor` instance
    :returns: a dictionary of numpy arrays of shape (L, R)
    """
    lti = riskmodel.lti  # loss type -> index
    L, R = len(lti), len(rlzs_assoc.realizations)
    I = monitor.insured_losses + 1
    eids = riskinput.eids
    E = len(eids)
    idx = dict(zip(eids, range(E)))
    agg = numpy.zeros((E, L, R, I), F32)
    ass = collections.defaultdict(list)

    def zeroN():
        return numpy.zeros((monitor.num_assets, I))
    result = dict(RC=square(L, R, AccumDict), IC=square(L, R, AccumDict),
                  AGGLOSS=AccumDict(), ASSLOSS=AccumDict())
    if monitor.avg_losses:
        result['AVGLOSS'] = square(L, R, zeroN)
    agglosses_mon = monitor('aggregate losses', measuremem=False)
    for output in riskmodel.gen_outputs(riskinput, rlzs_assoc, monitor,
                                        assetcol):
        with agglosses_mon:
            _aggregate_output(output, riskmodel, agg, ass, idx, result,
                              monitor)
    for (l, r) in itertools.product(range(L), range(R)):
        records = [(eids[i], loss) for i, loss in enumerate(agg[:, l, r])
                   if loss.sum() > 0]
        if records:
            result['AGGLOSS'][l, r] = numpy.array(records, monitor.elt_dt)
    for lr in ass:
        if ass[lr]:
            result['ASSLOSS'][lr] = numpy.concatenate(ass[lr])
    # store the size of the GMFs
    result['gmfbytes'] = monitor.gmfbytes
    return result

def __init__(self, trt, imts, sitecol, ses_ruptures, trunc_level,
             correl_model, min_iml, epsilons=None):
    self.sitecol = sitecol
    self.ses_ruptures = numpy.array(ses_ruptures)
    self.trt = trt
    self.trunc_level = trunc_level
    self.correl_model = correl_model
    self.min_iml = min_iml
    self.weight = sum(sr.weight for sr in ses_ruptures)
    self.imts = imts
    self.eids = numpy.concatenate([r.events["eid"] for r in ses_ruptures])
    if epsilons is not None:
        self.eps = epsilons  # matrix N x E, events in this block
        self.eid2idx = dict(zip(self.eids, range(len(self.eids))))

def set_random_years(dstore, events_sm, investigation_time):
    """
    Sort the `events` array and attach year labels sensitive to the
    SES ordinal and the investigation time.
    """
    events = dstore[events_sm].value
    eids = numpy.sort(events['eid'])
    years = numpy.random.choice(investigation_time, len(events)) + 1
    year_of = dict(zip(eids, years))
    for event in events:
        idx = event['ses'] - 1  # starts from 0
        event['year'] = idx * investigation_time + year_of[event['eid']]
    dstore[events_sm] = events

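# Small self-contained illustration of the year-assignment idiom above
# (hypothetical event IDs, no datastore): each event gets a random year in
# [1, investigation_time]; the SES ordinal then offsets the label so that
# different SESs map to disjoint year ranges.
import numpy

_investigation_time = 50
_eids = numpy.array([3, 1, 2])
_years = numpy.random.choice(_investigation_time, len(_eids)) + 1
_year_of = dict(zip(numpy.sort(_eids), _years))
assert all(1 <= _year_of[e] <= _investigation_time for e in _eids)
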
def get(self, rlzi):
    """
    :param rlzi: a realization ordinal
    :returns: a dictionary aid -> list of loss ratios
    """
    data = self.dstore['all_loss_ratios/data']
    dic = collections.defaultdict(list)  # aid -> ratios
    for aid, idxs in zip(self.aids, self.indices):
        for idx in idxs:
            for rec in data[idx[0]:idx[1]]:
                if rlzi == rec['rlzi']:
                    dic[aid].append(rec['ratios'])
    return dic

def event_based_risk(riskinputs, riskmodel, rlzs_assoc, assetcol, monitor):
    """
    :param riskinputs: a list of :class:`openquake.risklib.riskinput.RiskInput`
        objects
    :param riskmodel: a :class:`openquake.risklib.riskinput.CompositeRiskModel`
        instance
    :param rlzs_assoc: a :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param assetcol: AssetCollection instance
    :param monitor: :class:`openquake.baselib.performance.Monitor` instance
    :returns: a dictionary of numpy arrays of shape (L, R)
    """
    lti = riskmodel.lti  # loss type -> index
    L, R = len(lti), len(rlzs_assoc.realizations)
    I = monitor.insured_losses + 1
    eids = numpy.concatenate([ri.eids for ri in riskinputs])
    E = len(eids)
    idx = dict(zip(eids, range(E)))
    agg = numpy.zeros((E, L, R, I), F32)

    def zeroN():
        return numpy.zeros((monitor.num_assets, I))
    result = dict(RC=square(L, R, list), IC=square(L, R, list),
                  AGGLOSS=square(L, R, list))
    if monitor.asset_loss_table:
        result['ASSLOSS'] = square(L, R, list)
    if monitor.avg_losses:
        result['AVGLOSS'] = square(L, R, zeroN)
    agglosses_mon = monitor('aggregate losses', measuremem=False)
    for output in riskmodel.gen_outputs(riskinputs, rlzs_assoc, monitor,
                                        assetcol):
        with agglosses_mon:
            _aggregate_output(output, riskmodel, agg, idx, result, monitor)
    for (l, r), lst in numpy.ndenumerate(result['AGGLOSS']):
        records = numpy.array(
            [(eids[i], loss) for i, loss in enumerate(agg[:, l, r])
             if loss.sum() > 0], monitor.elt_dt)
        result['AGGLOSS'][l, r] = records
    for (l, r), lst in numpy.ndenumerate(result['RC']):
        result['RC'][l, r] = sum(lst, AccumDict())
    for (l, r), lst in numpy.ndenumerate(result['IC']):
        result['IC'][l, r] = sum(lst, AccumDict())
    return result

def compute_ruptures(sources, src_filter, gsims, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param gsims: a list of GSIMs
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = []
    serial = 1
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    filt_mon = monitor('filtering ruptures', measuremem=False)
    res.trt = DEFAULT_TRT
    ebruptures = []
    background_sids = src.get_background_sids(src_filter)
    sitecol = src_filter.sitecol
    cmaker = ContextMaker(gsims, src_filter.integration_distance)
    for sample in range(param['samples']):
        for ses_idx, ses_seed in param['ses_seeds']:
            seed = sample * TWO16 + ses_seed
            with sampl_mon:
                rups, n_occs = generate_event_set(
                    src, background_sids, src_filter, seed)
            with filt_mon:
                for rup, n_occ in zip(rups, n_occs):
                    rup.serial = serial
                    rup.seed = seed
                    try:
                        rup.sctx, rup.dctx = cmaker.make_contexts(sitecol, rup)
                        indices = rup.sctx.sids
                    except FarAwayRupture:
                        continue
                    events = []
                    for _ in range(n_occ):
                        events.append((0, src.src_group_id, ses_idx, sample))
                    if events:
                        evs = numpy.array(events, stochastic.event_dt)
                        ebruptures.append(EBRupture(rup, indices, evs))
                        serial += 1
    res.num_events = len(stochastic.set_eids(ebruptures))
    res[src.src_group_id] = ebruptures
    if not param['save_ruptures']:
        res.events_by_grp = {
            grp_id: event_based.get_events(res[grp_id]) for grp_id in res}
    res.eff_ruptures = {src.src_group_id: src.num_ruptures}
    return res

def compute_hazard(sources, src_filter, rlzs_by_gsim, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param rlzs_by_gsim: a dictionary gsim -> rlzs
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = []
    serial = 1
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    filt_mon = monitor('filtering ruptures', measuremem=False)
    res.trt = DEFAULT_TRT
    background_sids = src.get_background_sids(src_filter)
    sitecol = src_filter.sitecol
    cmaker = ContextMaker(rlzs_by_gsim, src_filter.integration_distance)
    num_ses = param['ses_per_logic_tree_path']
    samples = getattr(src, 'samples', 1)
    n_occ = AccumDict(accum=numpy.zeros((samples, num_ses), numpy.uint16))
    with sampl_mon:
        for sam_idx in range(samples):
            for ses_idx, ses_seed in param['ses_seeds']:
                seed = sam_idx * TWO16 + ses_seed
                rups, occs = generate_event_set(
                    src, background_sids, src_filter, seed)
                for rup, occ in zip(rups, occs):
                    n_occ[rup][sam_idx, ses_idx] = occ
                    rup.serial = serial
                    serial += 1
    with filt_mon:
        rlzs = numpy.concatenate(list(rlzs_by_gsim.values()))
        ebruptures = stochastic.build_eb_ruptures(
            src, rlzs, num_ses, cmaker, sitecol, n_occ.items())
    res.num_events = sum(ebr.multiplicity for ebr in ebruptures)
    res['ruptures'] = {src.src_group_id: ebruptures}
    if param['save_ruptures']:
        res.ruptures_by_grp = {src.src_group_id: ebruptures}
    else:
        res.events_by_grp = {
            src.src_group_id: event_based.get_events(ebruptures)}
    res.eff_ruptures = {src.src_group_id: src.num_ruptures}
    if param.get('gmf'):
        getter = getters.GmfGetter(
            rlzs_by_gsim, ebruptures, sitecol, param['oqparam'],
            param['min_iml'], samples)
        res.update(getter.compute_gmfs_curves(monitor))
    return res

def from_array(cls, array, sids):
    """
    :param array: array of shape (N, L, I)
    :param sids: array of N site IDs
    """
    n_sites = len(sids)
    n = len(array)
    if n_sites != n:
        raise ValueError('Passed %d site IDs, but the array has length %d' %
                         (n_sites, n))
    self = cls(*array.shape[1:])
    for sid, poes in zip(sids, array):
        self[sid] = ProbabilityCurve(poes)
    return self

def get_aggkey_attrs(tagcol, aggby):
    """
    :returns: a dictionary tuple of tag indices -> aggregation key ordinal
              and a list of dictionaries tagname -> tagvalue, one per key
    """
    aggkey = {(): 0}
    attrs = [{}]
    if not aggby:
        return aggkey, attrs
    alltags = [getattr(tagcol, tagname) for tagname in aggby]
    ranges = [range(1, len(tags)) for tags in alltags]
    i = 1
    for idxs in itertools.product(*ranges):
        d = {name: tags[idx]
             for idx, name, tags in zip(idxs, aggby, alltags)}
        aggkey[idxs] = i
        attrs.append(d)
        i += 1
    return aggkey, attrs

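# Hedged example: a hypothetical tag collection with two tagnames, where
# index 0 of each tag list is assumed to be a placeholder ('?'), which is
# why the ranges above start from 1. `_FakeTagCol` is not an engine class.
class _FakeTagCol:
    taxonomy = ['?', 'RC', 'W']
    region = ['?', 'north']

_aggkey, _attrs = get_aggkey_attrs(_FakeTagCol(), ['taxonomy', 'region'])
assert _aggkey == {(): 0, (1, 1): 1, (2, 1): 2}
assert _attrs[1] == {'taxonomy': 'RC', 'region': 'north'}
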
def post_execute(self, result):
    """
    Save risk data and build the aggregate loss curves
    """
    logging.info('Saving event loss table')
    elt_dt = numpy.dtype([('eid', U64), ('rlzi', U16),
                          ('loss', (F32, (self.L * self.I,)))])
    with self.monitor('saving event loss table', measuremem=True):
        # saving zeros is a lot faster than adding an `if loss.sum()`
        agglosses = numpy.fromiter(
            ((e, r, loss)
             for e, losses in zip(self.eids, self.agglosses)
             for r, loss in enumerate(losses) if loss.sum()), elt_dt)
        self.datastore['losses_by_event'] = agglosses
    self.postproc()

def scenario_risk(riskinputs, crmodel, param, monitor):
    """
    Core function for a scenario computation.

    :param riskinputs:
        a list of :class:`openquake.risklib.riskinput.RiskInput` objects
    :param crmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param param:
        dictionary of extra parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        a dictionary
        {'agg': array of shape (E, L, R, 2),
         'avg': list of tuples (lt_idx, rlz_idx, asset_ordinal, statistics)}
        where E is the number of simulated events, L the number of loss
        types, R the number of realizations and statistics is an array of
        shape (n, R, 4), with n the number of assets in the current
        riskinput object
    """
    E = param['E']
    L = len(crmodel.loss_types)
    result = dict(agg=numpy.zeros((E, L), F32), avg=[])
    mon = monitor('getting hazard', measuremem=False)
    acc = AccumDict(accum=numpy.zeros(L, F64))  # aid, eid -> loss
    for ri in riskinputs:
        with mon:
            ri.hazard_getter.init()
        for out in ri.gen_outputs(crmodel, monitor, param['tempname']):
            r = out.rlzi
            slc = param['event_slice'](r)
            for l, loss_type in enumerate(crmodel.loss_types):
                losses = out[loss_type]
                if numpy.product(losses.shape) == 0:  # happens for all NaNs
                    continue
                stats = numpy.zeros(len(ri.assets), stat_dt)  # mean, stddev
                for a, asset in enumerate(ri.assets):
                    aid = asset['ordinal']
                    stats['mean'][a] = losses[a].mean()
                    stats['stddev'][a] = losses[a].std(ddof=1)
                    result['avg'].append((l, r, asset['ordinal'], stats[a]))
                    for loss, eid in zip(losses[a], out.eids):
                        acc[aid, eid][l] = loss
                agglosses = losses.sum(axis=0)  # shape num_gmfs
                result['agg'][slc, l] += agglosses
    ael = [(aid, eid, loss) for (aid, eid), loss in sorted(acc.items())]
    result['ael'] = numpy.array(ael, param['ael_dt'])
    return result

def event_based_risk(riskinput, riskmodel, param, monitor):
    """
    :param riskinput: a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel: a :class:`openquake.risklib.riskinput.CompositeRiskModel`
        instance
    :param param: a dictionary of parameters
    :param monitor: :class:`openquake.baselib.performance.Monitor` instance
    :returns: a dictionary of numpy arrays of shape (L, R)
    """
    riskinput.hazard_getter.init()
    assetcol = param['assetcol']
    A = len(assetcol)
    I = param['insured_losses'] + 1
    eids = riskinput.hazard_getter.eids
    E = len(eids)
    L = len(riskmodel.lti)
    taxid = {t: i for i, t in enumerate(sorted(assetcol.taxonomies))}
    T = len(taxid)
    R = sum(len(rlzs) for gsim, rlzs
            in riskinput.hazard_getter.rlzs_by_gsim.items())
    param['lrs_dt'] = numpy.dtype(
        [('rlzi', U16), ('ratios', (F32, (L * I,)))])
    idx = dict(zip(eids, range(E)))
    agg = AccumDict(accum=numpy.zeros((E, L, I), F32))  # r -> array
    result = dict(agglosses=AccumDict(), assratios=[],
                  lrs_idx=numpy.zeros((A, 2), U32),
                  losses_by_taxon=numpy.zeros((T, R, L * I), F32),
                  aids=None)
    if param['avg_losses']:
        result['avglosses'] = AccumDict(accum=numpy.zeros(A, F64))
    else:
        result['avglosses'] = {}
    outputs = riskmodel.gen_outputs(riskinput, monitor, assetcol)
    _aggregate(outputs, riskmodel, taxid, agg, idx, result, param)
    for r in sorted(agg):
        records = [(eids[i], loss) for i, loss in enumerate(agg[r])
                   if loss.sum() > 0]
        if records:
            result['agglosses'][r] = numpy.array(records, param['elt_dt'])
    # store info about the GMFs
    result['gmdata'] = riskinput.gmdata
    return result

def compute_ruptures(sources, sitecol, gsims, monitor):
    """
    :param sources: a sequence of UCERF sources
    :param sitecol: a SiteCollection instance
    :param gsims: a list of GSIMs
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources  # there is a single source per UCERF branch
    integration_distance = monitor.maximum_distance[DEFAULT_TRT]
    res = AccumDict()
    res.calc_times = AccumDict()
    serial = 1
    event_mon = monitor('sampling ruptures', measuremem=False)
    res.num_events = 0
    res.trt = DEFAULT_TRT
    t0 = time.time()
    # set the seed before calling generate_event_set
    numpy.random.seed(monitor.seed + src.src_group_id)
    ebruptures = []
    eid = 0
    background_sids = src.get_background_sids(sitecol, integration_distance)
    for ses_idx in range(1, monitor.ses_per_logic_tree_path + 1):
        with event_mon:
            rups, n_occs = src.generate_event_set(background_sids)
        for rup, n_occ in zip(rups, n_occs):
            rup.seed = monitor.seed  # to think
            rrup = rup.surface.get_min_distance(sitecol.mesh)
            r_sites = sitecol.filter(rrup <= integration_distance)
            if r_sites is None:
                continue
            indices = r_sites.indices
            events = []
            for occ in range(n_occ):
                events.append((eid, ses_idx, occ, 0))  # 0 is the sampling
                eid += 1
            if events:
                evs = numpy.array(events, calc.event_dt)
                ebruptures.append(
                    calc.EBRupture(rup, indices, evs, src.source_id,
                                   src.src_group_id, serial))
                serial += 1
                res.num_events += len(events)
    res[src.src_group_id] = ebruptures
    res.calc_times[src.src_group_id] = (
        src.source_id, len(sitecol), time.time() - t0)
    res.rup_data = {src.src_group_id:
                    calc.RuptureData(DEFAULT_TRT, gsims).to_array(ebruptures)}
    return res

def _aggregate(outputs, compositemodel, agg, ass, idx, result, monitor):
    # update the result dictionary and the agg array with each output
    for out in outputs:
        l, r = out.lr
        asset_ids = [a.ordinal for a in out.assets]
        loss_type = compositemodel.loss_types[l]
        indices = numpy.array([idx[eid] for eid in out.eids])
        cb = compositemodel.curve_builders[l]
        if cb.user_provided:
            counts_matrix = cb.build_counts(out.loss_ratios[:, :, 0])
            result['RC'][l, r] += dict(zip(asset_ids, counts_matrix))
            if monitor.insured_losses:
                result['IC'][l, r] += dict(
                    zip(asset_ids, cb.build_counts(out.loss_ratios[:, :, 1])))
        for i, asset in enumerate(out.assets):
            aid = asset.ordinal
            loss_ratios = out.loss_ratios[i]
            losses = loss_ratios * asset.value(loss_type)
            # average losses
            if monitor.avg_losses:
                result['AVGLOSS'][l, r][aid] += (
                    loss_ratios.sum(axis=0) * monitor.ses_ratio)
            # asset losses
            if monitor.asset_loss_table:
                data = [(eid, aid, loss)
                        for eid, loss in zip(out.eids, losses)
                        if loss.sum() > 0]
                if data:
                    ass[l, r].append(numpy.array(data, monitor.ela_dt))
            # agglosses
            agg[indices, l, r] += losses

def __init__(self, imt_taxonomies, sitecol, ses_ruptures, trunc_level,
             correl_model, min_iml, epsilons, eids):
    self.imt_taxonomies = imt_taxonomies
    self.sitecol = sitecol
    self.ses_ruptures = numpy.array(ses_ruptures)
    self.trt_id = ses_ruptures[0].trt_id
    self.trunc_level = trunc_level
    self.correl_model = correl_model
    self.min_iml = min_iml
    self.weight = sum(sr.weight for sr in ses_ruptures)
    self.imts = sorted(set(imt for imt, _ in imt_taxonomies))
    self.eids = eids  # E events
    if epsilons is not None:
        self.eps = epsilons  # matrix N x E, events in this block
        self.eid2idx = dict(zip(eids, range(len(eids))))

def post_execute(self, result):
    """
    Save risk data and build the aggregate loss curves
    """
    logging.info('Saving event loss table')
    elt_dt = numpy.dtype(
        [('eid', U64), ('rlzi', U16), ('loss', (F32, (self.L,)))])
    with self.monitor('saving event loss table', measuremem=True):
        agglosses = numpy.fromiter(
            ((eid, rlz, losses)
             for (eid, rlz), losses in zip(self.events, self.agglosses)
             if losses.any()), elt_dt)
        self.datastore['losses_by_event'] = agglosses
        loss_types = ' '.join(self.oqparam.loss_dt().names)
        self.datastore.set_attrs('losses_by_event', loss_types=loss_types)
    self.postproc()

def build_loss_tables(dstore):
    """
    Compute the total losses by rupture and losses by rlzi.
    """
    oq = dstore['oqparam']
    L = len(oq.loss_dt().names)
    R = dstore['csm_info'].get_num_rlzs()
    serials = dstore['ruptures']['serial']
    idx_by_ser = dict(zip(serials, range(len(serials))))
    tbl = numpy.zeros((len(serials), L), F32)
    lbr = numpy.zeros((R, L), F32)  # losses by rlz
    for rec in dstore['losses_by_event'][()]:  # read fully in memory for speed
        idx = idx_by_ser[rec['eid'] // TWO32]
        tbl[idx] += rec['loss']
        lbr[rec['rlzi']] += rec['loss']
    return tbl, lbr

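# Hedged note: the expression `rec['eid'] // TWO32` above suggests that in
# this version the rupture serial is encoded in the high 32 bits of the
# event ID; that reading is an assumption inferred from the code, checked
# here with plain arithmetic on a hypothetical event ID.
_TWO32 = 2 ** 32
_eid = 7 * _TWO32 + 123  # hypothetical event belonging to rupture serial 7
assert _eid // _TWO32 == 7
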
def scenario_risk(riskinputs, riskmodel, param, monitor):
    """
    Core function for a scenario computation.

    :param riskinputs:
        a list of :class:`openquake.risklib.riskinput.RiskInput` objects
    :param riskmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param param:
        dictionary of extra parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        a dictionary
        {'agg': array of shape (E, L, R, 2),
         'avg': list of tuples (lt_idx, rlz_idx, asset_ordinal, statistics)}
        where E is the number of simulated events, L the number of loss
        types, R the number of realizations and statistics is an array of
        shape (n, R, 4), with n the number of assets in the current
        riskinput object
    """
    E = param['E']
    L = len(riskmodel.loss_types)
    result = dict(agg=numpy.zeros((E, L), F32), avg=[],
                  all_losses=AccumDict(accum={}))
    for ri in riskinputs:
        for out in riskmodel.gen_outputs(ri, monitor, param['epspath']):
            r = out.rlzi
            weight = param['weights'][r]
            slc = param['event_slice'](r)
            for l, loss_type in enumerate(riskmodel.loss_types):
                losses = out[loss_type]
                if numpy.product(losses.shape) == 0:  # happens for all NaNs
                    continue
                stats = numpy.zeros(len(ri.assets), stat_dt)  # mean, stddev
                for a, asset in enumerate(ri.assets):
                    stats['mean'][a] = losses[a].mean()
                    stats['stddev'][a] = losses[a].std(ddof=1)
                    result['avg'].append((l, r, asset['ordinal'], stats[a]))
                agglosses = losses.sum(axis=0)  # shape num_gmfs
                result['agg'][slc, l] += agglosses * weight
                if param['asset_loss_table']:
                    aids = ri.assets['ordinal']
                    result['all_losses'][l, r] += AccumDict(zip(aids, losses))
    return result

def compute_gmfs_and_curves(eb_ruptures, sitecol, rlzs_assoc, monitor):
    """
    :param eb_ruptures:
        a list of blocks of EBRuptures of the same SESCollection
    :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param rlzs_assoc: a RlzsAssoc instance
    :param monitor: a Monitor instance
    :returns:
        a dictionary (trt_model_id, gsim) -> haz_curves and/or
        trt_model_id -> gmfs
    """
    oq = monitor.oqparam
    # NB: by construction each block is a non-empty list with
    # ruptures of the same trt_model_id
    trt_id = eb_ruptures[0].trt_id
    gsims = rlzs_assoc.gsims_by_trt_id[trt_id]
    trunc_level = oq.truncation_level
    correl_model = readinput.get_correl_model(oq)
    tot_sites = len(sitecol.complete)
    gmfa_sids_etags = make_gmfs(
        eb_ruptures, sitecol, oq.imtls, gsims, trunc_level, correl_model,
        monitor)
    result = {trt_id: gmfa_sids_etags if oq.ground_motion_fields else None}
    if oq.hazard_curves_from_gmfs:
        with monitor('building hazard curves', measuremem=False):
            duration = oq.investigation_time * oq.ses_per_logic_tree_path
            # collect the gmvs by site
            gmvs_by_sid = collections.defaultdict(list)
            for serial in gmfa_sids_etags:
                gst = gmfa_sids_etags[serial]
                for sid, gmvs in zip(gst.sids, gst.gmfa.T):
                    gmvs_by_sid[sid].extend(gmvs)
            # build the hazard curves for each GSIM
            for gsim in gsims:
                gs = str(gsim)
                result[trt_id, gs] = to_haz_curves(
                    tot_sites, gmvs_by_sid, gs, oq.imtls,
                    oq.investigation_time, duration)
    return result
def __init__(self, trt, rlzs_assoc, imts, sitecol, ses_ruptures,
             trunc_level, correl_model, min_iml, epsilons=None):
    assert sitecol is sitecol.complete
    self.imts = imts
    self.sitecol = sitecol
    self.ses_ruptures = numpy.array(ses_ruptures)
    grp_id = ses_ruptures[0].grp_id
    self.trt = trt
    self.trunc_level = trunc_level
    self.correl_model = correl_model
    self.min_iml = min_iml
    self.gsims = [dic[trt] for dic in rlzs_assoc.gsim_by_trt]
    self.samples = rlzs_assoc.samples[grp_id]
    self.rlzs = rlzs_assoc.get_rlzs_by_grp_id()[grp_id]
    self.weight = sum(sr.weight for sr in ses_ruptures)
    self.eids = numpy.concatenate([r.events['eid'] for r in ses_ruptures])
    if epsilons is not None:
        self.eps = epsilons  # matrix N x E, events in this block
        self.eid2idx = dict(zip(self.eids, range(len(self.eids))))
def execute(self):
    A = len(self.assetcol)
    ltypes = self.riskmodel.loss_types
    I = self.oqparam.insured_losses + 1
    R = len(self.rlzs_assoc.realizations)
    self.vals = self.assetcol.values()
    # loss curves
    multi_lr_dt = numpy.dtype(
        [(ltype, (F32, len(cbuilder.ratios)))
         for ltype, cbuilder in zip(ltypes, self.riskmodel.curve_builders)])
    rcurves = numpy.zeros((A, R, I), multi_lr_dt)
    # build rcurves-rlzs
    if self.oqparam.loss_ratios:
        assets = list(self.assetcol)
        cb_inputs = self.cb_inputs('all_loss_ratios')
        mon = self.monitor('build_rcurves')
        res = parallel.apply(
            build_rcurves, (cb_inputs, assets, mon)).reduce()
        for l, r in res:
            aids, curves = res[l, r]
            rcurves[ltypes[l]][aids, r] = curves
        self.datastore['rcurves-rlzs'] = rcurves
    # build rcurves-stats (sequentially)
    # this is a fundamental output, being used to compute loss_maps-stats
    if R > 1:
        weights = self.datastore['realizations']['weight']
        quantiles = self.oqparam.quantile_loss_curves
        with self.monitor('computing avg_losses-stats'):
            self.datastore['avg_losses-stats'] = compute_stats2(
                self.datastore['avg_losses-rlzs'], quantiles, weights)
        with self.monitor('computing rcurves-stats'):
            self.datastore['rcurves-stats'] = compute_stats2(
                rcurves, quantiles, weights)
    # build an aggregate loss curve per realization
    if 'agg_loss_table' in self.datastore:
        with self.monitor('building agg_curve'):
            self.build_agg_curve()
def reduce_source_model(smlt_file, source_ids, remove=True):
    """
    Extract sources from the composite source model
    """
    found = 0
    to_remove = []
    for paths in logictree.collect_info(smlt_file).smpaths.values():
        for path in paths:
            logging.info('Reading %s', path)
            root = nrml.read(path)
            model = Node('sourceModel', root[0].attrib)
            origmodel = root[0]
            if root['xmlns'] == 'http://openquake.org/xmlns/nrml/0.4':
                for src_node in origmodel:
                    if src_node['id'] in source_ids:
                        model.nodes.append(src_node)
            else:  # nrml/0.5
                for src_group in origmodel:
                    sg = copy.copy(src_group)
                    sg.nodes = []
                    weights = src_group.get('srcs_weights')
                    if weights:
                        assert len(weights) == len(src_group.nodes)
                    else:
                        weights = [1] * len(src_group.nodes)
                    src_group['srcs_weights'] = reduced_weights = []
                    for src_node, weight in zip(src_group, weights):
                        if src_node['id'] in source_ids:
                            found += 1
                            sg.nodes.append(src_node)
                            reduced_weights.append(weight)
                    if sg.nodes:
                        model.nodes.append(sg)
            shutil.copy(path, path + '.bak')
            if model:
                with open(path, 'wb') as f:
                    nrml.write([model], f, xmlns=root['xmlns'])
            elif remove:  # remove the files completely reduced
                to_remove.append(path)
    if found:
        for path in to_remove:
            os.remove(path)
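A hedged invocation example for reduce_source_model; the file name and source IDs below are invented:

# hypothetical call: keep only two sources from a source model logic tree
reduce_source_model('ssmLT.xml', source_ids={'src_A', 'src_B'}, remove=False)
# every touched model file is first backed up as <path>.bak and then
# rewritten with only the matching sources; with remove=True, files where
# nothing is kept are deleted instead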
def generate_event_set(self, branch_id, sites=None,
                       integration_distance=1000.):
    """
    Generates the event set corresponding to a particular branch
    """
    if sites:
        self.update_background_site_filter(sites, integration_distance)
    idxset = self.build_idx_set(branch_id)
    # get rates from file
    with h5py.File(self.source_file, 'r') as hdf5:
        rates = hdf5[idxset["rate_idx"]][:]
        occurrences = self.tom.sample_number_of_occurrences(rates)
        indices = numpy.where(occurrences)[0]
        logging.info('Considering %s %s', branch_id, indices)
        # get ruptures from the indices
        ruptures = []
        rupture_occ = []
        for idx, n_occ in zip(indices, occurrences[indices]):
            ucerf_rup, _ = get_ucerf_rupture(
                hdf5, idx, idxset, self.tom, self.sites,
                self.integration_distance, self.mesh_spacing,
                self.tectonic_region_type)
            if ucerf_rup:
                ruptures.append(ucerf_rup)
                rupture_occ.append(n_occ)
        # sample background sources
        background_ruptures, background_n_occ = sample_background_model(
            hdf5, self.tom, self.background_idx, self.min_mag, self.npd,
            self.hdd, self.usd, self.lsd, self.msr, self.aspect,
            self.tectonic_region_type)
        ruptures.extend(background_ruptures)
        rupture_occ.extend(background_n_occ)
    return ruptures, rupture_occ
def losses_by_taxonomy(riskinput, riskmodel, rlzs_assoc, assetcol, monitor):
    """
    :param riskinput: a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param rlzs_assoc:
        a :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param assetcol: an AssetCollection instance
    :param monitor: :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        an AccumDict with the losses by taxonomy (a numpy array of shape
        (T, L, R)), the average losses, the aggregate losses and the gmfbytes
    """
    lti = riskmodel.lti  # loss type -> index
    L, R = len(lti), len(rlzs_assoc.realizations)
    T = len(assetcol.taxonomies)
    A = len(assetcol)
    taxonomy_id = {t: i for i, t in enumerate(sorted(assetcol.taxonomies))}
    losses = numpy.zeros((T, L, R), F64)
    avglosses = numpy.zeros((A, L, R), F64) if monitor.avg_losses else None
    agglosses = AccumDict(
        {lr: AccumDict() for lr in itertools.product(range(L), range(R))})
    for out in riskmodel.gen_outputs(riskinput, rlzs_assoc, monitor, assetcol):
        # NB: out.assets is a non-empty list of assets with the same taxonomy
        t = taxonomy_id[out.assets[0].taxonomy]
        l, r = out.lr
        losses[t, l, r] += out.alosses.sum()
        if monitor.avg_losses:
            for i, loss in enumerate(out.alosses):
                if loss:
                    avglosses[i, l, r] += loss
        agglosses[l, r] += {eid: loss for eid, loss
                            in zip(out.eids, out.elosses) if loss}
    # convert agglosses into arrays to reduce the data transfer
    agglosses = {lr: numpy.array(sorted(agglosses[lr].items()), elt_dt)
                 for lr in agglosses}
    return AccumDict(losses=losses, avglosses=avglosses,
                     agglosses=agglosses, gmfbytes=monitor.gmfbytes)
def make_eps(assets_by_site, num_samples, seed, correlation):
    """
    :param assets_by_site: a list of lists of assets
    :param int num_samples: the number of ruptures
    :param int seed: a random seed
    :param float correlation: the correlation coefficient
    :returns: epsilons matrix of shape (num_assets, num_samples)
    """
    all_assets = (a for assets in assets_by_site for a in assets)
    assets_by_taxo = groupby(all_assets, by_taxonomy)
    num_assets = sum(map(len, assets_by_site))
    eps = numpy.zeros((num_assets, num_samples), numpy.float32)
    for taxonomy, assets in assets_by_taxo.items():
        # the association with the epsilons is done in order
        assets.sort(key=operator.attrgetter('idx'))
        shape = (len(assets), num_samples)
        logging.info('Building %s epsilons for taxonomy %s', shape, taxonomy)
        zeros = numpy.zeros(shape)
        epsilons = scientific.make_epsilons(zeros, seed, correlation)
        for asset, epsrow in zip(assets, epsilons):
            eps[asset.ordinal] = epsrow
    return eps
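A small sketch of how the returned matrix is indexed by asset ordinal, assuming the module-level helpers (groupby, by_taxonomy, scientific) are available as in the engine; FakeAsset below is purely illustrative and only mimics the attributes make_eps relies on:

# illustrative only: FakeAsset stands in for the engine's asset objects
class FakeAsset(object):
    def __init__(self, ordinal, idx, taxonomy):
        self.ordinal, self.idx, self.taxonomy = ordinal, idx, taxonomy

assets_by_site = [[FakeAsset(0, 0, 'RC'), FakeAsset(1, 1, 'W')],
                  [FakeAsset(2, 2, 'RC')]]
eps = make_eps(assets_by_site, num_samples=10, seed=42, correlation=0.5)
print(eps.shape)  # (3, 10): one row of epsilons per asset
print(eps[2])     # the epsilons of the asset with ordinal 2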