def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    mon_avg = monitor('averaging losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
    weights = dstore['weights'][()]
    L = len(param['elt'].loss_names)
    aggkey = param['aggkey']
    elt_dt = [('event_id', U32), ('loss', (F32, (L,)))]
    acc = dict(events_per_sid=0)
    elt = param['elt']
    tempname = param['tempname']
    aggby = param['aggregate_by']
    haz_by_sid = general.group_array(gmfs, 'sid')
    losses_by_A = numpy.zeros((len(assets_df), len(elt.loss_names)), F32)
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            elt.aggregate(out, param['minimum_asset_loss'], aggby)
            # NB: after the aggregation out contains losses, not loss_ratios
        if param['avg_losses']:
            with mon_avg:
                ws = weights[haz['rlz']]
                for lni, ln in enumerate(elt.loss_names):
                    losses_by_A[assets['ordinal'], lni] += out[ln] @ ws
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    acc['alt'] = alt = {}
    for key, k in aggkey.items():
        s = ','.join(map(str, key)) + ','
        alt[s] = numpy.array(
            [(eid, arr[k]) for eid, arr in elt.items() if arr[k].sum()],
            elt_dt)
        # in the demo there are 264/1694 nonzero events, i.e. arr[k].sum()
        # is zero most of the time
    if param['avg_losses']:
        acc['losses_by_A'] = losses_by_A * param['ses_ratio']
    return acc
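
# A minimal sketch (hypothetical helper, not part of the engine) of the
# aggregation step above: given a mapping eid -> losses of shape (K, L) and an
# aggregation key index k, keep only the events with a nonzero loss for that
# key and store them in a structured array shaped like elt_dt.
def _sketch_agg_loss_table(losses_by_eid, k, L):
    import numpy
    elt_dt = [('event_id', numpy.uint32), ('loss', (numpy.float32, (L,)))]
    return numpy.array(
        [(eid, arr[k]) for eid, arr in losses_by_eid.items()
         if arr[k].sum()], elt_dt)

# Example: event 1 has zero losses for key 0 and is skipped:
# >>> import numpy
# >>> losses = {0: numpy.ones((1, 2)), 1: numpy.zeros((1, 2))}
# >>> _sketch_agg_loss_table(losses, 0, 2)['event_id']
# array([0], dtype=uint32)
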
def calc_risk(df, param, monitor):
    """
    :param df: a DataFrame of GMFs with fields sid, eid, gmv_...
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys alt, losses_by_A, avg_gmf
    """
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    mon_avg = monitor('averaging losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
    weights = dstore['weights'][()]
    acc = dict(events_per_sid=0)
    alt = copy.copy(param['alt'])  # avoid issues with OQ_DISTRIBUTE=no
    tempname = param['tempname']
    aggby = param['aggregate_by']
    mal = param['minimum_asset_loss']
    haz_by_sid = {s: d for s, d in df.groupby('sid')}
    losses_by_A = numpy.zeros((len(assets_df), len(alt.loss_names)), F32)
    acc['avg_gmf'] = avg_gmf = {}
    for col in df.columns:
        if col not in 'sid eid rlz':
            avg_gmf[col] = numpy.zeros(param['N'], F32)
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            alt.aggregate(out, mal, aggby)
            # NB: after the aggregation out contains losses, not loss_ratios
        ws = weights[haz['rlz']]
        for col in df.columns:
            if col not in 'sid eid rlz':
                avg_gmf[col][sid] = haz[col] @ ws
        if param['avg_losses']:
            with mon_avg:
                for lni, ln in enumerate(alt.loss_names):
                    losses_by_A[assets['ordinal'], lni] += out[ln] @ ws
    if len(df):
        acc['events_per_sid'] /= len(df)
    acc['alt'] = alt.to_dframe()
    if param['avg_losses']:
        acc['losses_by_A'] = losses_by_A * param['ses_ratio']
    return acc
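
# A minimal sketch (hypothetical names) of the avg_gmf computation above: on
# each site, the E ground motion values of each intensity measure column are
# reduced to a single number by a dot product with the E realization weights.
def _sketch_avg_gmf(gmvs_by_col, weights):
    import numpy
    return {col: numpy.asarray(gmvs) @ weights
            for col, gmvs in gmvs_by_col.items()}

# >>> import numpy
# >>> _sketch_avg_gmf({'gmv_PGA': [.1, .3]}, numpy.array([1., 0.]))
# {'gmv_PGA': 0.1}
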
def _calc_risk(hazard, param, monitor):
    """
    :param hazard: a dictionary with keys gmfs, events, gmf_info
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    gmfs = numpy.concatenate(hazard['gmfs'])
    events = numpy.concatenate(hazard['events'])
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
    weights = dstore['weights'][()]
    E = len(events)
    L = len(param['lba'].loss_names)
    A = sum(len(assets) for assets in assets_by_site)
    shape = assetcol.tagcol.agg_shape((E, L), param['aggregate_by'])
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    acc = dict(
        elt=numpy.zeros(shape, F32),  # shape (E, L, T...)
        alt=numpy.zeros((A, E, L), F32) if param['asset_loss_table'] else None,
        gmf_info=[], events_per_sid=0, lossbytes=0)
    arr = acc['elt']
    alt = acc['alt']
    lba = param['lba']
    tempname = param['tempname']
    tagnames = param['aggregate_by']
    eid2rlz = dict(events[['id', 'rlz_id']])
    eid2idx = {eid: idx for idx, eid in enumerate(eid2rlz)}
    for sid, haz in general.group_array(gmfs, 'sid').items():
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        acc['events_per_sid'] += len(haz)
        if param['avg_losses']:
            ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, tempname)
        eidx = [eid2idx[eid] for eid in haz['eid']]
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                tagidxs = tuple(idx - 1 for idx in tagi)
                losses_by_lt = {}
                for lti, lt in enumerate(crmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    losses_by_lt[lt] = losses
                for loss_idx, losses in lba.compute(asset, losses_by_lt):
                    arr[(eidx, loss_idx) + tagidxs] += losses
                    if param['avg_losses']:
                        lba.losses_by_A[aid, loss_idx] += (
                            losses @ ws * param['ses_ratio'])
                    acc['lossbytes'] += losses.nbytes
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    acc['gmf_info'] = numpy.array(hazard['gmf_info'], gmf_info_dt)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((event['id'], event['rlz_id'], losses)  # losses (L, T...)
         for event, losses in zip(events, arr) if losses.sum()), elt_dt)
    if param['avg_losses']:
        acc['losses_by_A'] = param['lba'].losses_by_A
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    if param['asset_loss_table']:
        acc['alt'] = alt, events['id']
    return acc
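
# A minimal sketch of the tag-based aggregation above: losses for a set of
# events are accumulated into an array of shape (E, L, T) by advanced indexing
# with the event indices plus the (0-based) tag index of the asset. In the
# engine the shape comes from tagcol.agg_shape; the names here are made up.
def _sketch_aggregate_by_tag(E, L, T, eidx, loss_idx, tagidx, losses):
    import numpy
    arr = numpy.zeros((E, L, T), numpy.float32)
    arr[eidx, loss_idx, tagidx] += losses  # fancy indexing on the event axis
    return arr

# >>> import numpy
# >>> arr = _sketch_aggregate_by_tag(3, 2, 2, eidx=[0, 2], loss_idx=1,
# ...                                tagidx=0, losses=numpy.array([10., 20.]))
# >>> arr[:, 1, 0]
# array([10.,  0., 20.], dtype=float32)
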
def ebrisk(rupgetter, srcfilter, param, monitor):
    """
    :param rupgetter: a RuptureGetter instance
    :param srcfilter: a SourceFilter instance
    :param param: a dictionary of parameters
    :param monitor: :class:`openquake.baselib.performance.Monitor` instance
    :returns: a dictionary with keys elt, agg_losses, times, losses_by_A, ...
    """
    crmodel = param['crmodel']
    lba = param['lba']
    E = rupgetter.num_events
    L = len(lba.loss_names)
    N = len(srcfilter.sitecol.complete)
    e1 = rupgetter.first_event
    with monitor('getting assets', measuremem=False):
        with datastore.read(srcfilter.filename) as dstore:
            assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    A = len(assetcol)
    getter = getters.GmfGetter(rupgetter, srcfilter, param['oqparam'])
    with monitor('getting hazard'):
        getter.init()  # instantiate the computers
        hazard = getter.get_hazard_by_sid()  # sid -> (sid, eid, gmv)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    events = rupgetter.get_eid_rlz()
    # numpy.testing.assert_equal(events['eid'], sorted(events['eid']))
    eid2idx = dict(zip(events['eid'], range(e1, e1 + E)))
    tagnames = param['aggregate_by']
    shape = assetcol.tagcol.agg_shape((E, L), tagnames)
    elt_dt = [('event_id', U64), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    if param['asset_loss_table']:
        alt = numpy.zeros((A, E, L), F32)
    acc = numpy.zeros(shape, F32)  # shape (E, L, T...)
    # NB: IMT-dependent weights are not supported in ebrisk
    times = numpy.zeros(N)  # risk time per site_id
    num_events_per_sid = 0
    epspath = param['epspath']
    gmf_nbytes = 0
    for sid, haz in hazard.items():
        gmf_nbytes += haz.nbytes
        t0 = time.time()
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        num_events_per_sid += len(haz)
        if param['avg_losses']:
            weights = getter.weights[
                [getter.eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, epspath)
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']]) - e1
        haz['eid'] = eidx + e1
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                tagidxs = tuple(idx - 1 for idx in tagi)
                losses_by_lt = {}
                for lti, lt in enumerate(crmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    losses_by_lt[lt] = losses
                for loss_idx, losses in lba.compute(asset, losses_by_lt):
                    acc[(eidx, loss_idx) + tagidxs] += losses
                    if param['avg_losses']:
                        lba.losses_by_A[aid, loss_idx] += (
                            losses @ weights * param['ses_ratio'])
        times[sid] = time.time() - t0
    if hazard:
        num_events_per_sid /= len(hazard)
    with monitor('building event loss table'):
        elt = numpy.fromiter(
            ((event['eid'], event['rlz'], losses)  # losses (L, T...)
             for event, losses in zip(events, acc) if losses.sum()), elt_dt)
        agg = general.AccumDict(accum=numpy.zeros(shape[1:], F32))  # rlz->agg
        for rec in elt:
            agg[rec['rlzi']] += rec['loss'] * param['ses_ratio']
    res = {'elt': elt, 'agg_losses': agg, 'times': times,
           'events_per_sid': num_events_per_sid, 'gmf_nbytes': gmf_nbytes}
    res['losses_by_A'] = lba.losses_by_A
    if param['asset_loss_table']:
        eidx = numpy.array([eid2idx[eid] for eid in events['eid']])
        res['alt_eidx'] = alt, eidx
    return res
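
# A minimal sketch (hypothetical standalone version) of the event-index
# remapping used above: sparse event IDs are mapped to a contiguous range
# starting at first_event, so that per-event arrays can be indexed directly.
def _sketch_eid2idx(eids, first_event):
    return dict(zip(eids, range(first_event, first_event + len(eids))))

# >>> _sketch_eid2idx([42, 7, 99], first_event=10)
# {42: 10, 7: 11, 99: 12}
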
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
    weights = dstore['weights'][()]
    L = len(param['lba'].loss_names)
    elt_dt = [('event_id', U32), ('loss', (F32, (L,)))]  # aggkey -> eid -> loss
    acc = dict(events_per_sid=0, numlosses=numpy.zeros(2, int))  # (kept, tot)
    lba = param['lba']
    lba.alt = general.AccumDict(  # idx -> eid -> loss
        accum=general.AccumDict(accum=numpy.zeros(L, F32)))
    lba.losses_by_E = general.AccumDict(  # eid -> loss
        accum=numpy.zeros(L, F32))
    tempname = param['tempname']
    aggby = param['aggregate_by']
    minimum_loss = []
    for lt, lti in crmodel.lti.items():
        val = param['minimum_asset_loss'][lt]
        minimum_loss.append(val)
        if lt in lba.policy_dict:  # same order as in lba.compute
            minimum_loss.append(val)
    haz_by_sid = general.group_array(gmfs, 'sid')
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            if param['avg_losses']:
                ws = weights[haz['rlz']]
            else:
                ws = None
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            tagidxs = assets[aggby] if aggby else None
            acc['numlosses'] += lba.aggregate(
                out, haz['eid'], minimum_loss, tagidxs, ws)
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((eid, losses) for eid, losses in lba.losses_by_E.items()
         if losses.sum()), elt_dt)
    acc['alt'] = {
        idx: numpy.fromiter(  # already sorted by aid, ultra-fast
            ((eid, loss) for eid, loss in lba.alt[idx].items()), elt_dt)
        for idx in lba.alt}
    if param['avg_losses']:
        acc['losses_by_A'] = param['lba'].losses_by_A * param['ses_ratio']
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    return acc
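
# The general.AccumDict used above behaves, for this purpose, much like a
# nested defaultdict mapping idx -> eid -> running loss array. A stdlib-only
# sketch of the same accumulation pattern (hypothetical helper):
def _sketch_nested_accumulator(L):
    import collections
    import numpy
    return collections.defaultdict(
        lambda: collections.defaultdict(
            lambda: numpy.zeros(L, numpy.float32)))

# >>> import numpy
# >>> acc = _sketch_nested_accumulator(2)
# >>> acc['tag1,'][42] += numpy.array([1., 2.])
# >>> acc['tag1,'][42]
# array([1., 2.], dtype=float32)
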
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    eids = numpy.unique(gmfs['eid'])
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
        exposed_values = dstore['exposed_values/agg'][()]
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
    events = dstore['events'][list(eids)]
    weights = dstore['weights'][()]
    E = len(eids)
    L = len(param['lba'].loss_names)
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, (L,)))]
    alt = general.AccumDict(accum=numpy.zeros(L, F32))  # aid, eid -> loss
    arr = numpy.zeros((E, L), F32)
    acc = dict(events_per_sid=0, numlosses=numpy.zeros(2, int))  # (kept, tot)
    lba = param['lba']
    tempname = param['tempname']
    eid2rlz = dict(events[['id', 'rlz_id']])
    eid2idx = {eid: idx for idx, eid in enumerate(eids)}
    minimum_loss = []
    fraction = param['minimum_loss_fraction'] / len(assetcol)
    for lt, lti in crmodel.lti.items():
        val = exposed_values[lti] * fraction
        minimum_loss.append(val)
        if lt in lba.policy_dict:  # same order as in lba.compute
            minimum_loss.append(val)
    for sid, haz in general.group_array(gmfs, 'sid').items():
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        acc['events_per_sid'] += len(haz)
        if param['avg_losses']:
            ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, tempname)
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']])
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        for lti, lt in enumerate(crmodel.loss_types):
            lratios = out[lt]
            if lt == 'occupants':
                field = 'occupants_None'
            else:
                field = 'value-' + lt
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                ls = asset[field] * lratios[a]
                for loss_idx, losses in lba.compute(asset, ls, lt):
                    kept = 0
                    with mon_agg:
                        if param['aggregate_by']:
                            for loss, eid in zip(losses, out.eids):
                                if loss >= minimum_loss[loss_idx]:
                                    alt[aid, eid][loss_idx] = loss
                                    kept += 1
                        arr[eidx, loss_idx] += losses
                        if param['avg_losses']:  # this is really fast
                            lba.losses_by_A[aid, loss_idx] += losses @ ws
                        acc['numlosses'] += numpy.array([kept, len(losses)])
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((event['id'], event['rlz_id'], losses)
         for event, losses in zip(events, arr) if losses.sum()), elt_dt)
    acc['alt'] = alt = numpy.fromiter(  # already sorted by aid
        ((aid, eid, eid2rlz[eid], loss)
         for (aid, eid), loss in alt.items()), param['ael_dt'])
    alt.sort(order='rlzi')
    acc['indices'] = general.get_indices(alt['rlzi'])
    if param['avg_losses']:
        acc['losses_by_A'] = param['lba'].losses_by_A * param['ses_ratio']
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    return acc
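
# A minimal sketch (hypothetical names) of the minimum-loss filter above:
# per-event losses below the threshold are dropped from the asset loss table,
# and the kept/total counts are what feeds acc['numlosses'].
def _sketch_filter_losses(losses, eids, min_loss):
    kept = {}
    for loss, eid in zip(losses, eids):
        if loss >= min_loss:
            kept[eid] = loss
    return kept, len(kept), len(losses)

# >>> _sketch_filter_losses([0.5, 3.0, 2.0], [10, 11, 12], min_loss=1.0)
# ({11: 3.0, 12: 2.0}, 2, 3)
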
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys alt, losses_by_A, avg_gmf
    """
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    mon_avg = monitor('averaging losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
    weights = dstore['weights'][()]
    acc = dict(events_per_sid=0)
    alt = copy.copy(param['alt'])  # avoid issues with OQ_DISTRIBUTE=no
    alt_dt = param['oqparam'].alt_dt()
    tempname = param['tempname']
    aggby = param['aggregate_by']
    haz_by_sid = general.group_array(gmfs, 'sid')
    losses_by_A = numpy.zeros((len(assets_df), len(alt.loss_names)), F32)
    acc['avg_gmf'] = avg_gmf = {}
    for col in gmfs.dtype.names:
        if col not in 'sid eid rlz':
            avg_gmf[col] = numpy.zeros(param['N'], F32)
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            alt.aggregate(out, param['minimum_asset_loss'], aggby)
            # NB: after the aggregation out contains losses, not loss_ratios
        ws = weights[haz['rlz']]
        for col in gmfs.dtype.names:
            if col not in 'sid eid rlz':
                avg_gmf[col][sid] = haz[col] @ ws
        if param['avg_losses']:
            with mon_avg:
                for lni, ln in enumerate(alt.loss_names):
                    losses_by_A[assets['ordinal'], lni] += out[ln] @ ws
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    out = []
    for eid, arr in alt.items():
        for k, vals in enumerate(arr):  # arr has shape K, L'
            if vals.sum() > 0:
                # in the demo there are 264/1694 nonzero events, i.e.
                # vals.sum() is zero most of the time
                out.append((eid, k) + tuple(vals))
    acc['alt'] = numpy.array(out, alt_dt)
    if param['avg_losses']:
        acc['losses_by_A'] = losses_by_A * param['ses_ratio']
    return acc
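
# A minimal sketch of the flattening loop above: the loss table is turned into
# flat rows (eid, k, loss_1, ..., loss_L'), skipping all-zero rows. The alt_dt
# passed here is hypothetical; in the engine it comes from oqparam.alt_dt().
def _sketch_flatten_alt(alt, alt_dt):
    import numpy
    rows = []
    for eid, arr in alt.items():  # arr has shape (K, L')
        for k, vals in enumerate(arr):
            if vals.sum() > 0:  # most rows are zero and are skipped
                rows.append((eid, k) + tuple(vals))
    return numpy.array(rows, alt_dt)

# >>> import numpy
# >>> alt_dt = [('event_id', numpy.uint32), ('agg_id', numpy.uint16),
# ...           ('structural', numpy.float32)]
# >>> _sketch_flatten_alt({7: numpy.array([[0.], [4.5]])}, alt_dt)['agg_id']
# array([1], dtype=uint16)
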
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    eids = numpy.unique(gmfs['eid'])
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
        exposed_values = dstore['exposed_values/agg'][()]
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
    events = dstore['events'][list(eids)]
    weights = dstore['weights'][()]
    E = len(eids)
    L = len(param['lba'].loss_names)
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, (L,)))]
    alt = general.AccumDict(  # aggkey -> eid -> loss
        accum=general.AccumDict(accum=numpy.zeros(L, F32)))
    arr = numpy.zeros((E, L), F32)
    acc = dict(events_per_sid=0, numlosses=numpy.zeros(2, int))  # (kept, tot)
    lba = param['lba']
    tempname = param['tempname']
    eid2rlz = dict(events[['id', 'rlz_id']])
    eid2idx = {eid: idx for idx, eid in enumerate(eids)}
    aggby = param['aggregate_by']
    minimum_loss = []
    fraction = param['minimum_loss_fraction'] / len(assets_df)
    for lt, lti in crmodel.lti.items():
        val = exposed_values[lti] * fraction
        minimum_loss.append(val)
        if lt in lba.policy_dict:  # same order as in lba.compute
            minimum_loss.append(val)
    haz_by_sid = general.group_array(gmfs, 'sid')
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        assets = asset_df.to_records()
        with mon_risk:
            acc['events_per_sid'] += len(haz)
            if param['avg_losses']:
                ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            eidx = numpy.array([eid2idx[eid] for eid in haz['eid']])
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for lti, lt in enumerate(crmodel.loss_types):
                lratios = out[lt]
                if lt == 'occupants':
                    field = 'occupants_None'
                else:
                    field = 'value-' + lt
                if aggby:
                    tagidxs = assets[aggby]
                for a, asset in enumerate(assets):
                    if aggby:
                        idx = ','.join(map(str, tagidxs[a]))
                    aid = asset['ordinal']
                    ls = asset[field] * lratios[a]
                    for loss_idx, losses in lba.compute(asset, ls, lt):
                        kept = 0
                        if aggby:
                            for loss, eid in zip(losses, out.eids):
                                if loss >= minimum_loss[loss_idx]:
                                    alt[idx][eid][loss_idx] += loss
                                    kept += 1
                        arr[eidx, loss_idx] += losses
                        if param['avg_losses']:  # this is really fast
                            lba.losses_by_A[aid, loss_idx] += losses @ ws
                        acc['numlosses'] += numpy.array([kept, len(losses)])
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((event['id'], event['rlz_id'], losses)
         for event, losses in zip(events, arr) if losses.sum()), elt_dt)
    acc['alt'] = {
        idx: numpy.fromiter(  # already sorted by aid, ultra-fast
            ((eid, eid2rlz[eid], loss) for eid, loss in alt[idx].items()),
            elt_dt)
        for idx in alt}
    if param['avg_losses']:
        acc['losses_by_A'] = param['lba'].losses_by_A * param['ses_ratio']
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    return acc
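
# A minimal sketch (hypothetical standalone version) of the aggregation-key
# construction above: the tag indices of an asset, one per tag name in
# aggregate_by, are joined into the comma-separated string that keys alt.
def _sketch_agg_key(tagidxs):
    return ','.join(map(str, tagidxs))

# >>> _sketch_agg_key((3, 1))
# '3,1'
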