Beispiel #1
0
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    # child monitors timing the three hot phases (no memory measurement)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    mon_avg = monitor('averaging losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
        weights = dstore['weights'][()]  # realization weights
    L = len(param['elt'].loss_names)  # number of loss types
    aggkey = param['aggkey']
    # one record per event: event_id plus a vector of L losses
    elt_dt = [('event_id', U32), ('loss', (F32, (L, )))]
    acc = dict(events_per_sid=0)
    elt = param['elt']
    tempname = param['tempname']
    aggby = param['aggregate_by']
    haz_by_sid = general.group_array(gmfs, 'sid')
    losses_by_A = numpy.zeros((len(assets_df), len(elt.loss_names)), F32)
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            elt.aggregate(out, param['minimum_asset_loss'], aggby)
            # NB: after the aggregation out contains losses, not loss_ratios
        if param['avg_losses']:
            with mon_avg:
                # one realization weight per hazard record on this site
                ws = weights[haz['rlz']]
                for lni, ln in enumerate(elt.loss_names):
                    # weighted sum over the events, per asset ordinal
                    losses_by_A[assets['ordinal'], lni] += out[ln] @ ws
    if len(gmfs):
        # convert the accumulated count into an average per GMF row
        acc['events_per_sid'] /= len(gmfs)
    acc['alt'] = alt = {}
    for key, k in aggkey.items():
        # string key like "tag1,tag2," identifying the aggregation bucket
        s = ','.join(map(str, key)) + ','
        alt[s] = numpy.array([(eid, arr[k])
                              for eid, arr in elt.items() if arr[k].sum()],
                             elt_dt)
        # in the demo there are 264/1694 nonzero events, i.e. arr[k].sum()
        # is zero most of the time
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        acc['losses_by_A'] = losses_by_A * param['ses_ratio']
    return acc
Beispiel #2
0
def calc_risk(df, param, monitor):
    """
    :param df: a DataFrame of GMFs with fields sid, eid, gmv_...
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys alt, losses_by_A
    """
    # child monitors timing the three hot phases (no memory measurement)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    mon_avg = monitor('averaging losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
        weights = dstore['weights'][()]  # realization weights
    acc = dict(events_per_sid=0)
    alt = copy.copy(param['alt'])  # avoid issues with OQ_DISTRIBUTE=no
    tempname = param['tempname']
    aggby = param['aggregate_by']
    mal = param['minimum_asset_loss']
    haz_by_sid = {s: d for s, d in df.groupby('sid')}
    losses_by_A = numpy.zeros((len(assets_df), len(alt.loss_names)), F32)
    acc['avg_gmf'] = avg_gmf = {}
    # columns sid, eid, rlz are bookkeeping, not ground motion values.
    # BUGFIX: the original used the substring test ``col not in 'sid eid rlz'``
    # which would also skip any column whose name happens to be a substring
    # of that string (e.g. 'id' or 'rlz '-fragments); test exact names instead
    for col in df.columns:
        if col not in ('sid', 'eid', 'rlz'):
            avg_gmf[col] = numpy.zeros(param['N'], F32)

    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            alt.aggregate(out, mal, aggby)
            # NB: after the aggregation out contains losses, not loss_ratios
        ws = weights[haz['rlz']]  # one weight per hazard record on this site
        for col in df.columns:
            if col not in ('sid', 'eid', 'rlz'):
                # weight-averaged ground motion value on this site
                avg_gmf[col][sid] = haz[col] @ ws
        if param['avg_losses']:
            with mon_avg:
                for lni, ln in enumerate(alt.loss_names):
                    losses_by_A[assets['ordinal'], lni] += out[ln] @ ws
    if len(df):
        # convert the accumulated count into an average per GMF row
        acc['events_per_sid'] /= len(df)
    acc['alt'] = alt.to_dframe()
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        acc['losses_by_A'] = losses_by_A * param['ses_ratio']
    return acc
Beispiel #3
0
def _calc_risk(hazard, param, monitor):
    """
    Compute event losses from precomputed hazard.

    :param hazard: a dictionary with keys 'gmfs', 'events', 'gmf_info'
    :param param: a dictionary of parameters (hdf5path, lba, aggregate_by,
        asset_loss_table, avg_losses, ses_ratio, tempname, ...)
    :param monitor: a Monitor instance
    :returns: a dictionary with keys elt, alt, gmf_info, events_per_sid,
        lossbytes and possibly losses_by_A
    """
    gmfs = numpy.concatenate(hazard['gmfs'])
    events = numpy.concatenate(hazard['events'])
    # child monitors timing the hot phases (no memory measurement)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
        weights = dstore['weights'][()]  # realization weights
    E = len(events)  # number of events
    L = len(param['lba'].loss_names)  # number of loss types
    A = sum(len(assets) for assets in assets_by_site)  # number of assets
    shape = assetcol.tagcol.agg_shape((E, L), param['aggregate_by'])
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    acc = dict(
        elt=numpy.zeros(shape, F32),  # shape (E, L, T...)
        alt=numpy.zeros((A, E, L), F32) if param['asset_loss_table'] else None,
        gmf_info=[],
        events_per_sid=0,
        lossbytes=0)
    arr = acc['elt']
    alt = acc['alt']
    lba = param['lba']
    tempname = param['tempname']
    tagnames = param['aggregate_by']
    # map event id -> realization index (pairs from the events array)
    eid2rlz = dict(events[['id', 'rlz_id']])
    # map event id -> positional index in the events array
    eid2idx = {eid: idx for idx, eid in enumerate(eid2rlz)}

    for sid, haz in general.group_array(gmfs, 'sid').items():
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        acc['events_per_sid'] += len(haz)
        if param['avg_losses']:
            # one realization weight per event affecting this site
            ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, tempname)
        eidx = [eid2idx[eid] for eid in haz['eid']]
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                # tag indices appear to be stored 1-based; shift to 0-based
                # for use as array indices -- TODO confirm against tagcol
                tagidxs = tuple(idx - 1 for idx in tagi)
                losses_by_lt = {}
                for lti, lt in enumerate(crmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        # occupants have no 'value-' field in the asset array
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    losses_by_lt[lt] = losses
                for loss_idx, losses in lba.compute(asset, losses_by_lt):
                    arr[(eidx, loss_idx) + tagidxs] += losses
                    if param['avg_losses']:
                        # weighted average scaled by ses_ratio
                        lba.losses_by_A[aid, loss_idx] += (losses @ ws *
                                                           param['ses_ratio'])
                    acc['lossbytes'] += losses.nbytes
    if len(gmfs):
        # convert the accumulated count into an average per GMF row
        acc['events_per_sid'] /= len(gmfs)
    acc['gmf_info'] = numpy.array(hazard['gmf_info'], gmf_info_dt)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        (
            # keep only events with at least one nonzero loss
            (event['id'], event['rlz_id'], losses)  # losses (L, T...)
            for event, losses in zip(events, arr) if losses.sum()),
        elt_dt)
    if param['avg_losses']:
        acc['losses_by_A'] = param['lba'].losses_by_A
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    if param['asset_loss_table']:
        acc['alt'] = alt, events['id']
    return acc
Beispiel #4
0
def ebrisk(rupgetter, srcfilter, param, monitor):
    """
    :param rupgetter:
        a RuptureGetter instance
    :param srcfilter:
        a SourceFilter instance
    :param param:
        a dictionary of parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        an ArrayWrapper with shape (E, L, T, ...)
    """
    riskmodel = param['riskmodel']
    E = rupgetter.num_events  # number of events in this task
    L = len(riskmodel.lti)  # number of loss types
    N = len(srcfilter.sitecol.complete)  # total number of sites
    e1 = rupgetter.first_event  # global offset of the first event
    with monitor('getting assets', measuremem=False):
        with datastore.read(srcfilter.filename) as dstore:
            assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    A = len(assetcol)  # number of assets
    getter = getters.GmfGetter(rupgetter, srcfilter, param['oqparam'])
    with monitor('getting hazard'):
        getter.init()  # instantiate the computers
        hazard = getter.get_hazard()  # sid -> (sid, eid, gmv)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    events = rupgetter.get_eid_rlz()
    # numpy.testing.assert_equal(events['eid'], sorted(events['eid']))
    # map each event id to its global index in [e1, e1 + E)
    eid2idx = dict(zip(events['eid'], range(e1, e1 + E)))
    tagnames = param['aggregate_by']
    shape = assetcol.tagcol.agg_shape((E, L), tagnames)
    elt_dt = [('eid', U64), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    if param['asset_loss_table']:
        alt = numpy.zeros((A, E, L), F32)
    acc = numpy.zeros(shape, F32)  # shape (E, L, T...)
    if param['avg_losses']:
        losses_by_A = numpy.zeros((A, L), F32)
    else:
        losses_by_A = 0
    # NB: IMT-dependent weights are not supported in ebrisk
    times = numpy.zeros(N)  # risk time per site_id
    num_events_per_sid = 0
    epspath = param['epspath']
    gmf_nbytes = 0
    for sid, haz in hazard.items():
        gmf_nbytes += haz.nbytes
        t0 = time.time()
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        num_events_per_sid += len(haz)
        if param['avg_losses']:
            # one realization weight per event affecting this site
            weights = getter.weights[
                [getter.eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, epspath)
        # local 0-based event indices for this site's hazard records
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']]) - e1
        # rewrite the eids as their global indices (eidx + e1)
        haz['eid'] = eidx + e1
        with mon_risk:
            out = riskmodel.get_output(assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                # tag indices appear to be stored 1-based; shift to 0-based
                tagidxs = tuple(idx - 1 for idx in tagi)
                for lti, lt in enumerate(riskmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        # occupants have no 'value-' field in the asset array
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    acc[(eidx, lti) + tagidxs] += losses
                    if param['avg_losses']:
                        losses_by_A[aid, lti] += losses @ weights
            times[sid] = time.time() - t0
    if hazard:
        # convert the accumulated count into an average per hazard site
        num_events_per_sid /= len(hazard)
    with monitor('building event loss table'):
        # keep only events with at least one nonzero loss
        elt = numpy.fromiter(
            ((event['eid'], event['rlz'], losses)
             for event, losses in zip(events, acc) if losses.sum()), elt_dt)
        agg = general.AccumDict(accum=numpy.zeros(shape[1:], F32))  # rlz->agg
        for rec in elt:
            agg[rec['rlzi']] += rec['loss'] * param['ses_ratio']
    res = {'elt': elt, 'agg_losses': agg, 'times': times,
           'events_per_sid': num_events_per_sid, 'gmf_nbytes': gmf_nbytes}
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        res['losses_by_A'] = losses_by_A * param['ses_ratio']
    if param['asset_loss_table']:
        eidx = numpy.array([eid2idx[eid] for eid in events['eid']])
        res['alt_eidx'] = alt, eidx
    return res
Beispiel #5
0
def ebrisk(rupgetter, srcfilter, param, monitor):
    """
    :param rupgetter:
        a RuptureGetter instance
    :param srcfilter:
        a SourceFilter instance
    :param param:
        a dictionary of parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        an ArrayWrapper with shape (E, L, T, ...)
    """
    riskmodel = param['riskmodel']
    E = rupgetter.num_events  # number of events in this task
    L = len(riskmodel.lti)  # number of loss types
    N = len(srcfilter.sitecol.complete)  # total number of sites
    e1 = rupgetter.first_event  # global offset of the first event
    with monitor('getting assets', measuremem=False):
        with datastore.read(srcfilter.filename) as dstore:
            assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    A = len(assetcol)  # number of assets
    getter = getters.GmfGetter(rupgetter, srcfilter, param['oqparam'])
    with monitor('getting hazard'):
        getter.init()  # instantiate the computers
        hazard = getter.get_hazard_by_sid()  # sid -> (sid, eid, gmv)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    events = rupgetter.get_eid_rlz()
    # numpy.testing.assert_equal(events['eid'], sorted(events['eid']))
    # map each event id to its global index in [e1, e1 + E)
    eid2idx = dict(zip(events['eid'], range(e1, e1 + E)))
    tagnames = param['aggregate_by']
    shape = assetcol.tagcol.agg_shape((E, L), tagnames)
    elt_dt = [('eid', U64), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    if param['asset_loss_table']:
        alt = numpy.zeros((A, E, L), F32)
    acc = numpy.zeros(shape, F32)  # shape (E, L, T...)
    if param['avg_losses']:
        losses_by_A = numpy.zeros((A, L), F32)
    else:
        losses_by_A = 0
    # NB: IMT-dependent weights are not supported in ebrisk
    times = numpy.zeros(N)  # risk time per site_id
    num_events_per_sid = 0
    epspath = param['epspath']
    gmf_nbytes = 0
    for sid, haz in hazard.items():
        gmf_nbytes += haz.nbytes
        t0 = time.time()
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        num_events_per_sid += len(haz)
        if param['avg_losses']:
            # one realization weight per event affecting this site
            weights = getter.weights[[
                getter.eid2rlz[eid] for eid in haz['eid']
            ]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, epspath)
        # local 0-based event indices for this site's hazard records
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']]) - e1
        # rewrite the eids as their global indices (eidx + e1)
        haz['eid'] = eidx + e1
        with mon_risk:
            out = riskmodel.get_output(assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                # tag indices appear to be stored 1-based; shift to 0-based
                tagidxs = tuple(idx - 1 for idx in tagi)
                for lti, lt in enumerate(riskmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        # occupants have no 'value-' field in the asset array
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    acc[(eidx, lti) + tagidxs] += losses
                    if param['avg_losses']:
                        losses_by_A[aid, lti] += losses @ weights
            times[sid] = time.time() - t0
    if hazard:
        # convert the accumulated count into an average per hazard site
        num_events_per_sid /= len(hazard)
    with monitor('building event loss table'):
        # keep only events with at least one nonzero loss
        elt = numpy.fromiter(
            ((event['eid'], event['rlz'], losses)
             for event, losses in zip(events, acc) if losses.sum()), elt_dt)
        agg = general.AccumDict(accum=numpy.zeros(shape[1:], F32))  # rlz->agg
        for rec in elt:
            agg[rec['rlzi']] += rec['loss'] * param['ses_ratio']
    res = {
        'elt': elt,
        'agg_losses': agg,
        'times': times,
        'events_per_sid': num_events_per_sid,
        'gmf_nbytes': gmf_nbytes
    }
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        res['losses_by_A'] = losses_by_A * param['ses_ratio']
    if param['asset_loss_table']:
        eidx = numpy.array([eid2idx[eid] for eid in events['eid']])
        res['alt_eidx'] = alt, eidx
    return res
Beispiel #6
0
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    # child monitors timing the hot phases (no memory measurement)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
        weights = dstore['weights'][()]  # realization weights
    L = len(param['lba'].loss_names)  # number of loss types
    elt_dt = [('event_id', U32), ('loss', (F32, (L, )))]
    # aggkey -> eid -> loss
    acc = dict(events_per_sid=0, numlosses=numpy.zeros(2, int))  # (kept, tot)
    lba = param['lba']
    lba.alt = general.AccumDict(  # idx -> eid -> loss
        accum=general.AccumDict(accum=numpy.zeros(L, F32)))
    lba.losses_by_E = general.AccumDict(  # eid -> loss
        accum=numpy.zeros(L, F32))
    tempname = param['tempname']
    aggby = param['aggregate_by']

    # build the per-loss-type threshold vector in the same order used by
    # lba.compute: loss types with a policy get a second (insured) entry
    minimum_loss = []
    for lt, lti in crmodel.lti.items():
        val = param['minimum_asset_loss'][lt]
        minimum_loss.append(val)
        if lt in lba.policy_dict:  # same order as in lba.compute
            minimum_loss.append(val)

    haz_by_sid = general.group_array(gmfs, 'sid')
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            if param['avg_losses']:
                # one realization weight per hazard record on this site
                ws = weights[haz['rlz']]
            else:
                ws = None
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            tagidxs = assets[aggby] if aggby else None
            # returns a (kept, total) pair accumulated into numlosses
            acc['numlosses'] += lba.aggregate(out, haz['eid'], minimum_loss,
                                              tagidxs, ws)
    if len(gmfs):
        # convert the accumulated count into an average per GMF row
        acc['events_per_sid'] /= len(gmfs)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((eid, losses)
         for eid, losses in lba.losses_by_E.items() if losses.sum()), elt_dt)
    acc['alt'] = {
        idx: numpy.fromiter(  # already sorted by aid, ultra-fast
            ((eid, loss) for eid, loss in lba.alt[idx].items()), elt_dt)
        for idx in lba.alt
    }
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        acc['losses_by_A'] = param['lba'].losses_by_A * param['ses_ratio']
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    return acc
Beispiel #7
0
def calc_risk(gmfs, param, monitor):
    """
    Compute losses for the given ground motion fields.

    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary with keys elt, alt, indices, events_per_sid,
        numlosses and possibly losses_by_A
    """
    # child monitors timing the hot phases (no memory measurement)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    eids = numpy.unique(gmfs['eid'])
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
        exposed_values = dstore['exposed_values/agg'][()]
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
        events = dstore['events'][list(eids)]
        weights = dstore['weights'][()]  # realization weights
    E = len(eids)  # number of distinct events
    L = len(param['lba'].loss_names)  # number of loss types
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, (L, )))]
    alt = general.AccumDict(accum=numpy.zeros(L, F32))  # aid, eid -> loss
    arr = numpy.zeros((E, L), F32)
    acc = dict(events_per_sid=0, numlosses=numpy.zeros(2, int))  # (kept, tot)
    lba = param['lba']
    tempname = param['tempname']
    # map event id -> realization index (pairs from the events array)
    eid2rlz = dict(events[['id', 'rlz_id']])
    # map event id -> positional index in the eids array
    eid2idx = {eid: idx for idx, eid in enumerate(eids)}

    # build per-loss-type thresholds as a fraction of the exposed values;
    # loss types with a policy get a second entry, same order as lba.compute
    minimum_loss = []
    fraction = param['minimum_loss_fraction'] / len(assetcol)
    for lt, lti in crmodel.lti.items():
        val = exposed_values[lti] * fraction
        minimum_loss.append(val)
        if lt in lba.policy_dict:  # same order as in lba.compute
            minimum_loss.append(val)

    for sid, haz in general.group_array(gmfs, 'sid').items():
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        acc['events_per_sid'] += len(haz)
        if param['avg_losses']:
            # one realization weight per event affecting this site
            ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, tempname)
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']])
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        for lti, lt in enumerate(crmodel.loss_types):
            lratios = out[lt]
            if lt == 'occupants':
                # occupants have no 'value-' field in the asset array
                field = 'occupants_None'
            else:
                field = 'value-' + lt
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                ls = asset[field] * lratios[a]  # losses per event
                for loss_idx, losses in lba.compute(asset, ls, lt):
                    kept = 0
                    with mon_agg:
                        if param['aggregate_by']:
                            # store only losses above the minimum threshold
                            for loss, eid in zip(losses, out.eids):
                                if loss >= minimum_loss[loss_idx]:
                                    alt[aid, eid][loss_idx] = loss
                                    kept += 1
                        arr[eidx, loss_idx] += losses
                    if param['avg_losses']:  # this is really fast
                        lba.losses_by_A[aid, loss_idx] += losses @ ws
                    acc['numlosses'] += numpy.array([kept, len(losses)])
    if len(gmfs):
        # convert the accumulated count into an average per GMF row
        acc['events_per_sid'] /= len(gmfs)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((event['id'], event['rlz_id'], losses)
         for event, losses in zip(events, arr) if losses.sum()), elt_dt)
    acc['alt'] = alt = numpy.fromiter(  # already sorted by aid
        ((aid, eid, eid2rlz[eid], loss) for (aid, eid), loss in alt.items()),
        param['ael_dt'])
    alt.sort(order='rlzi')
    # start/stop indices per realization in the sorted alt array
    acc['indices'] = general.get_indices(alt['rlzi'])
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        acc['losses_by_A'] = param['lba'].losses_by_A * param['ses_ratio']
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    return acc
Beispiel #8
0
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys alt, losses_by_A
    """
    # child monitors timing the three hot phases (no memory measurement)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    mon_avg = monitor('averaging losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
    with monitor('getting crmodel'):
        crmodel = monitor.read('crmodel')
        weights = dstore['weights'][()]  # realization weights
    acc = dict(events_per_sid=0)
    alt = copy.copy(param['alt'])  # avoid issues with OQ_DISTRIBUTE=no
    alt_dt = param['oqparam'].alt_dt()
    tempname = param['tempname']
    aggby = param['aggregate_by']
    haz_by_sid = general.group_array(gmfs, 'sid')
    losses_by_A = numpy.zeros((len(assets_df), len(alt.loss_names)), F32)
    acc['avg_gmf'] = avg_gmf = {}
    # fields sid, eid, rlz are bookkeeping, not ground motion values.
    # BUGFIX: the original used the substring test ``col not in 'sid eid rlz'``
    # which would also skip any field whose name happens to be a substring
    # of that string (e.g. 'id'); test exact names instead
    for col in gmfs.dtype.names:
        if col not in ('sid', 'eid', 'rlz'):
            avg_gmf[col] = numpy.zeros(param['N'], F32)

    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        with mon_risk:
            assets = asset_df.to_records()  # fast
            acc['events_per_sid'] += len(haz)
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            out = get_output(crmodel, assets_by_taxo, haz)  # slow
        with mon_agg:
            alt.aggregate(out, param['minimum_asset_loss'], aggby)
            # NB: after the aggregation out contains losses, not loss_ratios
        ws = weights[haz['rlz']]  # one weight per hazard record on this site
        for col in gmfs.dtype.names:
            if col not in ('sid', 'eid', 'rlz'):
                # weight-averaged ground motion value on this site
                avg_gmf[col][sid] = haz[col] @ ws
        if param['avg_losses']:
            with mon_avg:
                for lni, ln in enumerate(alt.loss_names):
                    losses_by_A[assets['ordinal'], lni] += out[ln] @ ws
    if len(gmfs):
        # convert the accumulated count into an average per GMF row
        acc['events_per_sid'] /= len(gmfs)
    out = []
    for eid, arr in alt.items():
        for k, vals in enumerate(arr):  # arr has shape K, L'
            if vals.sum() > 0:
                # in the demo there are 264/1694 nonzero events, i.e.
                # vals.sum() is zero most of the time
                out.append((eid, k) + tuple(vals))
    acc['alt'] = numpy.array(out, alt_dt)
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        acc['losses_by_A'] = losses_by_A * param['ses_ratio']
    return acc
Beispiel #9
0
def calc_risk(gmfs, param, monitor):
    """
    :param gmfs: an array of GMFs with fields sid, eid, gmv
    :param param: a dictionary of parameters coming from the job.ini
    :param monitor: a Monitor instance
    :returns: a dictionary of arrays with keys elt, alt, losses_by_A, ...
    """
    # child monitors timing the hot phases (no memory measurement)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    eids = numpy.unique(gmfs['eid'])
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assets_df = dstore.read_df('assetcol/array', 'ordinal')
        exposed_values = dstore['exposed_values/agg'][()]
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
        events = dstore['events'][list(eids)]
        weights = dstore['weights'][()]  # realization weights
    E = len(eids)  # number of distinct events
    L = len(param['lba'].loss_names)  # number of loss types
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, (L,)))]
    alt = general.AccumDict(accum=general.AccumDict(accum=numpy.zeros(L, F32)))
    # aggkey -> eid -> loss
    arr = numpy.zeros((E, L), F32)
    acc = dict(events_per_sid=0, numlosses=numpy.zeros(2, int))  # (kept, tot)
    lba = param['lba']
    tempname = param['tempname']
    # map event id -> realization index (pairs from the events array)
    eid2rlz = dict(events[['id', 'rlz_id']])
    # map event id -> positional index in the eids array
    eid2idx = {eid: idx for idx, eid in enumerate(eids)}
    aggby = param['aggregate_by']

    # build per-loss-type thresholds as a fraction of the exposed values;
    # loss types with a policy get a second entry, same order as lba.compute
    minimum_loss = []
    fraction = param['minimum_loss_fraction'] / len(assets_df)
    for lt, lti in crmodel.lti.items():
        val = exposed_values[lti] * fraction
        minimum_loss.append(val)
        if lt in lba.policy_dict:  # same order as in lba.compute
            minimum_loss.append(val)

    haz_by_sid = general.group_array(gmfs, 'sid')
    for sid, asset_df in assets_df.groupby('site_id'):
        try:
            haz = haz_by_sid[sid]
        except KeyError:  # no hazard here
            continue
        assets = asset_df.to_records()
        with mon_risk:
            acc['events_per_sid'] += len(haz)
            if param['avg_losses']:
                # one realization weight per event affecting this site
                ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
            assets_by_taxo = get_assets_by_taxo(assets, tempname)  # fast
            eidx = numpy.array([eid2idx[eid] for eid in haz['eid']])
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for lti, lt in enumerate(crmodel.loss_types):
                lratios = out[lt]
                if lt == 'occupants':
                    # occupants have no 'value-' field in the asset array
                    field = 'occupants_None'
                else:
                    field = 'value-' + lt
                if aggby:
                    tagidxs = assets[aggby]
                for a, asset in enumerate(assets):
                    if aggby:
                        # comma-joined tag values identify the bucket
                        idx = ','.join(map(str, tagidxs[a]))
                    aid = asset['ordinal']
                    ls = asset[field] * lratios[a]  # losses per event
                    for loss_idx, losses in lba.compute(asset, ls, lt):
                        kept = 0
                        if aggby:
                            # store only losses above the minimum threshold
                            for loss, eid in zip(losses, out.eids):
                                if loss >= minimum_loss[loss_idx]:
                                    alt[idx][eid][loss_idx] += loss
                                    kept += 1
                        arr[eidx, loss_idx] += losses
                        if param['avg_losses']:  # this is really fast
                            lba.losses_by_A[aid, loss_idx] += losses @ ws
                        acc['numlosses'] += numpy.array([kept, len(losses)])
    if len(gmfs):
        # convert the accumulated count into an average per GMF row
        acc['events_per_sid'] /= len(gmfs)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((event['id'], event['rlz_id'], losses)
         for event, losses in zip(events, arr) if losses.sum()), elt_dt)
    acc['alt'] = {idx: numpy.fromiter(  # already sorted by aid, ultra-fast
        ((eid, eid2rlz[eid], loss) for eid, loss in alt[idx].items()),
        elt_dt) for idx in alt}
    if param['avg_losses']:
        # scale by ses_ratio to turn total losses into average losses
        acc['losses_by_A'] = param['lba'].losses_by_A * param['ses_ratio']
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    return acc