Example #1
def _disaggregate(cmaker,
                  sitecol,
                  ctxs,
                  iml2,
                  eps3,
                  pne_mon=performance.Monitor(),
                  gmf_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    # returns AccumDict with keys (poe, imt) and mags, dists, lons, lats
    acc = dict(pnes=[], mags=[], dists=[], lons=[], lats=[])
    try:
        gsim = cmaker.gsim_by_rlzi[iml2.rlzi]
    except KeyError:
        return pack(acc, 'mags dists lons lats pnes'.split())
    for rctx, dctx in ctxs:
        [dist] = dctx.rrup
        if gsim.minimum_distance and dist < gsim.minimum_distance:
            dist = gsim.minimum_distance
        acc['mags'].append(rctx.mag)
        acc['lons'].append(dctx.lon)
        acc['lats'].append(dctx.lat)
        acc['dists'].append(dist)
        with gmf_mon:
            mean_std = get_mean_std(sitecol, rctx, dctx, iml2.imts,
                                    [gsim])[..., 0]  # (2, N, M)
        with pne_mon:
            iml = numpy.array([
                to_distribution_values(lvl, imt)
                for imt, lvl in zip(iml2.imts, iml2)
            ])  # shape (M, P)
            pne = _disaggregate_pne(rctx, mean_std, iml, *eps3)
            acc['pnes'].append(pne)
    return pack(acc, 'mags dists lons lats pnes'.split())
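
The two default-argument monitors above (gmf_mon for the mean/stddev computation, pne_mon for the probability-of-no-exceedance step) are entered once per context, so each one accumulates the total time spent in its phase of the loop. A minimal sketch of that pattern, assuming only that the Monitor class behaves as in these examples and can be imported from openquake.baselib.performance:

import math
from openquake.baselib import performance

gmf_mon = performance.Monitor('computing mean_std')
pne_mon = performance.Monitor('computing pnes')
for x in range(1, 1001):
    with gmf_mon:  # first phase; times are summed over the iterations
        mean = math.log(x)
    with pne_mon:  # second phase, timed separately
        pne = math.exp(-mean)
print(gmf_mon)
print(pne_mon)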
Example #2
def disaggregate(ctxs,
                 mean_std,
                 zs_by_g,
                 iml2dict,
                 eps3,
                 sid=0,
                 bin_edges=(),
                 pne_mon=performance.Monitor(),
                 mat_mon=performance.Monitor()):
    """
    :param ctxs: a list of U fat RuptureContexts
    :param imts: a list of Intensity Measure Type objects
    :param zs_by_g: a dictionary g -> Z indices
    :param imt: an Intensity Measure Type
    :param iml2dict: a dictionary of arrays imt -> (P, Z)
    :param eps3: a triplet (truncnorm, epsilons, eps_bands)
    :param pne_mon: monitor for the probabilities of no exceedance
    """
    # disaggregate (separate) PoE in different contributions
    U, E, M = len(ctxs), len(eps3[2]), len(iml2dict)
    iml2 = next(iter(iml2dict.values()))
    P, Z = iml2.shape
    dists = numpy.zeros(U)
    lons = numpy.zeros(U)
    lats = numpy.zeros(U)

    # switch to logarithmic intensities
    iml3 = numpy.zeros((M, P, Z))
    for m, (imt, iml2) in enumerate(iml2dict.items()):
        iml3[m] = to_distribution_values(iml2, imt)

    truncnorm, epsilons, eps_bands = eps3
    cum_bands = numpy.array([eps_bands[e:].sum() for e in range(E)] + [0])
    for u, ctx in enumerate(ctxs):
        dists[u] = ctx.rrup[sid]  # distance to the site
        lons[u] = ctx.clon[sid]  # closest point of the rupture lon
        lats[u] = ctx.clat[sid]  # closest point of the rupture lat
    with pne_mon:
        poes = numpy.zeros((U, E, M, P, Z))
        pnes = numpy.ones((U, E, M, P, Z))
        for g, zs in zs_by_g.items():
            for (m, p, z), iml in numpy.ndenumerate(iml3):
                if z in zs:
                    lvls = (iml - mean_std[0, :, sid, m, g]) / (
                        mean_std[1, :, sid, m, g])
                    idxs = numpy.searchsorted(epsilons, lvls)
                    poes[:, :, m, p, z] = _disagg_eps(truncnorm.sf(lvls), idxs,
                                                      eps_bands, cum_bands)
        for u, ctx in enumerate(ctxs):
            pnes[u] *= ctx.get_probability_no_exceedance(poes[u])
    bindata = BinData(dists, lons, lats, pnes)
    if not bin_edges:
        return bindata
    with mat_mon:
        return _build_disagg_matrix(bindata, bin_edges)
Example #3
def disaggregate(mean_std,
                 rups,
                 imt,
                 imls,
                 eps3,
                 pne_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    U, P, E = len(rups), len(imls), len(eps3[2])
    bdata = BinData(mags=numpy.zeros(U),
                    dists=numpy.zeros(U),
                    lons=numpy.zeros(U),
                    lats=numpy.zeros(U),
                    pnes=numpy.zeros((U, P, E)))
    with pne_mon:
        truncnorm, epsilons, eps_bands = eps3
        cum_bands = numpy.array([eps_bands[e:].sum() for e in range(E)] + [0])
        imls = to_distribution_values(imls, imt)  # shape P
        for u, rup in enumerate(rups):
            bdata.mags[u] = rup.mag
            bdata.lons[u] = rup.lon
            bdata.lats[u] = rup.lat
            bdata.dists[u] = rup.rrup[0]
        for p, iml in enumerate(imls):
            lvls = (iml - mean_std[0]) / mean_std[1]
            survival = truncnorm.sf(lvls)
            bins = numpy.searchsorted(epsilons, lvls)
            for e, eps_band in enumerate(eps_bands):
                poes = _disagg_eps(survival, bins, e, eps_band, cum_bands)
                for u, rup in enumerate(rups):
                    bdata.pnes[u, p,
                               e] = rup.get_probability_no_exceedance(poes[u])
    return bdata
Example #4
File: run.py Project: ruthali/oq-risklib
def run(job_ini, concurrent_tasks=None,
        loglevel='info', hc=None, exports=''):
    """
    Run a calculation. Optionally, set the number of concurrent_tasks
    (0 to disable the parallelization).
    """
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor('total', measuremem=True)

    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        monitor.monitor_dir = calc.datastore.calc_dir
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks, exports=exports,
                     hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, exports, monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s/output.hdf5' %
          calc.datastore.calc_dir)
    return calc
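
run() wraps the whole calculation in a Monitor built with measuremem=True and, once the block exits, reads monitor.duration and monitor.mem for the final report. A stripped-down sketch of that reporting idiom (the import paths for performance and general are assumed to be openquake.baselib, as in recent engine versions):

import logging
from openquake.baselib import performance, general

logging.basicConfig(level=logging.INFO)
monitor = performance.Monitor('total', measuremem=True)
with monitor:
    data = [0.0] * 1_000_000  # stand-in for the real calculation
logging.info('Total time spent: %s s', monitor.duration)
logging.info('Memory allocated: %s', general.humansize(monitor.mem))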
Example #5
def _run(job_ini, concurrent_tasks, pdb, reuse_input, loglevel, exports,
         params):
    global calc_path
    if 'hazard_calculation_id' in params:
        hc_id = int(params['hazard_calculation_id'])
        if hc_id < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                params['hazard_calculation_id'] = calc_ids[hc_id]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc_id))
        else:
            params['hazard_calculation_id'] = hc_id
    dic = readinput.get_params(job_ini, params)
    # set the logs first of all
    log = logs.init("job", dic, getattr(logging, loglevel.upper()))

    # disable gzip_input
    base.BaseCalculator.gzip_inputs = lambda self: None
    with log, performance.Monitor('total runtime', measuremem=True) as monitor:
        calc = base.calculators(log.get_oqparam(), log.calc_id)
        if reuse_input:  # enable caching
            calc.oqparam.cachedir = datastore.get_datadir()
        calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb, exports=exports)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with silx view %s' % calc.datastore.filename)
    calc_path, _ = os.path.splitext(calc.datastore.filename)  # used below
    return calc
Example #6
def extract(what, calc_id=-1, webapi=False, local=False, extract_dir='.'):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    By default uses the WebAPI, otherwise the extraction is done locally.
    """
    with performance.Monitor('extract', measuremem=True) as mon:
        if local:
            if calc_id == -1:
                calc_id = logs.dbcmd('get_job', calc_id).id
            aw = WebExtractor(calc_id, 'http://localhost:8800', '').get(what)
        elif webapi:
            aw = WebExtractor(calc_id).get(what)
        else:
            aw = Extractor(calc_id).get(what)
        w = what.replace('/', '-').replace('?', '-')
        if isinstance(aw.array, str):  # a big string
            fname = os.path.join(extract_dir, '%s_%d.csv' % (w, calc_id))
            with open(fname, 'w', encoding='utf-8') as f:
                f.write(aw.array)
        elif aw.is_good():  # a regular ArrayWrapper
            fname = os.path.join(extract_dir, '%s_%d.npz' % (w, calc_id))
            hdf5.save_npz(aw, fname)
        else:  # ArrayWrapper of strings, dictionaries or other types
            fname = os.path.join(extract_dir, '%s_%d.txt' % (w, calc_id))
            open(fname, 'w').write(aw.toml())
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
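
Most of the extract() variants on this page end with the same idiom: the Monitor report is printed only when the operation took more than a second, so quick extractions stay silent. A hedged sketch, assuming the same import path as above:

from openquake.baselib import performance

with performance.Monitor('extract', measuremem=True) as mon:
    total = sum(range(10 ** 6))  # placeholder for the real extraction work
if mon.duration > 1:  # report timings only for slow extractions
    print(mon)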
Example #7
def main(calc_id: int):
    """
    Reduce the source model of the given (pre)calculation by discarding all
    sources that do not contribute to the hazard.
    """
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'][()]
    src_ids = info['source_id']
    num_ids = len(src_ids)
    bad_ids = info[info['eff_ruptures'] == 0]['source_id']
    logging.info('Found %d far away sources', len(bad_ids))
    bad_ids = set(
        src_id.split(';')[0] for src_id in python3compat.decode(bad_ids))
    bad_dupl = bad_ids & get_dupl(python3compat.decode(src_ids))
    if bad_dupl:
        logging.info('Duplicates %s not removed' % bad_dupl)
    ok = info['eff_ruptures'] > 0
    if ok.sum() == 0:
        raise RuntimeError('All sources were filtered away!')
    ok_ids = general.group_array(info[ok][['source_id', 'code']], 'source_id')
    with performance.Monitor() as mon:
        good, total = readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], ok_ids)
    logging.info('Removed %d/%d sources', total - good, num_ids)
    print(mon)
Example #8
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding all
    sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'][()]
    bad_ids = set(info[info['eff_ruptures'] == 0]['source_id'])
    if len(bad_ids) == 0:
        logging.warning('All sources are relevant, nothing to remove')
        return
    ok = info['eff_ruptures'] > 0
    if ok.sum() == 0:
        raise RuntimeError('All sources were filtered away!')
    ok_ids = general.group_array(info[ok][['source_id', 'code']], 'source_id')
    with performance.Monitor() as mon:
        good, total = readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], ok_ids)
    logging.info('Removed %d/%d sources', total - good, good)
    srcs, cnts = np.unique(info[['source_id', 'code']], return_counts=True)
    dupl = srcs[cnts > 1]
    if bad_ids & set(dict(dupl)):
        logging.info('There were duplicated sources %s', dupl)
    print(mon)
Example #9
    def gen_outputs(self, riskinput, monitor=performance.Monitor()):
        """
        Group the assets per taxonomy and compute the outputs by using the
        underlying riskmodels. Yield the outputs generated as dictionaries
        out_by_lr.

        :param riskinput: a RiskInput instance
        :param monitor: a monitor object used to measure the performance
        """
        self.monitor = monitor
        hazard_getter = riskinput.hazard_getter
        with monitor('getting hazard'):
            hazard_getter.init()
        sids = hazard_getter.sids
        # group the assets by taxonomy
        dic = collections.defaultdict(list)
        for sid, assets in zip(sids, riskinput.assets_by_site):
            group = groupby(assets, by_taxonomy)
            for taxonomy in group:
                epsgetter = riskinput.epsilon_getter
                dic[taxonomy].append((sid, group[taxonomy], epsgetter))
        if hasattr(hazard_getter, 'rlzs_by_gsim'):
            # save memory in event based risk by working one gsim at a time
            for gsim in hazard_getter.rlzs_by_gsim:
                for out in self._gen_outputs(hazard_getter, dic, gsim):
                    yield out
        else:
            for out in self._gen_outputs(hazard_getter, dic, None):
                yield out

        if hasattr(hazard_getter, 'gmdata'):  # for event based risk
            riskinput.gmdata = hazard_getter.gmdata
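
gen_outputs receives one Monitor and derives an operation-specific child from it by calling it with a name, as in monitor('getting hazard'); the child is then used as a context manager. A minimal sketch of that child-monitor pattern, under the same assumptions about the Monitor API as above (gen_doubles below is a hypothetical stand-in for the real method):

from openquake.baselib import performance

def gen_doubles(values, monitor=performance.Monitor()):
    with monitor('getting hazard'):  # child monitor for this sub-operation
        hazard = [v * 2 for v in values]
    for h in hazard:
        yield h

mon = performance.Monitor('gen_doubles')
outputs = list(gen_doubles(range(100), mon))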
Example #10
def extract(what, calc_id=-1):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    """
    logging.basicConfig(level=logging.INFO)
    if calc_id < 0:
        calc_id = datastore.get_calc_ids()[calc_id]
    if dbserver.get_status() == 'running':
        job = dbcmd('get_job', calc_id)
        if job is not None:
            calc_id = job.ds_calc_dir + '.hdf5'
    dstore = datastore.read(calc_id)
    parent_id = dstore['oqparam'].hazard_calculation_id
    if parent_id:
        dstore.parent = datastore.read(parent_id)
    print('Emulating call to /v1/calc/%s/extract/%s' % (calc_id, quote(what)))
    with performance.Monitor('extract', measuremem=True) as mon, dstore:
        items = extract_(dstore, what)
        if not inspect.isgenerator(items):
            items = [(items.__class__.__name__, items)]
        fname = '%s_%d.hdf5' % (what.replace('/', '-').replace(
            '?', '-'), dstore.calc_id)
        hdf5.save(fname, items)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
Example #11
def extract(what, calc_id=-1, hostport=None):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    """
    logging.basicConfig(level=logging.INFO)
    if calc_id < 0:
        calc_id = datastore.get_calc_ids()[calc_id]
    hdf5path = None
    if dbserver.get_status() == 'running':
        job = dbcmd('get_job', calc_id)
        if job is not None:
            hdf5path = job.ds_calc_dir + '.hdf5'
    dstore = datastore.read(hdf5path or calc_id)
    parent_id = dstore['oqparam'].hazard_calculation_id
    if parent_id:
        dstore.parent = datastore.read(parent_id)
    urlpath = '/v1/calc/%d/extract/%s' % (calc_id, quote(what))
    with performance.Monitor('extract', measuremem=True) as mon, dstore:
        if hostport:
            data = urlopen('http://%s%s' % (hostport, urlpath)).read()
            items = (item for item in numpy.load(io.BytesIO(data)).items())
        else:
            print('Emulating call to %s' % urlpath)
            items = extract_(dstore, what)
        if not inspect.isgenerator(items):
            items = [(items.__class__.__name__, items)]
        fname = '%s_%d.hdf5' % (what.replace('/', '-').replace(
            '?', '-'), dstore.calc_id)
        hdf5.save(fname, items)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
Example #12
def main(what,
         calc_id: int = -1,
         webapi=False,
         local=False,
         *,
         extract_dir='.'):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    By default uses the WebAPI, otherwise the extraction is done locally.
    """
    with performance.Monitor('extract', measuremem=True) as mon:
        if local:
            if calc_id == -1:
                calc_id = logs.dbcmd('get_job', calc_id).id
            aw = WebExtractor(calc_id, 'http://localhost:8800', '').get(what)
        elif webapi:
            aw = WebExtractor(calc_id).get(what)
        else:
            aw = Extractor(calc_id).get(what)
        w = what.replace('/', '-').replace('?', '-')
        if hasattr(aw, 'array') and isinstance(aw.array, str):  # CSV string
            fname = os.path.join(extract_dir, '%s_%d.csv' % (w, calc_id))
            with open(fname, 'w', encoding='utf-8') as f:
                f.write(aw.array)
        else:  # save as npz
            fname = os.path.join(extract_dir, '%s_%d.npz' % (w, calc_id))
            hdf5.save_npz(aw, fname)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
Example #13
    def pfilter(self,
                src_filter,
                concurrent_tasks,
                monitor=performance.Monitor()):
        """
        Generate a new CompositeSourceModel by filtering the sources on
        the given site collection.

        :param src_filter: a SourceFilter instance
        :param concurrent_tasks: how many tasks to generate
        :param monitor: a Monitor instance
        :returns: a new CompositeSourceModel instance
        """
        sources_by_grp = src_filter.pfilter(self.get_sources(),
                                            concurrent_tasks, monitor)
        source_models = []
        for sm in self.source_models:
            src_groups = []
            for src_group in sm.src_groups:
                sg = copy.copy(src_group)
                sg.sources = sources_by_grp.get(sg.id, [])
                src_groups.append(sg)
            newsm = logictree.LtSourceModel(sm.names, sm.weight, sm.path,
                                            src_groups, sm.num_gsim_paths,
                                            sm.ordinal, sm.samples)
            source_models.append(newsm)
        new = self.__class__(self.gsim_lt, self.source_model_lt, source_models,
                             self.optimize_same_id)
        new.info.update_eff_ruptures(new.get_num_ruptures().__getitem__)
        return new
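
Like several methods on this page, pfilter defaults its monitor argument to a fresh performance.Monitor(), so callers that do not care about timings can simply omit it, while interested callers pass their own instance and inspect it afterwards. A small sketch of that convention (filter_positive is a hypothetical helper; only the Monitor usage mirrors the examples):

from openquake.baselib import performance

def filter_positive(values, monitor=performance.Monitor()):
    with monitor:  # timed only if the caller cares
        return [v for v in values if v > 0]

filter_positive([-1, 0, 1, 2])  # casual caller: default monitor, ignored

mon = performance.Monitor('filtering')
filter_positive(list(range(-50000, 50000)), mon)
print(mon)  # interested caller inspects the timings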
Example #14
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding all
    sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'][()]
    num_ids = len(info['source_id'])
    bad_ids = set(info[info['eff_ruptures'] == 0]['source_id'])
    if len(bad_ids) == 0:
        dupl = info[info['multiplicity'] > 1]['source_id']
        if len(dupl) == 0:
            logging.info('Nothing to remove')
        else:
            logging.info('Nothing to remove, but there are duplicated source '
                         'IDs that could prevent the removal: %s' % dupl)
        return
    logging.info('Found %d far away sources', len(bad_ids))
    ok = info['eff_ruptures'] > 0
    if ok.sum() == 0:
        raise RuntimeError('All sources were filtered away!')
    ok_ids = general.group_array(info[ok][['source_id', 'code']], 'source_id')
    with performance.Monitor() as mon:
        good, total = readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], ok_ids)
    logging.info('Removed %d/%d sources', total - good, num_ids)
    print(mon)
Example #15
    def post_execute(self, times):
        """
        Compute and store average losses from the losses_by_event dataset,
        and then loss curves and maps.
        """
        self.datastore.set_attrs('task_info/start_ebrisk', times=times)
        oq = self.oqparam
        elt_length = len(self.datastore['losses_by_event'])
        builder = get_loss_builder(self.datastore)
        self.build_datasets(builder)
        mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
        smap = parallel.Starmap(compute_loss_curves_maps, monitor=mon)
        self.datastore.close()
        acc = []
        ct = oq.concurrent_tasks or 1
        for elt_slice in general.split_in_slices(elt_length, ct):
            smap.submit(self.datastore.filename, elt_slice,
                        oq.conditional_loss_poes, oq.individual_curves)
        acc = smap.reduce(acc=[])
        # copy performance information from the cache to the datastore
        pd = mon.hdf5['performance_data'].value
        hdf5.extend3(self.datastore.filename, 'performance_data', pd)
        self.datastore.open('r+')  # reopen
        self.datastore['task_info/compute_loss_curves_and_maps'] = (
            mon.hdf5['task_info/compute_loss_curves_maps'].value)
        with self.monitor('saving loss_curves and maps', autoflush=True):
            for name, idx, arr in acc:
                for ij, val in numpy.ndenumerate(arr):
                    self.datastore[name][ij + idx] = val
Example #16
File: disagg.py Project: tieganh/oq-engine
def _disaggregate(cmaker,
                  sitecol,
                  rupdata,
                  indices,
                  iml2,
                  eps3,
                  pne_mon=performance.Monitor(),
                  gmf_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    # returns AccumDict with keys (poe, imt) and mags, dists, lons, lats
    [sid] = sitecol.sids
    acc = dict(pnes=[], mags=[], dists=[], lons=[], lats=[])
    try:
        gsim = cmaker.gsim_by_rlzi[iml2.rlzi]
    except KeyError:
        return pack(acc, 'mags dists lons lats pnes'.split())
    maxdist = cmaker.maximum_distance(cmaker.trt)
    fildist = rupdata[cmaker.filter_distance + '_']
    for ridx, sidx in enumerate(indices):
        if sidx == -1:  # no contribution for this site
            continue
        dist = fildist[ridx][sidx]
        if dist >= maxdist:
            continue
        elif gsim.minimum_distance and dist < gsim.minimum_distance:
            dist = gsim.minimum_distance
        rctx = contexts.RuptureContext(
            (par, val[ridx]) for par, val in rupdata.items())
        dctx = contexts.DistancesContext(
            (param, getattr(rctx, param + '_')[[sidx]])
            for param in cmaker.REQUIRES_DISTANCES)
        acc['mags'].append(rctx.mag)
        acc['lons'].append(rctx.lon_[sidx])
        acc['lats'].append(rctx.lat_[sidx])
        acc['dists'].append(dist)
        with gmf_mon:
            mean_std = get_mean_std(sitecol, rctx, dctx, iml2.imts,
                                    [gsim])[..., 0]  # (2, N, M)
        with pne_mon:
            iml = numpy.array([
                to_distribution_values(lvl, imt)
                for imt, lvl in zip(iml2.imts, iml2)
            ])  # shape (M, P)
            pne = _disaggregate_pne(rctx, mean_std, iml, *eps3)
            acc['pnes'].append(pne)
    return pack(acc, 'mags dists lons lats pnes'.split())
Example #17
def download_shakemap(id):
    """
    Example of usage: utils/shakemap usp000fjta
    """
    with performance.Monitor('shakemap', measuremem=True) as mon:
        dest = '%s.npy' % id
        numpy.save(dest, download_array(id))
    print(mon)
    print('Saved %s' % dest)
Example #18
def main(id):
    """
    Example of usage: utils/shakemap usp000fjta
    """
    with performance.Monitor('shakemap', measuremem=True) as mon:
        dest = '%s.npy' % id
        numpy.save(dest, get_array("usgs_id", id))
    print(mon)
    print('Saved %s' % dest)
Example #19
    def test_extra_large_source(self):
        oq = readinput.get_oqparam('job.ini', case_21)
        mon = performance.Monitor('csm', datastore.hdf5new())
        with mock.patch('logging.error') as error:
            with mock.patch('openquake.hazardlib.geo.utils.MAX_EXTENT', 80):
                readinput.get_composite_source_model(oq, mon)
        mon.hdf5.close()
        os.remove(mon.hdf5.path)
        self.assertEqual(error.call_args[0][0],
                         'source SFLT2: too large: 84 km')
Example #20
def prepare_site_model(exposure_xml,
                       vs30_csv,
                       grid_spacing=0,
                       site_param_distance=5,
                       output='sites.csv'):
    """
    Prepare a site_model.csv file from an exposure xml file, a vs30 csv file
    and a grid spacing which can be 0 (meaning no grid). Sites far away from
    the vs30 records are discarded and you can see them with the command
    `oq plot_assets`. It is up to you to decide if you need to fix your exposure
    or if it is right to ignore the discarded sites.
    """
    logging.basicConfig(level=logging.INFO)
    hdf5 = datastore.hdf5new()
    with performance.Monitor(hdf5.path, hdf5, measuremem=True) as mon:
        mesh, assets_by_site = Exposure.read(
            exposure_xml, check_dupl=False).get_mesh_assets_by_site()
        mon.hdf5['assetcol'] = assetcol = site.SiteCollection.from_points(
            mesh.lons, mesh.lats, req_site_params={'vs30'})
        if grid_spacing:
            grid = mesh.get_convex_hull().dilate(grid_spacing).discretize(
                grid_spacing)
            haz_sitecol = site.SiteCollection.from_points(
                grid.lons, grid.lats, req_site_params={'vs30'})
            logging.info(
                'Reducing exposure grid with %d locations to %d sites'
                ' with assets', len(haz_sitecol), len(assets_by_site))
            haz_sitecol, assets_by, _discarded = assoc(assets_by_site,
                                                       haz_sitecol,
                                                       grid_spacing * SQRT2,
                                                       'filter')
            haz_sitecol.make_complete()
        else:
            haz_sitecol = assetcol
        vs30orig = read_vs30(vs30_csv.split(','))
        logging.info('Associating %d hazard sites to %d site parameters',
                     len(haz_sitecol), len(vs30orig))
        sitecol, vs30, discarded = assoc(
            vs30orig, haz_sitecol, grid_spacing * SQRT2 or site_param_distance,
            'filter')
        sitecol.array['vs30'] = vs30['vs30']
        mon.hdf5['sitecol'] = sitecol
        if discarded:
            mon.hdf5['discarded'] = numpy.array(discarded)
        sids = numpy.arange(len(vs30), dtype=numpy.uint32)
        sites = compose_arrays(sids, vs30, 'site_id')
        write_csv(output, sites)
    if discarded:
        logging.info('Discarded %d sites with assets [use oq plot_assets]',
                     len(discarded))
    logging.info('Saved %d rows in %s' % (len(sitecol), output))
    logging.info(mon)
    return sitecol
Example #21
    def get_poes(self, mean_std, cmaker, ctx):
        """
        :returns: an array of shape (N, L)
        """
        cm = copy.copy(cmaker)
        cm.poe_mon = performance.Monitor()  # avoid double counts
        cm.gsims = self.gsims
        avgs = []
        for ctx, poes in cm.gen_poes([ctx]):
            # poes has shape N', L, G
            avgs.append(poes @ self.weights)
        return numpy.concatenate(avgs)  # shape (N, L)
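
The line cm.poe_mon = performance.Monitor() above replaces the ContextMaker's monitor with a fresh, unnamed one, so the time spent in the nested gen_poes call is not added to the caller's statistics a second time. A sketch of that "throwaway monitor" idea (compute and outer_mon are hypothetical; only the Monitor usage mirrors the example):

from openquake.baselib import performance

def compute(values, mon):
    with mon:
        return [v * v for v in values]

outer_mon = performance.Monitor('get_poes')
with outer_mon:  # the caller already times the whole operation
    throwaway = performance.Monitor()  # avoid double counts
    result = compute(range(1000), throwaway)
print(outer_mon)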
Example #22
def main(input):
    """
    Convert .xml and .npz files to .hdf5 files.
    """
    with performance.Monitor('to_hdf5') as mon:
        for input_file in input:
            if input_file.endswith('.npz'):
                output = convert_npz_hdf5(input_file, input_file[:-3] + 'hdf5')
            elif input_file.endswith('.xml'):  # for source model files
                output = convert_xml_hdf5(input_file, input_file[:-3] + 'hdf5')
            else:
                continue
            print('Generated %s' % output)
    print(mon)
Example #23
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding all
    sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'].value
        ok = info['weight'] > 0
        source_ids = set(info[ok]['source_id'])
    with performance.Monitor() as mon:
        readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], source_ids)
    print(mon)
Example #24
File: disagg.py Project: ventycn/oq-engine
def _disaggregate(cmaker,
                  sitecol,
                  rupdata,
                  indices,
                  iml2,
                  eps3,
                  pne_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    # returns AccumDict with keys (poe, imt) and mags, dists, lons, lats
    [sid] = sitecol.sids
    acc = AccumDict(accum=[],
                    mags=[],
                    dists=[],
                    lons=[],
                    lats=[],
                    M=len(iml2.imts),
                    P=len(iml2.poes_disagg))
    try:
        gsim = cmaker.gsim_by_rlzi[iml2.rlzi]
    except KeyError:
        return pack(acc, 'mags dists lons lats P M'.split())
    maxdist = cmaker.maximum_distance(cmaker.trt)
    fildist = rupdata[cmaker.filter_distance + '_']
    for ridx, sidx in enumerate(indices):
        if sidx == -1:  # no contribution for this site
            continue
        dist = fildist[ridx][sidx]
        if dist >= maxdist:
            continue
        elif gsim.minimum_distance and dist < gsim.minimum_distance:
            dist = gsim.minimum_distance
        rctx = contexts.RuptureContext()
        for par in rupdata:
            setattr(rctx, par, rupdata[par][ridx])
        dctx = contexts.DistancesContext(
            (param, getattr(rctx, param + '_')[[sidx]])
            for param in cmaker.REQUIRES_DISTANCES).roundup(
                gsim.minimum_distance)
        acc['mags'].append(rctx.mag)
        acc['lons'].append(rctx.lon_[sidx])
        acc['lats'].append(rctx.lat_[sidx])
        acc['dists'].append(dist)
        with pne_mon:
            for m, imt in enumerate(iml2.imts):
                for p, poe in enumerate(iml2.poes_disagg):
                    iml = iml2[m, p]
                    pne = disaggregate_pne(gsim, rctx, sitecol, dctx, imt, iml,
                                           *eps3)
                    acc[p, m].append(pne)
    return pack(acc, 'mags dists lons lats P M'.split())
Example #25
    def gen_outputs(self,
                    riskinput,
                    monitor=performance.Monitor(),
                    assetcol=None):
        """
        Group the assets per taxonomy and compute the outputs by using the
        underlying riskmodels. Yield the outputs generated as dictionaries
        out_by_lr.

        :param riskinput: a RiskInput instance
        :param monitor: a monitor object used to measure the performance
        :param assetcol: not None only for event based risk
        """
        mon_context = monitor('building context')
        mon_hazard = monitor('building hazard')
        mon_risk = monitor('computing risk', measuremem=False)
        hazard_getter = riskinput.hazard_getter
        sids = hazard_getter.sids
        with mon_context:
            if assetcol is None:  # scenario, classical
                assets_by_site = riskinput.assets_by_site
            else:
                assets_by_site = assetcol.assets_by_site()
        # group the assets by taxonomy
        dic = collections.defaultdict(list)
        for sid, assets in zip(sids, assets_by_site):
            group = groupby(assets, by_taxonomy)
            for taxonomy in group:
                epsgetter = riskinput.epsilon_getter
                dic[taxonomy].append((sid, group[taxonomy], epsgetter))
        imti = {imt: i for i, imt in enumerate(hazard_getter.imtls)}
        if hasattr(hazard_getter, 'rlzs_by_gsim'):
            # save memory in event based risk by working one gsim at a time
            for gsim in hazard_getter.rlzs_by_gsim:
                with mon_hazard:
                    hazard = hazard_getter.get_hazard(gsim)
                with mon_risk:
                    for out in self._gen_outputs(hazard, imti, dic,
                                                 hazard_getter.eids):
                        yield out
        else:
            with mon_hazard:
                hazard = hazard_getter.get_hazard()
            with mon_risk:
                for out in self._gen_outputs(hazard, imti, dic,
                                             hazard_getter.eids):
                    yield out

        if hasattr(hazard_getter, 'gmdata'):  # for event based risk
            riskinput.gmdata = hazard_getter.gmdata
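
Here the hot risk loop is wrapped in monitor('computing risk', measuremem=False): memory measurement is explicitly switched off for the block that is entered most often, keeping the monitoring overhead low. A compact sketch of that trade-off, with the same assumptions about the Monitor API as before:

from openquake.baselib import performance

monitor = performance.Monitor('gen_outputs')
mon_hazard = monitor('building hazard')
mon_risk = monitor('computing risk', measuremem=False)  # cheap to enter often

with mon_hazard:
    hazard = [float(i) for i in range(100000)]
for start in range(0, len(hazard), 1000):
    with mon_risk:  # entered many times; no memory checks here
        risk = sum(hazard[start:start + 1000])
print(mon_risk)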
Example #26
def expo2csv(job_ini):
    """
    Convert an exposure in XML format into CSV format
    """
    oq = readinput.get_oqparam(job_ini)
    exposure = readinput.get_exposure(oq)
    rows = []
    header = ['asset_ref', 'number', 'area', 'taxonomy', 'lon', 'lat']
    for costname in exposure.cost_types['name']:
        if costname != 'occupants':
            header.append(costname)
            header.append(costname + '-deductible')
            header.append(costname + '-insured_limit')
    header.extend(exposure.occupancy_periods)
    header.extend(exposure.tagnames)
    for asset, asset_ref in zip(exposure.assets, exposure.asset_refs):
        row = [
            asset_ref.decode('utf8'), asset.number, asset.area, asset.taxonomy,
            asset.location[0], asset.location[1]
        ]
        for costname in exposure.cost_types['name']:
            if costname != 'occupants':
                row.append(asset.values[costname])
                row.append(asset.deductibles.get(costname, '?'))
                row.append(asset.insurance_limits.get(costname, '?'))
        for time_event in exposure.occupancy_periods:
            row.append(asset.value(time_event))
        for tagname, tagidx in zip(exposure.tagnames, asset.tagidxs):
            row.append(tagidx)
        rows.append(row)

    with performance.Monitor('expo2csv') as mon:
        # save exposure data as csv
        csvname = oq.inputs['exposure'].replace('.xml', '.csv')
        print('Saving %s' % csvname)
        with open(csvname, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            for row in rows:
                writer.writerow(row)

        # save exposure header as xml
        head = nrml.read(oq.inputs['exposure'], stop='assets')
        xmlname = oq.inputs['exposure'].replace('.xml', '-header.xml')
        print('Saving %s' % xmlname)
        head[0].assets.text = os.path.basename(csvname)
        with open(xmlname, 'wb') as f:
            nrml.write(head, f)
    print(mon)
Example #27
def export_asset_loss_table(ekey, dstore):
    """
    Export in parallel the asset loss table from the datastore.

    NB1: for large calculations this may run out of memory
    NB2: due to a heisenbug in the parallel reading of .hdf5 files this works
    reliably only if the datastore has been created by a different process

    The recommendation is: *do not use this exporter*: rather, study its source
    code and write what you need. Every postprocessing is different.
    """
    key, fmt = ekey
    oq = dstore['oqparam']
    assetcol = dstore['assetcol']
    arefs = dstore['asset_refs'].value
    avals = assetcol.values()
    loss_types = dstore.get_attr('all_loss_ratios', 'loss_types').split()
    dtlist = [(lt, F32) for lt in loss_types]
    if oq.insured_losses:
        for lt in loss_types:
            dtlist.append((lt + '_ins', F32))
    lrs_dt = numpy.dtype([('rlzi', U16), ('losses', dtlist)])
    fname = dstore.export_path('%s.%s' % ekey)
    monitor = performance.Monitor(key, fname)
    lrgetter = riskinput.LossRatiosGetter(dstore)
    aids = range(len(assetcol))
    allargs = [(lrgetter, list(block), monitor)
               for block in split_in_blocks(aids, oq.concurrent_tasks)]
    dstore.close()  # avoid OSError: Can't read data (Wrong b-tree signature)
    L = len(loss_types)
    with hdf5.File(fname, 'w') as f:
        nbytes = 0
        total = numpy.zeros(len(dtlist), F32)
        for pairs in parallel.Starmap(get_loss_ratios, allargs):
            for aid, data in pairs:
                asset = assetcol[aid]
                avalue = avals[aid]
                for l, lt in enumerate(loss_types):
                    aval = avalue[lt]
                    for i in range(oq.insured_losses + 1):
                        data['ratios'][:, l + L * i] *= aval
                aref = arefs[asset.idx]
                f[b'asset_loss_table/' + aref] = data.view(lrs_dt)
                total += data['ratios'].sum(axis=0)
                nbytes += data.nbytes
        f['asset_loss_table'].attrs['loss_types'] = ' '.join(loss_types)
        f['asset_loss_table'].attrs['total'] = total
        f['asset_loss_table'].attrs['nbytes'] = nbytes
    return [fname]
Example #28
    def get_gmfdata(self, mon=performance.Monitor()):
        """
        :returns: an array of the dtype (sid, eid, gmv)
        """
        alldata = []
        self.sig_eps = []
        self.times = []  # rup_id, nsites, dt
        for computer in self.gen_computers(mon):
            data, dt = computer.compute_all(
                self.min_iml, self.rlzs_by_gsim, self.sig_eps)
            self.times.append((computer.ebrupture.id, len(computer.sids), dt))
            alldata.append(data)
        if not alldata:
            return []
        return numpy.concatenate(alldata)
Example #29
def get_effect(mags, sitecol, gsims_by_trt, oq):
    """
    :returns: an ArrayWrapper effect_by_mag_dst_trt
    Also updates oq.maximum_distance.magdist and oq.pointsource_distance
    """
    dist_bins = {
        trt: oq.maximum_distance.get_dist_bins(trt)
        for trt in gsims_by_trt
    }
    # computing the effect makes sense only if all IMTs have the same
    # unit of measure; for simplicity we will consider only PGA and SA
    effect = {}
    imts_with_period = [
        imt for imt in oq.imtls if imt == 'PGA' or imt.startswith('SA')
    ]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    aw = hdf5.ArrayWrapper((), dist_bins)
    if sitecol is None:
        return aw
    if len(sitecol) >= oq.max_sites_disagg and imts_ok:
        logging.info('Computing effect of the ruptures')
        mon = performance.Monitor('rupture effect')
        eff_by_mag = parallel.Starmap.apply(
            get_effect_by_mag, (mags, sitecol.one(), gsims_by_trt,
                                oq.maximum_distance, oq.imtls, mon)).reduce()
        aw.array = eff_by_mag
        effect.update({
            trt: Effect({mag: eff_by_mag[mag][:, t]
                         for mag in eff_by_mag}, dist_bins[trt])
            for t, trt in enumerate(gsims_by_trt)
        })
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if oq.pointsource_distance['default']:
                oq.pointsource_distance[trt] = eff.dist_by_mag(
                    eff.collapse_value(oq.pointsource_distance['default']))
    elif oq.pointsource_distance['default']:
        # replace pointsource_distance with a dict trt -> mag -> dst
        for trt in gsims_by_trt:
            try:
                dst = getdefault(oq.pointsource_distance, trt)
            except TypeError:  # 'NoneType' object is not subscriptable
                dst = getdefault(oq.maximum_distance, trt)
            oq.pointsource_distance[trt] = {mag: dst for mag in mags}
    return aw
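
In get_effect the Monitor is created up front and passed as the last element of the argument tuple given to parallel.Starmap.apply, presumably so each task can time its own work. Leaving the parallel machinery aside, the task-side shape of that convention looks roughly like this (get_effect_by_mag_like is a hypothetical stand-in; only the Monitor usage mirrors the example):

from openquake.baselib import performance

def get_effect_by_mag_like(mags, monitor):
    # the monitor arrives as the last task argument
    effect = {}
    with monitor('computing effect'):  # child monitor named after the operation
        for mag in mags:
            effect[mag] = mag ** 2
    return effect

mon = performance.Monitor('rupture effect')
print(get_effect_by_mag_like([5.0, 5.5, 6.0], mon))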
Example #30
def extract(what, calc_id, webapi=True):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    By default uses the WebAPI, otherwise the extraction is done locally.
    """
    with performance.Monitor('extract', measuremem=True) as mon:
        if webapi:
            obj = WebExtractor(calc_id).get(what)
        else:
            obj = Extractor(calc_id).get(what)
        fname = '%s_%d.hdf5' % (what.replace('/', '-').replace('?',
                                                               '-'), calc_id)
        obj.save(fname)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)