def _disaggregate(cmaker, sitecol, ctxs, iml2, eps3,
                  pne_mon=performance.Monitor(),
                  gmf_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    # returns AccumDict with keys (poe, imt) and mags, dists, lons, lats
    acc = dict(pnes=[], mags=[], dists=[], lons=[], lats=[])
    try:
        gsim = cmaker.gsim_by_rlzi[iml2.rlzi]
    except KeyError:
        return pack(acc, 'mags dists lons lats pnes'.split())
    for rctx, dctx in ctxs:
        [dist] = dctx.rrup
        if gsim.minimum_distance and dist < gsim.minimum_distance:
            dist = gsim.minimum_distance
        acc['mags'].append(rctx.mag)
        acc['lons'].append(dctx.lon)
        acc['lats'].append(dctx.lat)
        acc['dists'].append(dist)
        with gmf_mon:
            mean_std = get_mean_std(
                sitecol, rctx, dctx, iml2.imts, [gsim])[..., 0]  # (2, N, M)
        with pne_mon:
            iml = numpy.array([
                to_distribution_values(lvl, imt)
                for imt, lvl in zip(iml2.imts, iml2)])  # shape (M, P)
            pne = _disaggregate_pne(rctx, mean_std, iml, *eps3)
            acc['pnes'].append(pne)
    return pack(acc, 'mags dists lons lats pnes'.split())
def disaggregate(ctxs, mean_std, zs_by_g, iml2dict, eps3, sid=0, bin_edges=(),
                 pne_mon=performance.Monitor(),
                 mat_mon=performance.Monitor()):
    """
    :param ctxs: a list of U fat RuptureContexts
    :param mean_std: means and standard deviations, indexed as
        [mean/std, rupture, site, IMT index, gsim index]
    :param zs_by_g: a dictionary g -> Z indices
    :param iml2dict: a dictionary of arrays imt -> (P, Z)
    :param eps3: a triplet (truncnorm, epsilons, eps_bands)
    :param sid: the site index (default 0)
    :param bin_edges: bin edges used to build the disaggregation matrix
        (if empty, the raw BinData is returned)
    :param pne_mon: monitor for the probabilities of no exceedance
    :param mat_mon: monitor for building the disaggregation matrix
    """
    # disaggregate (separate) PoE in different contributions
    U, E, M = len(ctxs), len(eps3[2]), len(iml2dict)
    iml2 = next(iter(iml2dict.values()))
    P, Z = iml2.shape
    dists = numpy.zeros(U)
    lons = numpy.zeros(U)
    lats = numpy.zeros(U)

    # switch to logarithmic intensities
    iml3 = numpy.zeros((M, P, Z))
    for m, (imt, iml2) in enumerate(iml2dict.items()):
        iml3[m] = to_distribution_values(iml2, imt)

    truncnorm, epsilons, eps_bands = eps3
    cum_bands = numpy.array([eps_bands[e:].sum() for e in range(E)] + [0])
    for u, ctx in enumerate(ctxs):
        dists[u] = ctx.rrup[sid]  # distance to the site
        lons[u] = ctx.clon[sid]  # closest point of the rupture lon
        lats[u] = ctx.clat[sid]  # closest point of the rupture lat
    with pne_mon:
        poes = numpy.zeros((U, E, M, P, Z))
        pnes = numpy.ones((U, E, M, P, Z))
        for g, zs in zs_by_g.items():
            for (m, p, z), iml in numpy.ndenumerate(iml3):
                if z in zs:
                    lvls = (iml - mean_std[0, :, sid, m, g]) / (
                        mean_std[1, :, sid, m, g])
                    idxs = numpy.searchsorted(epsilons, lvls)
                    poes[:, :, m, p, z] = _disagg_eps(
                        truncnorm.sf(lvls), idxs, eps_bands, cum_bands)
        for u, ctx in enumerate(ctxs):
            pnes[u] *= ctx.get_probability_no_exceedance(poes[u])
    bindata = BinData(dists, lons, lats, pnes)
    if not bin_edges:
        return bindata
    with mat_mon:
        return _build_disagg_matrix(bindata, bin_edges)
def disaggregate(mean_std, rups, imt, imls, eps3,
                 pne_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    U, P, E = len(rups), len(imls), len(eps3[2])
    bdata = BinData(mags=numpy.zeros(U), dists=numpy.zeros(U),
                    lons=numpy.zeros(U), lats=numpy.zeros(U),
                    pnes=numpy.zeros((U, P, E)))
    with pne_mon:
        truncnorm, epsilons, eps_bands = eps3
        cum_bands = numpy.array([eps_bands[e:].sum() for e in range(E)] + [0])
        imls = to_distribution_values(imls, imt)  # shape P
        for u, rup in enumerate(rups):
            bdata.mags[u] = rup.mag
            bdata.lons[u] = rup.lon
            bdata.lats[u] = rup.lat
            bdata.dists[u] = rup.rrup[0]
        for p, iml in enumerate(imls):
            lvls = (iml - mean_std[0]) / mean_std[1]
            survival = truncnorm.sf(lvls)
            bins = numpy.searchsorted(epsilons, lvls)
            for e, eps_band in enumerate(eps_bands):
                poes = _disagg_eps(survival, bins, e, eps_band, cum_bands)
                for u, rup in enumerate(rups):
                    bdata.pnes[u, p, e] = rup.get_probability_no_exceedance(
                        poes[u])
    return bdata
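# Hypothetical helper, not taken from the snippets above: a minimal sketch of
# how the eps3 triplet (truncnorm, epsilons, eps_bands) consumed by the
# disaggregate functions could be assembled, assuming scipy.stats.truncnorm
# and a symmetric truncation of the standard normal distribution.
import numpy
from scipy import stats

def build_eps3(truncation_level, n_epsilons):
    tn = stats.truncnorm(-truncation_level, truncation_level)
    # bin edges in epsilon space, e.g. [-3, -1, 1, 3] for truncation_level=3
    epsilons = numpy.linspace(
        -truncation_level, truncation_level, n_epsilons + 1)
    # probability mass of each epsilon bin under the truncated normal
    eps_bands = tn.cdf(epsilons[1:]) - tn.cdf(epsilons[:-1])
    return tn, epsilons, eps_bands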
def run(job_ini, concurrent_tasks=None, loglevel='info', hc=None, exports=''):
    """
    Run a calculation. Optionally, set the number of concurrent_tasks
    (0 to disable the parallelization).
    """
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor('total', measuremem=True)
    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        monitor.monitor_dir = calc.datastore.calc_dir
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks, exports=exports,
                     hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, exports, monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s/output.hdf5'
          % calc.datastore.calc_dir)
    return calc
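# Hedged usage sketch (the .ini file names are hypothetical): a single file
# runs hazard or risk on its own, while a comma-separated pair runs hazard
# followed by risk through run2().
#
#   run('job_hazard.ini', concurrent_tasks=4)
#   run('job_hazard.ini,job_risk.ini', exports='csv')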
def _run(job_ini, concurrent_tasks, pdb, reuse_input, loglevel, exports,
         params):
    global calc_path
    if 'hazard_calculation_id' in params:
        hc_id = int(params['hazard_calculation_id'])
        if hc_id < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                params['hazard_calculation_id'] = calc_ids[hc_id]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc_id))
        else:
            params['hazard_calculation_id'] = hc_id
    dic = readinput.get_params(job_ini, params)

    # set the logs first of all
    log = logs.init("job", dic, getattr(logging, loglevel.upper()))

    # disable gzip_input
    base.BaseCalculator.gzip_inputs = lambda self: None
    with log, performance.Monitor('total runtime', measuremem=True) as monitor:
        calc = base.calculators(log.get_oqparam(), log.calc_id)
        if reuse_input:  # enable caching
            calc.oqparam.cachedir = datastore.get_datadir()
        calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb, exports=exports)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with silx view %s' % calc.datastore.filename)
    calc_path, _ = os.path.splitext(calc.datastore.filename)  # used below
    return calc
def extract(what, calc_id=-1, webapi=False, local=False, extract_dir='.'):
    """
    Extract an output from the datastore and save it into a .csv, .npz or
    .txt file. By default uses the WebAPI, otherwise the extraction is done
    locally.
    """
    with performance.Monitor('extract', measuremem=True) as mon:
        if local:
            if calc_id == -1:
                calc_id = logs.dbcmd('get_job', calc_id).id
            aw = WebExtractor(calc_id, 'http://localhost:8800', '').get(what)
        elif webapi:
            aw = WebExtractor(calc_id).get(what)
        else:
            aw = Extractor(calc_id).get(what)
        w = what.replace('/', '-').replace('?', '-')
        if isinstance(aw.array, str):  # a big string
            fname = os.path.join(extract_dir, '%s_%d.csv' % (w, calc_id))
            with open(fname, 'w', encoding='utf-8') as f:
                f.write(aw.array)
        elif aw.is_good():  # a regular ArrayWrapper
            fname = os.path.join(extract_dir, '%s_%d.npz' % (w, calc_id))
            hdf5.save_npz(aw, fname)
        else:  # ArrayWrapper of strings, dictionaries or other types
            fname = os.path.join(extract_dir, '%s_%d.txt' % (w, calc_id))
            open(fname, 'w').write(aw.toml())
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
def main(calc_id: int):
    """
    Reduce the source model of the given (pre)calculation by discarding
    all sources that do not contribute to the hazard.
    """
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'][()]
    src_ids = info['source_id']
    num_ids = len(src_ids)
    bad_ids = info[info['eff_ruptures'] == 0]['source_id']
    logging.info('Found %d far away sources', len(bad_ids))
    bad_ids = set(src_id.split(';')[0]
                  for src_id in python3compat.decode(bad_ids))
    bad_dupl = bad_ids & get_dupl(python3compat.decode(src_ids))
    if bad_dupl:
        logging.info('Duplicates %s not removed' % bad_dupl)
    ok = info['eff_ruptures'] > 0
    if ok.sum() == 0:
        raise RuntimeError('All sources were filtered away!')
    ok_ids = general.group_array(info[ok][['source_id', 'code']], 'source_id')
    with performance.Monitor() as mon:
        good, total = readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], ok_ids)
    logging.info('Removed %d/%d sources', total - good, num_ids)
    print(mon)
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding
    all sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'][()]
    bad_ids = set(info[info['eff_ruptures'] == 0]['source_id'])
    if len(bad_ids) == 0:
        logging.warning('All sources are relevant, nothing to remove')
        return
    ok = info['eff_ruptures'] > 0
    if ok.sum() == 0:
        raise RuntimeError('All sources were filtered away!')
    ok_ids = general.group_array(info[ok][['source_id', 'code']], 'source_id')
    with performance.Monitor() as mon:
        good, total = readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], ok_ids)
    logging.info('Removed %d/%d sources', total - good, good)
    srcs, cnts = np.unique(info[['source_id', 'code']], return_counts=True)
    dupl = srcs[cnts > 1]
    if bad_ids & set(dict(dupl)):
        logging.info('There were duplicated sources %s', dupl)
    print(mon)
def gen_outputs(self, riskinput, monitor=performance.Monitor()):
    """
    Group the assets per taxonomy and compute the outputs by using the
    underlying riskmodels. Yield the outputs generated as dictionaries
    out_by_lr.

    :param riskinput: a RiskInput instance
    :param monitor: a monitor object used to measure the performance
    """
    self.monitor = monitor
    hazard_getter = riskinput.hazard_getter
    with monitor('getting hazard'):
        hazard_getter.init()
    sids = hazard_getter.sids

    # group the assets by taxonomy
    dic = collections.defaultdict(list)
    for sid, assets in zip(sids, riskinput.assets_by_site):
        group = groupby(assets, by_taxonomy)
        for taxonomy in group:
            epsgetter = riskinput.epsilon_getter
            dic[taxonomy].append((sid, group[taxonomy], epsgetter))

    if hasattr(hazard_getter, 'rlzs_by_gsim'):
        # save memory in event based risk by working one gsim at a time
        for gsim in hazard_getter.rlzs_by_gsim:
            for out in self._gen_outputs(hazard_getter, dic, gsim):
                yield out
    else:
        for out in self._gen_outputs(hazard_getter, dic, None):
            yield out

    if hasattr(hazard_getter, 'gmdata'):  # for event based risk
        riskinput.gmdata = hazard_getter.gmdata
def extract(what, calc_id=-1):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    """
    logging.basicConfig(level=logging.INFO)
    if calc_id < 0:
        calc_id = datastore.get_calc_ids()[calc_id]
    if dbserver.get_status() == 'running':
        job = dbcmd('get_job', calc_id)
        if job is not None:
            calc_id = job.ds_calc_dir + '.hdf5'
    dstore = datastore.read(calc_id)
    parent_id = dstore['oqparam'].hazard_calculation_id
    if parent_id:
        dstore.parent = datastore.read(parent_id)
    print('Emulating call to /v1/calc/%s/extract/%s' % (calc_id, quote(what)))
    with performance.Monitor('extract', measuremem=True) as mon, dstore:
        items = extract_(dstore, what)
        if not inspect.isgenerator(items):
            items = [(items.__class__.__name__, items)]
        fname = '%s_%d.hdf5' % (what.replace('/', '-').replace('?', '-'),
                                dstore.calc_id)
        hdf5.save(fname, items)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
def extract(what, calc_id=-1, hostport=None):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    """
    logging.basicConfig(level=logging.INFO)
    if calc_id < 0:
        calc_id = datastore.get_calc_ids()[calc_id]
    hdf5path = None
    if dbserver.get_status() == 'running':
        job = dbcmd('get_job', calc_id)
        if job is not None:
            hdf5path = job.ds_calc_dir + '.hdf5'
    dstore = datastore.read(hdf5path or calc_id)
    parent_id = dstore['oqparam'].hazard_calculation_id
    if parent_id:
        dstore.parent = datastore.read(parent_id)
    urlpath = '/v1/calc/%d/extract/%s' % (calc_id, quote(what))
    with performance.Monitor('extract', measuremem=True) as mon, dstore:
        if hostport:
            data = urlopen('http://%s%s' % (hostport, urlpath)).read()
            items = (item for item in numpy.load(io.BytesIO(data)).items())
        else:
            print('Emulating call to %s' % urlpath)
            items = extract_(dstore, what)
        if not inspect.isgenerator(items):
            items = [(items.__class__.__name__, items)]
        fname = '%s_%d.hdf5' % (what.replace('/', '-').replace('?', '-'),
                                dstore.calc_id)
        hdf5.save(fname, items)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
def main(what, calc_id: int = -1, webapi=False, local=False, *,
         extract_dir='.'):
    """
    Extract an output from the datastore and save it into a .csv or .npz
    file. By default uses the WebAPI, otherwise the extraction is done
    locally.
    """
    with performance.Monitor('extract', measuremem=True) as mon:
        if local:
            if calc_id == -1:
                calc_id = logs.dbcmd('get_job', calc_id).id
            aw = WebExtractor(calc_id, 'http://localhost:8800', '').get(what)
        elif webapi:
            aw = WebExtractor(calc_id).get(what)
        else:
            aw = Extractor(calc_id).get(what)
        w = what.replace('/', '-').replace('?', '-')
        if hasattr(aw, 'array') and isinstance(aw.array, str):  # CSV string
            fname = os.path.join(extract_dir, '%s_%d.csv' % (w, calc_id))
            with open(fname, 'w', encoding='utf-8') as f:
                f.write(aw.array)
        else:  # save as npz
            fname = os.path.join(extract_dir, '%s_%d.npz' % (w, calc_id))
            hdf5.save_npz(aw, fname)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)
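# Hedged usage sketch: 'hcurves' is an assumed extraction key; depending on
# the payload, the result lands in extract_dir as either a .csv (string
# payload) or an .npz file named '<what>_<calc_id>'.
#
#   main('hcurves', calc_id=-1, webapi=False, extract_dir='/tmp')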
def pfilter(self, src_filter, concurrent_tasks,
            monitor=performance.Monitor()):
    """
    Generate a new CompositeSourceModel by filtering the sources on
    the given site collection.

    :param src_filter: a SourceFilter instance
    :param concurrent_tasks: how many tasks to generate
    :param monitor: a Monitor instance
    :returns: a new CompositeSourceModel instance
    """
    sources_by_grp = src_filter.pfilter(self.get_sources(), concurrent_tasks,
                                        monitor)
    source_models = []
    for sm in self.source_models:
        src_groups = []
        for src_group in sm.src_groups:
            sg = copy.copy(src_group)
            sg.sources = sources_by_grp.get(sg.id, [])
            src_groups.append(sg)
        newsm = logictree.LtSourceModel(
            sm.names, sm.weight, sm.path, src_groups,
            sm.num_gsim_paths, sm.ordinal, sm.samples)
        source_models.append(newsm)
    new = self.__class__(self.gsim_lt, self.source_model_lt, source_models,
                         self.optimize_same_id)
    new.info.update_eff_ruptures(new.get_num_ruptures().__getitem__)
    return new
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding
    all sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'][()]
    num_ids = len(info['source_id'])
    bad_ids = set(info[info['eff_ruptures'] == 0]['source_id'])
    if len(bad_ids) == 0:
        dupl = info[info['multiplicity'] > 1]['source_id']
        if len(dupl) == 0:
            logging.info('Nothing to remove')
        else:
            logging.info('Nothing to remove, but there are duplicated source '
                         'IDs that could prevent the removal: %s' % dupl)
        return
    logging.info('Found %d far away sources', len(bad_ids))
    ok = info['eff_ruptures'] > 0
    if ok.sum() == 0:
        raise RuntimeError('All sources were filtered away!')
    ok_ids = general.group_array(info[ok][['source_id', 'code']], 'source_id')
    with performance.Monitor() as mon:
        good, total = readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], ok_ids)
    logging.info('Removed %d/%d sources', total - good, num_ids)
    print(mon)
def post_execute(self, times):
    """
    Compute and store average losses from the losses_by_event dataset,
    and then loss curves and maps.
    """
    self.datastore.set_attrs('task_info/start_ebrisk', times=times)
    oq = self.oqparam
    elt_length = len(self.datastore['losses_by_event'])
    builder = get_loss_builder(self.datastore)
    self.build_datasets(builder)
    mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
    smap = parallel.Starmap(compute_loss_curves_maps, monitor=mon)
    self.datastore.close()
    acc = []
    ct = oq.concurrent_tasks or 1
    for elt_slice in general.split_in_slices(elt_length, ct):
        smap.submit(self.datastore.filename, elt_slice,
                    oq.conditional_loss_poes, oq.individual_curves)
    acc = smap.reduce(acc=[])
    # copy performance information from the cache to the datastore
    pd = mon.hdf5['performance_data'].value
    hdf5.extend3(self.datastore.filename, 'performance_data', pd)
    self.datastore.open('r+')  # reopen
    self.datastore['task_info/compute_loss_curves_and_maps'] = (
        mon.hdf5['task_info/compute_loss_curves_maps'].value)
    with self.monitor('saving loss_curves and maps', autoflush=True):
        for name, idx, arr in acc:
            for ij, val in numpy.ndenumerate(arr):
                self.datastore[name][ij + idx] = val
def _disaggregate(cmaker, sitecol, rupdata, indices, iml2, eps3,
                  pne_mon=performance.Monitor(),
                  gmf_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    # returns AccumDict with keys (poe, imt) and mags, dists, lons, lats
    [sid] = sitecol.sids
    acc = dict(pnes=[], mags=[], dists=[], lons=[], lats=[])
    try:
        gsim = cmaker.gsim_by_rlzi[iml2.rlzi]
    except KeyError:
        return pack(acc, 'mags dists lons lats pnes'.split())
    maxdist = cmaker.maximum_distance(cmaker.trt)
    fildist = rupdata[cmaker.filter_distance + '_']
    for ridx, sidx in enumerate(indices):
        if sidx == -1:  # no contribution for this site
            continue
        dist = fildist[ridx][sidx]
        if dist >= maxdist:
            continue
        elif gsim.minimum_distance and dist < gsim.minimum_distance:
            dist = gsim.minimum_distance
        rctx = contexts.RuptureContext(
            (par, val[ridx]) for par, val in rupdata.items())
        dctx = contexts.DistancesContext(
            (param, getattr(rctx, param + '_')[[sidx]])
            for param in cmaker.REQUIRES_DISTANCES)
        acc['mags'].append(rctx.mag)
        acc['lons'].append(rctx.lon_[sidx])
        acc['lats'].append(rctx.lat_[sidx])
        acc['dists'].append(dist)
        with gmf_mon:
            mean_std = get_mean_std(
                sitecol, rctx, dctx, iml2.imts, [gsim])[..., 0]  # (2, N, M)
        with pne_mon:
            iml = numpy.array([
                to_distribution_values(lvl, imt)
                for imt, lvl in zip(iml2.imts, iml2)])  # shape (M, P)
            pne = _disaggregate_pne(rctx, mean_std, iml, *eps3)
            acc['pnes'].append(pne)
    return pack(acc, 'mags dists lons lats pnes'.split())
def download_shakemap(id):
    """
    Example of usage: utils/shakemap usp000fjta
    """
    with performance.Monitor('shakemap', measuremem=True) as mon:
        dest = '%s.npy' % id
        numpy.save(dest, download_array(id))
    print(mon)
    print('Saved %s' % dest)
def main(id):
    """
    Example of usage: utils/shakemap usp000fjta
    """
    with performance.Monitor('shakemap', measuremem=True) as mon:
        dest = '%s.npy' % id
        numpy.save(dest, get_array("usgs_id", id))
    print(mon)
    print('Saved %s' % dest)
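# Usage sketch mirroring the docstring example: downloads the array for the
# given USGS ShakeMap id and saves it as a .npy file in the current directory.
#
#   main('usp000fjta')   # -> writes usp000fjta.npy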
def test_extra_large_source(self):
    oq = readinput.get_oqparam('job.ini', case_21)
    mon = performance.Monitor('csm', datastore.hdf5new())
    with mock.patch('logging.error') as error:
        with mock.patch('openquake.hazardlib.geo.utils.MAX_EXTENT', 80):
            readinput.get_composite_source_model(oq, mon)
    mon.hdf5.close()
    os.remove(mon.hdf5.path)
    self.assertEqual(error.call_args[0][0],
                     'source SFLT2: too large: 84 km')
def prepare_site_model(exposure_xml, vs30_csv, grid_spacing=0,
                       site_param_distance=5, output='sites.csv'):
    """
    Prepare a site_model.csv file from an exposure xml file, a vs30 csv file
    and a grid spacing which can be 0 (meaning no grid). Sites far away from
    the vs30 records are discarded and you can see them with the command
    `oq plot_assets`. It is up to you to decide if you need to fix your
    exposure or if it is right to ignore the discarded sites.
    """
    logging.basicConfig(level=logging.INFO)
    hdf5 = datastore.hdf5new()
    with performance.Monitor(hdf5.path, hdf5, measuremem=True) as mon:
        mesh, assets_by_site = Exposure.read(
            exposure_xml, check_dupl=False).get_mesh_assets_by_site()
        mon.hdf5['assetcol'] = assetcol = site.SiteCollection.from_points(
            mesh.lons, mesh.lats, req_site_params={'vs30'})
        if grid_spacing:
            grid = mesh.get_convex_hull().dilate(grid_spacing).discretize(
                grid_spacing)
            haz_sitecol = site.SiteCollection.from_points(
                grid.lons, grid.lats, req_site_params={'vs30'})
            logging.info(
                'Reducing exposure grid with %d locations to %d sites'
                ' with assets', len(haz_sitecol), len(assets_by_site))
            haz_sitecol, assets_by, _discarded = assoc(
                assets_by_site, haz_sitecol, grid_spacing * SQRT2, 'filter')
            haz_sitecol.make_complete()
        else:
            haz_sitecol = assetcol
        vs30orig = read_vs30(vs30_csv.split(','))
        logging.info('Associating %d hazard sites to %d site parameters',
                     len(haz_sitecol), len(vs30orig))
        sitecol, vs30, discarded = assoc(
            vs30orig, haz_sitecol,
            grid_spacing * SQRT2 or site_param_distance, 'filter')
        sitecol.array['vs30'] = vs30['vs30']
        mon.hdf5['sitecol'] = sitecol
        if discarded:
            mon.hdf5['discarded'] = numpy.array(discarded)
        sids = numpy.arange(len(vs30), dtype=numpy.uint32)
        sites = compose_arrays(sids, vs30, 'site_id')
        write_csv(output, sites)
    if discarded:
        logging.info('Discarded %d sites with assets [use oq plot_assets]',
                     len(discarded))
    logging.info('Saved %d rows in %s' % (len(sitecol), output))
    logging.info(mon)
    return sitecol
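# Hedged usage sketch (file names are hypothetical): build a site model on a
# grid dilated around the exposure, associating the closest vs30 records.
#
#   sitecol = prepare_site_model('exposure.xml', 'vs30.csv', grid_spacing=10,
#                                output='site_model.csv')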
def get_poes(self, mean_std, cmaker, ctx):
    """
    :returns: an array of shape (N, L)
    """
    cm = copy.copy(cmaker)
    cm.poe_mon = performance.Monitor()  # avoid double counts
    cm.gsims = self.gsims
    avgs = []
    for ctx, poes in cm.gen_poes([ctx]):
        # poes has shape N', L, G
        avgs.append(poes @ self.weights)
    return numpy.concatenate(avgs)  # shape (N, L)
def main(input):
    """
    Convert .xml and .npz files to .hdf5 files.
    """
    with performance.Monitor('to_hdf5') as mon:
        for input_file in input:
            if input_file.endswith('.npz'):
                output = convert_npz_hdf5(input_file,
                                          input_file[:-3] + 'hdf5')
            elif input_file.endswith('.xml'):  # for source model files
                output = convert_xml_hdf5(input_file,
                                          input_file[:-3] + 'hdf5')
            else:
                continue
            print('Generated %s' % output)
    print(mon)
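# Hedged usage sketch (file names are hypothetical): .npz and .xml inputs are
# converted to .hdf5 files with the same base name; other extensions are
# skipped.
#
#   main(['gmfs.npz', 'source_model.xml', 'README.txt'])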
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding
    all sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'].value
        ok = info['weight'] > 0
        source_ids = set(info[ok]['source_id'])
    with performance.Monitor() as mon:
        readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], source_ids)
    print(mon)
def _disaggregate(cmaker, sitecol, rupdata, indices, iml2, eps3,
                  pne_mon=performance.Monitor()):
    # disaggregate (separate) PoE in different contributions
    # returns AccumDict with keys (poe, imt) and mags, dists, lons, lats
    [sid] = sitecol.sids
    acc = AccumDict(accum=[], mags=[], dists=[], lons=[], lats=[],
                    M=len(iml2.imts), P=len(iml2.poes_disagg))
    try:
        gsim = cmaker.gsim_by_rlzi[iml2.rlzi]
    except KeyError:
        return pack(acc, 'mags dists lons lats P M'.split())
    maxdist = cmaker.maximum_distance(cmaker.trt)
    fildist = rupdata[cmaker.filter_distance + '_']
    for ridx, sidx in enumerate(indices):
        if sidx == -1:  # no contribution for this site
            continue
        dist = fildist[ridx][sidx]
        if dist >= maxdist:
            continue
        elif gsim.minimum_distance and dist < gsim.minimum_distance:
            dist = gsim.minimum_distance
        rctx = contexts.RuptureContext()
        for par in rupdata:
            setattr(rctx, par, rupdata[par][ridx])
        dctx = contexts.DistancesContext(
            (param, getattr(rctx, param + '_')[[sidx]])
            for param in cmaker.REQUIRES_DISTANCES).roundup(
                gsim.minimum_distance)
        acc['mags'].append(rctx.mag)
        acc['lons'].append(rctx.lon_[sidx])
        acc['lats'].append(rctx.lat_[sidx])
        acc['dists'].append(dist)
        with pne_mon:
            for m, imt in enumerate(iml2.imts):
                for p, poe in enumerate(iml2.poes_disagg):
                    iml = iml2[m, p]
                    pne = disaggregate_pne(gsim, rctx, sitecol, dctx, imt, iml,
                                           *eps3)
                    acc[p, m].append(pne)
    return pack(acc, 'mags dists lons lats P M'.split())
def gen_outputs(self, riskinput, monitor=performance.Monitor(),
                assetcol=None):
    """
    Group the assets per taxonomy and compute the outputs by using the
    underlying riskmodels. Yield the outputs generated as dictionaries
    out_by_lr.

    :param riskinput: a RiskInput instance
    :param monitor: a monitor object used to measure the performance
    :param assetcol: not None only for event based risk
    """
    mon_context = monitor('building context')
    mon_hazard = monitor('building hazard')
    mon_risk = monitor('computing risk', measuremem=False)
    hazard_getter = riskinput.hazard_getter
    sids = hazard_getter.sids
    with mon_context:
        if assetcol is None:  # scenario, classical
            assets_by_site = riskinput.assets_by_site
        else:
            assets_by_site = assetcol.assets_by_site()

    # group the assets by taxonomy
    dic = collections.defaultdict(list)
    for sid, assets in zip(sids, assets_by_site):
        group = groupby(assets, by_taxonomy)
        for taxonomy in group:
            epsgetter = riskinput.epsilon_getter
            dic[taxonomy].append((sid, group[taxonomy], epsgetter))

    imti = {imt: i for i, imt in enumerate(hazard_getter.imtls)}
    if hasattr(hazard_getter, 'rlzs_by_gsim'):
        # save memory in event based risk by working one gsim at a time
        for gsim in hazard_getter.rlzs_by_gsim:
            with mon_hazard:
                hazard = hazard_getter.get_hazard(gsim)
            with mon_risk:
                for out in self._gen_outputs(hazard, imti, dic,
                                             hazard_getter.eids):
                    yield out
    else:
        with mon_hazard:
            hazard = hazard_getter.get_hazard()
        with mon_risk:
            for out in self._gen_outputs(hazard, imti, dic,
                                         hazard_getter.eids):
                yield out

    if hasattr(hazard_getter, 'gmdata'):  # for event based risk
        riskinput.gmdata = hazard_getter.gmdata
def expo2csv(job_ini):
    """
    Convert an exposure in XML format into CSV format
    """
    oq = readinput.get_oqparam(job_ini)
    exposure = readinput.get_exposure(oq)
    rows = []
    header = ['asset_ref', 'number', 'area', 'taxonomy', 'lon', 'lat']
    for costname in exposure.cost_types['name']:
        if costname != 'occupants':
            header.append(costname)
            header.append(costname + '-deductible')
            header.append(costname + '-insured_limit')
    header.extend(exposure.occupancy_periods)
    header.extend(exposure.tagnames)
    for asset, asset_ref in zip(exposure.assets, exposure.asset_refs):
        row = [asset_ref.decode('utf8'), asset.number, asset.area,
               asset.taxonomy, asset.location[0], asset.location[1]]
        for costname in exposure.cost_types['name']:
            if costname != 'occupants':
                row.append(asset.values[costname])
                row.append(asset.deductibles.get(costname, '?'))
                row.append(asset.insurance_limits.get(costname, '?'))
        for time_event in exposure.occupancy_periods:
            row.append(asset.value(time_event))
        for tagname, tagidx in zip(exposure.tagnames, asset.tagidxs):
            row.append(tagidx)
        rows.append(row)

    with performance.Monitor('expo2csv') as mon:
        # save exposure data as csv
        csvname = oq.inputs['exposure'].replace('.xml', '.csv')
        print('Saving %s' % csvname)
        with open(csvname, 'w') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            for row in rows:
                writer.writerow(row)

        # save exposure header as xml
        head = nrml.read(oq.inputs['exposure'], stop='assets')
        xmlname = oq.inputs['exposure'].replace('.xml', '-header.xml')
        print('Saving %s' % xmlname)
        head[0].assets.text = os.path.basename(csvname)
        with open(xmlname, 'wb') as f:
            nrml.write(head, f)
    print(mon)
def export_asset_loss_table(ekey, dstore):
    """
    Export in parallel the asset loss table from the datastore.

    NB1: for large calculations this may run out of memory.
    NB2: due to a heisenbug in the parallel reading of .hdf5 files this works
    reliably only if the datastore has been created by a different process.

    The recommendation is: *do not use this exporter*: rather, study its
    source code and write what you need. Every postprocessing is different.
    """
    key, fmt = ekey
    oq = dstore['oqparam']
    assetcol = dstore['assetcol']
    arefs = dstore['asset_refs'].value
    avals = assetcol.values()
    loss_types = dstore.get_attr('all_loss_ratios', 'loss_types').split()
    dtlist = [(lt, F32) for lt in loss_types]
    if oq.insured_losses:
        for lt in loss_types:
            dtlist.append((lt + '_ins', F32))
    lrs_dt = numpy.dtype([('rlzi', U16), ('losses', dtlist)])
    fname = dstore.export_path('%s.%s' % ekey)
    monitor = performance.Monitor(key, fname)
    lrgetter = riskinput.LossRatiosGetter(dstore)
    aids = range(len(assetcol))
    allargs = [(lrgetter, list(block), monitor)
               for block in split_in_blocks(aids, oq.concurrent_tasks)]
    dstore.close()  # avoid OSError: Can't read data (Wrong b-tree signature)
    L = len(loss_types)
    with hdf5.File(fname, 'w') as f:
        nbytes = 0
        total = numpy.zeros(len(dtlist), F32)
        for pairs in parallel.Starmap(get_loss_ratios, allargs):
            for aid, data in pairs:
                asset = assetcol[aid]
                avalue = avals[aid]
                for l, lt in enumerate(loss_types):
                    aval = avalue[lt]
                    for i in range(oq.insured_losses + 1):
                        data['ratios'][:, l + L * i] *= aval
                aref = arefs[asset.idx]
                f[b'asset_loss_table/' + aref] = data.view(lrs_dt)
                total += data['ratios'].sum(axis=0)
                nbytes += data.nbytes
        f['asset_loss_table'].attrs['loss_types'] = ' '.join(loss_types)
        f['asset_loss_table'].attrs['total'] = total
        f['asset_loss_table'].attrs['nbytes'] = nbytes
    return [fname]
def get_gmfdata(self, mon=performance.Monitor()):
    """
    :returns: an array of the dtype (sid, eid, gmv)
    """
    alldata = []
    self.sig_eps = []
    self.times = []  # rup_id, nsites, dt
    for computer in self.gen_computers(mon):
        data, dt = computer.compute_all(
            self.min_iml, self.rlzs_by_gsim, self.sig_eps)
        self.times.append((computer.ebrupture.id, len(computer.sids), dt))
        alldata.append(data)
    if not alldata:
        return []
    return numpy.concatenate(alldata)
def get_effect(mags, sitecol, gsims_by_trt, oq):
    """
    :returns: an ArrayWrapper effect_by_mag_dst_trt

    Also updates oq.maximum_distance.magdist and oq.pointsource_distance
    """
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    # computing the effect makes sense only if all IMTs have the same
    # unit of measure; for simplicity we will consider only PGA and SA
    effect = {}
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    aw = hdf5.ArrayWrapper((), dist_bins)
    if sitecol is None:
        return aw
    if len(sitecol) >= oq.max_sites_disagg and imts_ok:
        logging.info('Computing effect of the ruptures')
        mon = performance.Monitor('rupture effect')
        eff_by_mag = parallel.Starmap.apply(
            get_effect_by_mag, (mags, sitecol.one(), gsims_by_trt,
                                oq.maximum_distance, oq.imtls, mon)).reduce()
        aw.array = eff_by_mag
        effect.update({
            trt: Effect({mag: eff_by_mag[mag][:, t] for mag in eff_by_mag},
                        dist_bins[trt])
            for t, trt in enumerate(gsims_by_trt)})
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if oq.pointsource_distance['default']:
                oq.pointsource_distance[trt] = eff.dist_by_mag(
                    eff.collapse_value(oq.pointsource_distance['default']))
    elif oq.pointsource_distance['default']:
        # replace pointsource_distance with a dict trt -> mag -> dst
        for trt in gsims_by_trt:
            try:
                dst = getdefault(oq.pointsource_distance, trt)
            except TypeError:  # 'NoneType' object is not subscriptable
                dst = getdefault(oq.maximum_distance, trt)
            oq.pointsource_distance[trt] = {mag: dst for mag in mags}
    return aw
def extract(what, calc_id, webapi=True):
    """
    Extract an output from the datastore and save it into an .hdf5 file.
    By default uses the WebAPI, otherwise the extraction is done locally.
    """
    with performance.Monitor('extract', measuremem=True) as mon:
        if webapi:
            obj = WebExtractor(calc_id).get(what)
        else:
            obj = Extractor(calc_id).get(what)
        fname = '%s_%d.hdf5' % (what.replace('/', '-').replace('?', '-'),
                                calc_id)
        obj.save(fname)
        print('Saved', fname)
    if mon.duration > 1:
        print(mon)