def gen_rupture_getters(dstore, slc=slice(None), concurrent_tasks=1,
                        hdf5cache=None):
    """
    :yields: RuptureGetters
    """
    if dstore.parent:
        dstore = dstore.parent
    csm_info = dstore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    maxweight = numpy.ceil(len(rup_array) / (concurrent_tasks or 1))
    nr, ne, first_event = 0, 0, 0
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        for block in general.block_splitter(arr, maxweight):
            rgetter = RuptureGetter(
                hdf5cache or dstore.filename, numpy.array(block), grp_id,
                trt_by_grp[grp_id], samples[grp_id], rlzs_by_gsim[grp_id],
                first_event)
            rgetter.weight = getattr(block, 'weight', len(block))
            first_event += rgetter.num_events
            yield rgetter
            nr += len(block)
            ne += rgetter.num_events
    logging.info('Read %d ruptures and %d events', nr, ne)

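# A minimal, hedged sketch of the grouping idiom used above: group_array
# turns a structured array into a dict of key value -> record subarray.
# Only numpy and openquake.baselib.general are assumed; the dtype below is
# illustrative, not the real rupture dtype.
import numpy
from openquake.baselib import general

rup_dt = numpy.dtype([('serial', numpy.uint32), ('grp_id', numpy.uint16)])
rup_array = numpy.array([(1, 0), (2, 0), (3, 1)], rup_dt)
for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
    print(grp_id, len(arr))  # prints: 0 2, then 1 1
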
def save_gmf_data(dstore, sitecol, gmfs, imts, events=()):
    """
    :param dstore: a :class:`openquake.baselib.datastore.DataStore` instance
    :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param gmfs: an array of shape (N, E, M)
    :param imts: a list of IMT strings
    :param events: E event IDs or the empty tuple
    """
    if len(events) == 0:
        E = gmfs.shape[1]
        events = numpy.zeros(E, rupture.events_dt)
        events['id'] = numpy.arange(E, dtype=U64)
    dstore['events'] = events
    offset = 0
    gmfa = get_gmv_data(sitecol.sids, gmfs, events)
    dstore['gmf_data/data'] = gmfa
    dic = general.group_array(gmfa, 'sid')
    lst = []
    all_sids = sitecol.complete.sids
    for sid in all_sids:
        rows = dic.get(sid, ())
        n = len(rows)
        lst.append((offset, offset + n))
        offset += n
    dstore['gmf_data/imts'] = ' '.join(imts)
    dstore['gmf_data/indices'] = numpy.array(lst, U32)

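# Sketch of the (start, stop) index layout built above: after grouping by
# 'sid', every complete site gets a slice into gmf_data/data, empty slices
# included. Names and dtypes are illustrative.
import numpy
from openquake.baselib import general

data_dt = numpy.dtype([('sid', numpy.uint32), ('gmv', numpy.float32)])
gmfa = numpy.array([(0, .1), (0, .2), (2, .3)], data_dt)
dic = general.group_array(gmfa, 'sid')
offset, lst = 0, []
for sid in range(3):  # the complete site IDs
    n = len(dic.get(sid, ()))
    lst.append((offset, offset + n))
    offset += n
print(lst)  # [(0, 2), (2, 2), (2, 3)]: site 1 has an empty slice
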
def compute_gmfs_and_curves(eb_ruptures, sitecol, imts, rlzs_assoc,
                            min_iml, monitor):
    """
    :param eb_ruptures:
        a list of blocks of EBRuptures of the same SESCollection
    :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param imts: a list of IMT strings
    :param rlzs_assoc: a RlzsAssoc instance
    :param min_iml: the minimum intensity measure levels
    :param monitor: a Monitor instance
    :returns: a dictionary (rlzi, imt) -> [gmfarray, haz_curves]
    """
    oq = monitor.oqparam
    # NB: by construction each block is a non-empty list with
    # ruptures of the same src_group_id
    trunc_level = oq.truncation_level
    correl_model = readinput.get_correl_model(oq)
    gmfadict = create(calc.GmfColl, eb_ruptures, sitecol, imts, rlzs_assoc,
                      trunc_level, correl_model, min_iml, monitor).by_rlzi()
    result = {rlzi: [gmfadict[rlzi], None]
              if oq.ground_motion_fields else [None, None]
              for rlzi in gmfadict}
    if oq.hazard_curves_from_gmfs:
        with monitor('building hazard curves', measuremem=False):
            duration = oq.investigation_time * oq.ses_per_logic_tree_path
            for rlzi in gmfadict:
                gmvs_by_sid = group_array(gmfadict[rlzi], 'sid')
                result[rlzi][POEMAP] = calc.gmvs_to_poe_map(
                    gmvs_by_sid, oq.imtls, oq.investigation_time, duration)
    return result

def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    oq = dstore['oqparam']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    samples = oq.number_of_logic_tree_samples
    fmt = ekey[-1]
    etags = dstore['etags'].value
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    for rlz in rlzs_assoc.realizations:
        gmf_arr = gmf_data['%04d' % rlz.ordinal].value
        ruptures = []
        for eid, gmfa in group_array(gmf_arr, 'eid').items():
            rup = util.Rupture(etags[eid], sorted(set(gmfa['sid'])))
            rup.gmfa = gmfa
            ruptures.append(rup)
        ruptures.sort(key=operator.attrgetter('etag'))
        fname = build_name(dstore, rlz, 'gmf', fmt, samples)
        fnames.append(fname)
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), fname, sitecol, oq.imtls, ruptures, rlz,
            investigation_time)
    return fnames

def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    oq = dstore['oqparam']
    if not oq.calculation_mode.startswith('scenario'):
        return []
    sitecol = dstore['sitecol']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    fmt = ekey[-1]
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warning(GMF_WARNING, dstore.filename)
    data = gmf_data['data'].value
    ses_idx = 1  # for scenario only
    events = []
    for eid, gmfa in group_array(data, 'eid').items():
        event = Event(eid, ses_idx, sorted(set(gmfa['sid'])), gmfa)
        events.append(event)
    fname = dstore.build_fname('gmf', 'scenario', fmt)
    writer = hazard_writers.EventBasedGMFXMLWriter(
        fname, sm_lt_path='', gsim_lt_path='')
    writer.serialize(
        GmfCollection(sitecol, oq.imtls, events, investigation_time))
    return [fname]

def get_dmg_csq(crm, assets_by_site, gmf):
    """
    :param crm: a CompositeRiskModel object
    :param assets_by_site: a list of asset arrays, one per site
    :param gmf: a ground motion field
    :returns:
        an array of shape (A, L, 1, D + 1) with the number of buildings
        in each damage state for each asset and loss type
    """
    A = sum(len(assets) for assets in assets_by_site)
    L = len(crm.loss_types)
    D = len(crm.damage_states)
    out = numpy.zeros((A, L, 1, D + 1), F32)
    for assets, gmv in zip(assets_by_site, gmf):
        group = general.group_array(assets, 'taxonomy')
        for taxonomy, assets in group.items():
            for l, loss_type in enumerate(crm.loss_types):
                # NB: risk logic trees are not yet supported in multi_risk
                [rm], [w] = crm.get_rmodels_weights(taxonomy)
                fracs = rm.scenario_damage(loss_type, assets, [gmv])
                for asset, frac in zip(assets, fracs):
                    dmg = asset['number'] * frac[0, :D]
                    csq = asset['value-' + loss_type] * frac[0, D]
                    out[asset['ordinal'], l, 0, :D] = dmg
                    out[asset['ordinal'], l, 0, D] = csq
    return out

def view_dupl_sources(token, dstore):
    """
    Show the sources with the same ID and the truly duplicated sources
    """
    fields = ('source_id', 'code', 'gidx1', 'gidx2', 'num_ruptures')
    dic = group_array(dstore['source_info'][fields], 'source_id')
    sameid = []
    dupl = []
    for source_id, group in dic.items():
        if len(group) > 1:  # same ID sources
            sources = []
            for rec in group:
                geom = dstore['source_geom'][rec['gidx1']:rec['gidx2']]
                src = Source(source_id, rec['code'], geom,
                             rec['num_ruptures'])
                sources.append(src)
            if all_equal(sources):
                dupl.append(source_id)
            sameid.append(source_id)
    if not dupl:
        return ''
    msg = str(dupl) + '\n'
    msg += ('Found %d source(s) with the same ID and %d true duplicate(s)'
            % (len(sameid), len(dupl)))
    fakedupl = set(sameid) - set(dupl)
    if fakedupl:
        msg += '\nHere is a fake duplicate: %s' % fakedupl.pop()
    return msg

def get_hazard(self, data=None):
    """
    :param data: if given, an iterator of records of dtype gmf_dt
    :returns: sid -> records
    """
    if data is None:
        data = self.get_gmfdata()
    return general.group_array(data, 'sid')

def num_taxonomies_by_site(self):
    """
    :returns: an array with the number of distinct taxonomies per site
    """
    dic = general.group_array(self.array, 'site_id')
    num_taxonomies = numpy.zeros(self.tot_sites, U32)
    for sid, arr in dic.items():
        num_taxonomies[sid] = len(numpy.unique(arr['taxonomy']))
    return num_taxonomies

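# Numpy-only sketch of the unique-taxonomy count per site, under an
# illustrative dtype (the real asset array has more fields):
import numpy
from openquake.baselib.general import group_array

dt = numpy.dtype([('site_id', numpy.uint32), ('taxonomy', numpy.uint16)])
arr = numpy.array([(0, 5), (0, 5), (0, 7), (2, 1)], dt)
num = numpy.zeros(3, numpy.uint32)  # tot_sites = 3
for sid, recs in group_array(arr, 'site_id').items():
    num[sid] = len(numpy.unique(recs['taxonomy']))
print(num)  # [2 0 1]: site 1 has no assets at all
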
def assertEventsByRlz(self, events_by_rlz):
    """
    Check the distribution of the events by realization index
    """
    n_events = numpy.zeros(self.calc.R, int)
    dic = general.group_array(self.calc.datastore['events'].value, 'rlz')
    for rlzi, events in dic.items():
        n_events[rlzi] = len(events)
    numpy.testing.assert_equal(n_events, events_by_rlz)

def __getitem__(self, sid):
    dset = self.dstore['gmf_data/data']
    idxs = self.dstore['gmf_data/indices'][sid]
    if idxs.dtype.name == 'uint32':  # scenario
        idxs = [idxs]
    elif not idxs.dtype.names:  # engine >= 3.2
        idxs = zip(*idxs)
    data = [dset[start:stop] for start, stop in idxs]
    if len(data) == 0:  # site ID with no data
        return {}
    return general.group_array(numpy.concatenate(data), 'rlzi')

def _build_csv_data(array, rlz, sitecol, imts, investigation_time):
    # lon, lat, gmv_imt1, ..., gmv_imtN
    smlt_path = '_'.join(rlz.sm_lt_path)
    gsimlt_path = rlz.gsim_rlz.uid
    comment = ('smlt_path=%s, gsimlt_path=%s, investigation_time=%s' %
               (smlt_path, gsimlt_path, investigation_time))
    rows = [['lon', 'lat'] + imts]
    for sid, data in group_array(array, 'sid').items():
        row = ['%.5f' % sitecol.lons[sid],
               '%.5f' % sitecol.lats[sid]] + list(data['gmv'])
        rows.append(row)
    return rows, comment

def build_rcurves(cb_inputs, assets, monitor):
    """
    :param cb_inputs: triples `(cb, rlzname, data)`
    :param assets: full list of assets
    :param monitor: Monitor instance
    """
    result = {}
    for cb, rlzname, data in cb_inputs:
        aids, curves = cb(assets, group_array(data, 'aid'))
        if len(aids):
            # strip "rlz-" from rlzname below
            result[cb.index, int(rlzname[4:])] = aids, curves
    return result

def get_gmfs(dstore, precalc=None):
    """
    :param dstore: a datastore
    :param precalc: a scenario calculator with attribute .gmfa
    :returns: a dictionary grp_id, gsid -> gmfa
    """
    oq = dstore['oqparam']
    if 'gmfs' in oq.inputs:  # from file
        logging.info('Reading gmfs from file')
        sitecol, etags, gmfs_by_imt = readinput.get_gmfs(oq)
        # reduce the gmfs matrices to the filtered sites
        for imt in oq.imtls:
            gmfs_by_imt[imt] = gmfs_by_imt[imt][sitecol.indices]
        logging.info('Preparing the risk input')
        return etags, [gmfs_by_imt]
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    rlzs = rlzs_assoc.realizations
    sitecol = dstore['sitecol']
    # NB: if the hazard site collection has N sites, the hazard
    # filtered site collection for the nonzero GMFs has N' <= N sites
    # whereas the risk site collection associated to the assets
    # has N'' <= N' sites
    if dstore.parent:
        haz_sitecol = dstore.parent['sitecol']  # N' values
    else:
        haz_sitecol = sitecol
    risk_indices = set(sitecol.indices)  # N'' values
    N = len(haz_sitecol.complete)
    imt_dt = numpy.dtype([(str(imt), F32) for imt in oq.imtls])
    E = oq.number_of_ground_motion_fields
    etags = numpy.array(
        sorted(b'scenario-%010d~ses=1' % i for i in range(E)))
    gmfs = numpy.zeros((len(rlzs_assoc), N, E), imt_dt)
    if precalc:
        for i, gsim in enumerate(precalc.gsims):
            for imti, imt in enumerate(oq.imtls):
                gmfs[imt][i, sitecol.sids] = precalc.gmfa[gsim][imti]
        return etags, gmfs
    # else read from the datastore
    for i, rlz in enumerate(rlzs):
        data = group_array(dstore['gmf_data/sm-0000/%04d' % i], 'sid')
        for sid, array in data.items():
            if sid in risk_indices:
                for imti, imt in enumerate(oq.imtls):
                    a = get_array(array, imti=imti)
                    gmfs[imt][i, sid, a['eid']] = a['gmv']
    return etags, gmfs

def _build_csv_data(array, rlz, sitecol, imts, investigation_time):
    # lon, lat, gmv_imt1, ..., gmv_imtN
    smlt_path = '_'.join(rlz.sm_lt_path)
    gsimlt_path = rlz.gsim_rlz.uid
    comment = ('smlt_path=%s, gsimlt_path=%s, investigation_time=%s' %
               (smlt_path, gsimlt_path, investigation_time))
    rows = [['lon', 'lat'] + imts]
    irange = range(len(imts))
    for sid, data in group_array(array, 'sid').items():
        dic = dict(zip(data['imti'], data['gmv']))
        row = ['%.5f' % sitecol.lons[sid], '%.5f' % sitecol.lats[sid]] + [
            dic.get(imti, 0) for imti in irange]
        rows.append(row)
    return rows, comment

def import_gmfs(dstore, fname, sids):
    """
    Import in the datastore a ground motion field CSV file.

    :param dstore: the datastore
    :param fname: the CSV file
    :param sids: the site IDs (complete)
    :returns: event_ids
    """
    array = hdf5.read_csv(fname, {'sid': U32, 'eid': U64, None: F32}).array
    imts = [name[4:] for name in array.dtype.names[2:]]
    n_imts = len(imts)
    gmf_data_dt = numpy.dtype(
        [('rlzi', U16), ('sid', U32), ('eid', U64),
         ('gmv', (F32, (n_imts,)))])
    arr = numpy.zeros(len(array), gmf_data_dt)
    col = 0
    for name in array.dtype.names:
        if name.startswith('gmv_'):
            arr['gmv'][:, col] = array[name]
            col += 1
        else:
            arr[name] = array[name]
    # store the events
    eids = numpy.unique(array['eid'])
    eids.sort()
    E = len(eids)
    eid2idx = dict(zip(eids, range(E)))
    events = numpy.zeros(E, rupture.events_dt)
    events['id'] = eids
    dstore['events'] = events
    # store the GMFs
    dic = general.group_array(arr, 'sid')
    lst = []
    offset = 0
    for sid in sids:
        n = len(dic.get(sid, []))
        lst.append((offset, offset + n))
        if n:
            offset += n
            gmvs = dic[sid]
            gmvs['eid'] = get_idxs(gmvs, eid2idx)
            dstore.extend('gmf_data/data', gmvs)
    dstore['gmf_data/indices'] = numpy.array(lst, U32)
    dstore['gmf_data/imts'] = ' '.join(imts)
    sig_eps = numpy.zeros(len(eids), getters.sig_eps_dt(imts))
    sig_eps['eid'] = eids
    dstore['gmf_data/sigma_epsilon'] = sig_eps
    dstore['weights'] = numpy.ones(1)
    return eids

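# Hedged sketch of the eid -> index remapping performed by get_idxs above;
# get_idxs itself is assumed, this numpy-only equivalent shows the intent:
import numpy

eids = numpy.array([10, 42, 99], numpy.uint64)   # sorted unique event IDs
eid2idx = dict(zip(eids, range(len(eids))))
raw_eids = numpy.array([42, 10, 42], numpy.uint64)
idxs = numpy.array([eid2idx[e] for e in raw_eids], numpy.uint64)
print(idxs)  # [1 0 1]: stored eids become row positions in the events table
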
def view_elt(token, dstore):
    """
    Display the event loss table averaged by event
    """
    oq = dstore['oqparam']
    R = len(dstore['csm_info'].rlzs)
    dic = group_array(dstore['losses_by_event'][()], 'rlzi')
    header = oq.loss_dt().names
    tbl = []
    for rlzi in range(R):
        if rlzi in dic:
            tbl.append(dic[rlzi]['loss'].mean(axis=0))
        else:
            tbl.append([0.] * len(header))
    return rst_table(tbl, header)

def get_gmfs(dstore):
    """
    :param dstore: a datastore
    :returns: a dictionary grp_id, gsid -> gmfa
    """
    oq = dstore['oqparam']
    if 'gmfs' in oq.inputs:  # from file
        logging.info('Reading gmfs from file')
        sitecol, etags, gmfs_by_imt = readinput.get_gmfs(oq)
        # reduce the gmfs matrices to the filtered sites
        for imt in oq.imtls:
            gmfs_by_imt[imt] = gmfs_by_imt[imt][sitecol.indices]
        logging.info('Preparing the risk input')
        return etags, {(0, 'FromFile'): gmfs_by_imt}
    # else from datastore
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    rlzs = rlzs_assoc.realizations
    sitecol = dstore['sitecol']
    # NB: if the hazard site collection has N sites, the hazard
    # filtered site collection for the nonzero GMFs has N' <= N sites
    # whereas the risk site collection associated to the assets
    # has N'' <= N' sites
    if dstore.parent:
        haz_sitecol = dstore.parent['sitecol']  # N' values
    else:
        haz_sitecol = sitecol
    risk_indices = set(sitecol.indices)  # N'' values
    N = len(haz_sitecol.complete)
    imt_dt = dtype((imt, F32) for imt in oq.imtls)
    E = oq.number_of_ground_motion_fields
    # build a matrix N x E for each GSIM realization
    gmfs = {(grp_id, gsim): numpy.zeros((N, E), imt_dt)
            for grp_id, gsim in rlzs_assoc}
    for i, rlz in enumerate(rlzs):
        data = group_array(dstore['gmf_data/%04d' % i], 'sid')
        for sid, array in data.items():
            if sid in risk_indices:
                for imti, imt in enumerate(oq.imtls):
                    a = get_array(array, imti=imti)
                    gs = str(rlz.gsim_rlz)
                    gmfs[0, gs][imt][sid, a['eid']] = a['gmv']
    etags = numpy.array(
        sorted([b'scenario-%010d~ses=1' % i
                for i in range(oq.number_of_ground_motion_fields)]))
    return etags, gmfs

def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    oq = dstore['oqparam']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    fmt = ekey[-1]
    n_gmfs = getattr(oq, 'number_of_ground_motion_fields', None)
    if n_gmfs:
        etags = numpy.array(
            sorted([b'scenario-%010d~ses=1' % i for i in range(n_gmfs)]))
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    for sm_id, rlzs in sorted(rlzs_assoc.rlzs_by_smodel.items()):
        key = 'sm-%04d' % sm_id
        if not n_gmfs:  # event based
            events = dstore['events']
            if key not in events:  # source model producing zero ruptures
                continue
            etags = build_etags(events[key])
        for rlz in rlzs:
            try:
                gmf_arr = gmf_data['%s/%04d' % (key, rlz.ordinal)].value
            except KeyError:  # no GMFs for the given realization
                continue
            ruptures = []
            for eid, gmfa in group_array(gmf_arr, 'eid').items():
                rup = util.Rupture(sm_id, eid, etags[eid],
                                   sorted(set(gmfa['sid'])))
                rup.gmfa = gmfa
                ruptures.append(rup)
            ruptures.sort(key=operator.attrgetter('etag'))
            fname = dstore.build_fname('gmf', rlz, fmt)
            fnames.append(fname)
            globals()['export_gmf_%s' % fmt](
                ('gmf', fmt), fname, sitecol, oq.imtls, ruptures, rlz,
                investigation_time)
    return fnames

def make_eps(asset_array, num_samples, seed, correlation):
    """
    :param asset_array: an array of assets
    :param int num_samples: the number of ruptures
    :param int seed: a random seed
    :param float correlation: the correlation coefficient
    :returns: epsilons matrix of shape (num_assets, num_samples)
    """
    assets_by_taxo = group_array(asset_array, 'taxonomy')
    eps = numpy.zeros((len(asset_array), num_samples), numpy.float32)
    for taxonomy, assets in assets_by_taxo.items():
        shape = (len(assets), num_samples)
        logging.info('Building %s epsilons for taxonomy %s', shape, taxonomy)
        zeros = numpy.zeros(shape)
        epsilons = scientific.make_epsilons(zeros, seed, correlation)
        for asset, epsrow in zip(assets, epsilons):
            eps[asset['ordinal']] = epsrow
    return eps

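# Numpy-only sketch of the scatter step above: epsilon rows are computed per
# taxonomy group, then placed at each asset's 'ordinal' position. The dtype
# and the plain normal sampling are illustrative stand-ins for
# scientific.make_epsilons.
import numpy
from openquake.baselib.general import group_array

asset_dt = numpy.dtype([('ordinal', numpy.uint32), ('taxonomy', numpy.uint16)])
assets = numpy.array([(0, 1), (2, 1), (1, 2)], asset_dt)  # sorted by taxonomy
eps = numpy.zeros((3, 4), numpy.float32)  # (num_assets, num_samples)
rng = numpy.random.default_rng(42)
for taxo, grp in group_array(assets, 'taxonomy').items():
    rows = rng.normal(size=(len(grp), 4)).astype(numpy.float32)
    for asset, epsrow in zip(grp, rows):
        eps[asset['ordinal']] = epsrow  # assets 0 and 2 share taxonomy 1
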
def view_assets_by_site(token, dstore):
    """
    Display statistical information about the distribution of the assets
    """
    taxonomies = dstore['assetcol/tagcol/taxonomy'][()]
    assets_by_site = dstore['assetcol'].assets_by_site()
    data = ['taxonomy mean stddev min max num_sites num_assets'.split()]
    num_assets = AccumDict()
    for assets in assets_by_site:
        num_assets += {k: [len(v)] for k, v in group_array(
            assets, 'taxonomy').items()}
    for taxo in sorted(num_assets):
        val = numpy.array(num_assets[taxo])
        data.append(stats(taxonomies[taxo], val, val.sum()))
    if len(num_assets) > 1:  # more than one taxonomy, add a summary
        n_assets = numpy.array([len(assets) for assets in assets_by_site])
        data.append(stats('*ALL*', n_assets, n_assets.sum()))
    return rst_table(data)

def get_amplification(oqparam):
    """
    :returns: a composite array (amplification, param, imt0, imt1, ...)
    """
    fname = oqparam.inputs['amplification']
    aw = hdf5.read_csv(fname, {'ampcode': site.ampcode_dt, None: F64})
    aw.fname = fname
    imls = ()
    if 'level' in aw.dtype.names:
        for records in group_array(aw, 'ampcode').values():
            if len(imls) == 0:
                imls = numpy.sort(records['level'])
            elif (len(records['level']) != len(imls)
                  or (records['level'] != imls).any()):
                raise InvalidFile(
                    '%s: levels for %s %s instead of %s' %
                    (fname, records['ampcode'][0], records['level'], imls))
    return aw

def view_portfolio_loss(token, dstore):
    """
    The loss for the full portfolio, for each realization and loss type,
    extracted from the event loss table.
    """
    oq = dstore['oqparam']
    loss_dt = oq.loss_dt()
    R = dstore['csm_info'].get_num_rlzs()
    by_rlzi = group_array(dstore['losses_by_event'].value, 'rlzi')
    data = numpy.zeros(R, loss_dt)
    rlzids = [str(r) for r in range(R)]
    for r in range(R):
        loss = by_rlzi[r]['loss'].sum(axis=0)
        for l, lt in enumerate(loss_dt.names):
            data[r][lt] = loss[l]
    array = util.compose_arrays(numpy.array(rlzids), data, 'rlz')
    # this is very sensitive to rounding errors, so I am using a low precision
    return rst_table(array, fmt='%.5E')

def __fromh5__(self, dic, attrs):
    # TODO: this is called more times than needed, maybe we should cache it
    sg_data = group_array(dic['sg_data'], 'sm_id')
    sm_data = dic['sm_data']
    vars(self).update(attrs)
    self.gsim_fname = decode(self.gsim_fname)
    if self.gsim_fname.endswith('.xml'):
        trts = sorted(self.trts)
        if 'gmpe_table' in self.gsim_lt_xml:
            # the Canadian GSIMs depend on external files which are not
            # in the datastore; I am storing the path to the original
            # file so that the external files can be found; unfortunately,
            # this means that copying the datastore on a different machine
            # and exporting from there works only if the gsim_fname and all
            # the external files are copied in the exact same place
            self.gsim_lt = logictree.GsimLogicTree(self.gsim_fname, trts)
        else:
            # regular case: read the logic tree from self.gsim_lt_xml,
            # so that you do not need to copy anything except the datastore
            tmp = writetmp(self.gsim_lt_xml, suffix='.xml')
            self.gsim_lt = logictree.GsimLogicTree(tmp, trts)
    else:  # fake file with the name of the GSIM
        self.gsim_lt = logictree.GsimLogicTree.from_(self.gsim_fname)
    self.source_models = []
    for sm_id, rec in enumerate(sm_data):
        tdata = sg_data[sm_id]
        srcgroups = [
            sourceconverter.SourceGroup(
                self.trts[trti], id=grp_id, eff_ruptures=effrup)
            for grp_id, trti, effrup, sm_id in tdata if effrup]
        path = tuple(str(decode(rec['path'])).split('_'))
        trts = set(sg.trt for sg in srcgroups)
        num_gsim_paths = self.gsim_lt.reduce(trts).get_num_paths()
        sm = logictree.SourceModel(
            rec['name'], rec['weight'], path, srcgroups,
            num_gsim_paths, sm_id, rec['samples'])
        self.source_models.append(sm)
    self.init()
    try:
        os.remove(tmp)  # gsim_lt file
    except NameError:  # tmp is defined only in the regular case, see above
        pass

def postproc(self):
    """
    Build aggregate loss curves in process
    """
    dstore = self.datastore
    self.before_export()  # set 'realizations'
    oq = self.oqparam
    stats = self.param['stats']
    # store avg_losses-stats
    if oq.avg_losses:
        set_rlzs_stats(self.datastore, 'avg_losses')
    try:
        b = self.param['builder']
    except KeyError:  # don't build auxiliary tables
        return
    if dstore.parent:
        dstore.parent.open('r')  # to read the ruptures
    logging.info('Building loss tables')
    build_loss_tables(dstore)
    logging.info('Building aggregate loss curves')
    with self.monitor('building agg_curves', measuremem=True):
        lbr = group_array(dstore['losses_by_event'][()], 'rlzi')
        dic = {r: arr['loss'] for r, arr in lbr.items()}
        array, arr_stats = b.build(dic, stats)
    loss_types = oq.loss_dt().names
    units = self.datastore['cost_calculator'].get_units(loss_types)
    if oq.individual_curves or self.R == 1:
        self.datastore['agg_curves-rlzs'] = array  # shape (P, R, L)
        self.datastore.set_attrs(
            'agg_curves-rlzs',
            shape_descr=['return_periods', 'rlzs', 'loss_types'],
            return_periods=b.return_periods,
            rlzs=numpy.arange(self.R),
            loss_types=loss_types, units=units)
    if arr_stats is not None:
        self.datastore['agg_curves-stats'] = arr_stats  # shape (P, S, L)
        self.datastore.set_attrs(
            'agg_curves-stats',
            shape_descr=['return_periods', 'stats', 'loss_types'],
            return_periods=b.return_periods,
            stats=[encode(name) for (name, func) in stats],
            loss_types=loss_types, units=units)

def export_gmf(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    oq = dstore['oqparam']
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    fmt = ekey[-1]
    gmf_data = dstore['gmf_data']
    nbytes = gmf_data.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        logging.warn(GMF_WARNING, dstore.hdf5path)
    fnames = []
    ruptures_by_rlz = collections.defaultdict(list)
    for grp_id, gsim in rlzs_assoc:
        key = 'grp-%02d' % grp_id
        try:
            events = dstore['events/' + key]
        except KeyError:  # source model producing zero ruptures
            continue
        eventdict = dict(zip(events['eid'], events))
        try:
            data = gmf_data['%s/%s' % (key, gsim)].value
        except KeyError:  # no GMFs for the given realization
            continue
        for rlzi, rlz in enumerate(rlzs_assoc[grp_id, gsim]):
            ruptures = ruptures_by_rlz[rlz]
            gmf_arr = get_array(data, rlzi=rlzi)
            for eid, gmfa in group_array(gmf_arr, 'eid').items():
                ses_idx = eventdict[eid]['ses']
                rup = Rup(eid, ses_idx, sorted(set(gmfa['sid'])), gmfa)
                ruptures.append(rup)
    for rlz in sorted(ruptures_by_rlz):
        ruptures_by_rlz[rlz].sort(key=operator.attrgetter('eid'))
        fname = dstore.build_fname('gmf', rlz, fmt)
        fnames.append(fname)
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), fname, sitecol, oq.imtls, ruptures_by_rlz[rlz],
            rlz, investigation_time)
    return fnames

def gen_rupture_getters(dstore, srcfilter, ct):
    """
    :param dstore: a :class:`openquake.baselib.datastore.DataStore`
    :param srcfilter: a :class:`openquake.hazardlib.calc.filters.SourceFilter`
    :param ct: number of concurrent tasks
    :yields: filtered RuptureGetters
    """
    full_lt = dstore['full_lt']
    trt_by_grp = full_lt.trt_by_grp
    samples = full_lt.get_samples_by_grp()
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][()]
    items = list(general.group_array(rup_array, 'grp_id').items())
    items.sort(key=lambda item: len(item[1]))  # other weights were much worse
    maxweight = None
    while items:
        grp_id, rups = items.pop()  # from the largest group
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        trt = trt_by_grp[grp_id]
        proxies = list(_gen(rups, srcfilter, trt, samples[grp_id]))
        if len(proxies) == 1:  # split by gsim
            offset = 0
            for gsim, rlzs in rlzs_by_gsim[grp_id].items():
                rgetter = RuptureGetter(
                    proxies, dstore.filename, grp_id, trt,
                    samples[grp_id], {gsim: rlzs})
                rgetter.offset = offset
                offset += rgetter.num_events
                yield rgetter
        else:  # split by block
            if not maxweight:
                maxweight = sum(p.weight for p in proxies) / (ct // 2 or 1)
            blocks = list(general.block_splitter(
                proxies, maxweight, operator.attrgetter('weight')))
            logging.info('Group %d: %d ruptures -> %d task(s)',
                         grp_id, len(rups), len(blocks))
            for block in blocks:
                rgetter = RuptureGetter(
                    block, dstore.filename, grp_id, trt,
                    samples[grp_id], rlzs_by_gsim[grp_id])
                yield rgetter

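# Hedged sketch of general.block_splitter as used above: it packs items into
# blocks whose cumulative weight is close to max_weight (a block may slightly
# exceed it). The integer weights here are illustrative.
from openquake.baselib import general

weights = [5, 4, 3, 2, 1]
blocks = list(general.block_splitter(weights, 6, lambda w: w))
for block in blocks:
    print(list(block), sum(block))  # each block's total weight is near 6
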
def postproc(self):
    """
    Build aggregate loss curves in process
    """
    dstore = self.datastore
    self.before_export()  # set 'realizations'
    oq = self.oqparam
    stats = self.param['stats']
    # store avg_losses-stats
    if oq.avg_losses:
        set_rlzs_stats(self.datastore, 'avg_losses')
    try:
        b = self.param['builder']
    except KeyError:  # don't build auxiliary tables
        return
    if dstore.parent:
        dstore.parent.open('r')  # to read the ruptures
    if 'ruptures' in self.datastore and len(self.datastore['ruptures']):
        logging.info('Building loss tables')
        with self.monitor('building loss tables', measuremem=True):
            rlt, lbr = build_loss_tables(dstore)
            dstore['rup_loss_table'] = rlt
            dstore['losses_by_rlzi'] = lbr
            ridx = [rlt[:, lti].argmax() for lti in range(self.L)]
            dstore.set_attrs('rup_loss_table', ridx=ridx)
    logging.info('Building aggregate loss curves')
    with self.monitor('building agg_curves', measuremem=True):
        lbr = group_array(dstore['losses_by_event'][()], 'rlzi')
        dic = {r: arr['loss'] for r, arr in lbr.items()}
        array, arr_stats = b.build(dic, stats)
    loss_types = ' '.join(oq.loss_dt().names)
    units = self.datastore['cost_calculator'].get_units(loss_types.split())
    if oq.individual_curves or self.R == 1:
        self.datastore['agg_curves-rlzs'] = array
        self.datastore.set_attrs(
            'agg_curves-rlzs', return_periods=b.return_periods,
            loss_types=loss_types, units=units)
    if arr_stats is not None:
        self.datastore['agg_curves-stats'] = arr_stats
        self.datastore.set_attrs(
            'agg_curves-stats', return_periods=b.return_periods,
            stats=[encode(name) for (name, func) in stats],
            loss_types=loss_types, units=units)

def get_gmfs(dstore):
    """
    :param dstore: a datastore
    :returns: a dictionary trt_id, gsid -> gmfa
    """
    oq = dstore["oqparam"]
    if "gmfs" in oq.inputs:  # from file
        logging.info("Reading gmfs from file")
        sitecol, etags, gmfs_by_imt = readinput.get_gmfs(oq)
        # reduce the gmfs matrices to the filtered sites
        for imt in oq.imtls:
            gmfs_by_imt[imt] = gmfs_by_imt[imt][sitecol.indices]
        logging.info("Preparing the risk input")
        return etags, {(0, "FromFile"): gmfs_by_imt}
    # else from datastore
    rlzs_assoc = dstore["csm_info"].get_rlzs_assoc()
    rlzs = rlzs_assoc.realizations
    sitecol = dstore["sitecol"]
    # NB: if the hazard site collection has N sites, the hazard
    # filtered site collection for the nonzero GMFs has N' <= N sites
    # whereas the risk site collection associated to the assets
    # has N'' <= N' sites
    if dstore.parent:
        haz_sitecol = dstore.parent["sitecol"]  # N' values
    else:
        haz_sitecol = sitecol
    risk_indices = set(sitecol.indices)  # N'' values
    N = len(haz_sitecol.complete)
    imt_dt = numpy.dtype([(bytes(imt), F32) for imt in oq.imtls])
    E = oq.number_of_ground_motion_fields
    # build a matrix N x E for each GSIM realization
    gmfs = {(trt_id, gsim): numpy.zeros((N, E), imt_dt)
            for trt_id, gsim in rlzs_assoc}
    for i, rlz in enumerate(rlzs):
        data = general.group_array(dstore["gmf_data/%04d" % i], "sid")
        for sid, array in data.items():
            if sid in risk_indices:
                for imti, imt in enumerate(oq.imtls):
                    a = general.get_array(array, imti=imti)
                    gs = str(rlz.gsim_rlz)
                    gmfs[0, gs][imt][sid, a["eid"]] = a["gmv"]
    return dstore["etags"].value, gmfs

def export(self, mesh, sm_by_grp):
    """
    Return a :class:`Rupture` object, with all the attributes set,
    suitable for export in XML format.
    """
    rupture = self.rupture
    events_by_ses = general.group_array(self.events, 'ses')
    new = ExportedRupture(self.serial, events_by_ses, self.sids)
    new.mesh = mesh[self.sids]
    new.multiplicity = self.multiplicity
    if isinstance(rupture.surface, geo.ComplexFaultSurface):
        new.typology = 'complexFaultsurface'
    elif isinstance(rupture.surface, geo.SimpleFaultSurface):
        new.typology = 'simpleFaultsurface'
    elif isinstance(rupture.surface, geo.GriddedSurface):
        new.typology = 'griddedRupture'
    elif isinstance(rupture.surface, geo.MultiSurface):
        new.typology = 'multiPlanesRupture'
    else:
        new.typology = 'singlePlaneRupture'
    new.is_from_fault_source = iffs = isinstance(
        rupture.surface, (geo.ComplexFaultSurface, geo.SimpleFaultSurface))
    new.is_multi_surface = ims = isinstance(rupture.surface, geo.MultiSurface)
    new.lons, new.lats, new.depths = get_geom(rupture.surface, iffs, ims)
    new.surface = rupture.surface
    new.strike = rupture.surface.get_strike()
    new.dip = rupture.surface.get_dip()
    new.rake = rupture.rake
    new.hypocenter = rupture.hypocenter
    new.tectonic_region_type = rupture.tectonic_region_type
    new.magnitude = new.mag = rupture.mag
    new.top_left_corner = None if iffs or ims else (
        new.lons[0], new.lats[0], new.depths[0])
    new.top_right_corner = None if iffs or ims else (
        new.lons[1], new.lats[1], new.depths[1])
    new.bottom_left_corner = None if iffs or ims else (
        new.lons[2], new.lats[2], new.depths[2])
    new.bottom_right_corner = None if iffs or ims else (
        new.lons[3], new.lats[3], new.depths[3])
    return new

def export_all(self):
    fnames = []
    imts = list(self.oq.imtls)
    for sm_id in self.dstore['gmf_data']:
        events = self.dstore['events/' + sm_id]
        etag = dict(zip(range(len(events)), build_etags(events)))
        for rlzno in self.dstore['gmf_data/' + sm_id]:
            rlz = self.rlzs[int(rlzno)]
            gmf = self.dstore['gmf_data/%s/%s' % (sm_id, rlzno)].value
            for eid, array in group_array(gmf, 'eid').items():
                data, comment = _build_csv_data(
                    array, rlz, self.sitecol, imts,
                    self.oq.investigation_time)
                fname = self.dstore.build_fname(
                    'gmf', '%s-rlz-%03d' % (etag[eid], rlz.ordinal), 'csv')
                logging.info('Exporting %s', fname)
                writers.write_csv(fname, data, comment=comment)
                fnames.append(fname)
    return fnames

def __fromh5__(self, dic, attrs):
    tm_data = group_array(dic['tm_data'], 'sm_id')
    sm_data = dic['sm_data']
    vars(self).update(attrs)
    self.source_models = []
    for sm_id, rec in enumerate(sm_data):
        tdata = tm_data[sm_id]
        trtmodels = [
            TrtModel(self.trts[trti], id=trt_id, eff_ruptures=effrup)
            for trt_id, trti, effrup, sm_id in tdata if effrup > 0]
        path = tuple(rec['path'].split('_'))
        trts = set(tm.trt for tm in trtmodels)
        if self.gsim_fname.endswith('.xml'):
            gsim_lt = logictree.GsimLogicTree(self.gsim_fname, trts)
        else:  # fake file with the name of the GSIM
            gsim_lt = logictree.GsimLogicTree.from_(self.gsim_fname)
        sm = SourceModel(rec['name'], rec['weight'], path, trtmodels,
                         gsim_lt, sm_id, rec['samples'])
        self.source_models.append(sm)

def gen_rgetters(dstore, slc=slice(None), erf=False):
    """
    :yields: unfiltered RuptureGetters
    """
    full_lt = dstore['full_lt']
    trt_by_grp = full_lt.trt_by_grp
    samples = full_lt.get_samples_by_grp()
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim_grp()
    rup_array = dstore['rup'][slc] if erf else dstore['ruptures'][slc]
    nr = len(dstore['rup']) if erf else len(dstore['ruptures'])
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim.get(grp_id, []):  # the model has no sources
            continue
        for block in general.split_in_blocks(arr, len(arr) / nr):
            rgetter = RuptureGetter(
                [RuptureProxy(rec) for rec in block], dstore.filename,
                grp_id, trt_by_grp[grp_id], samples[grp_id],
                rlzs_by_gsim[grp_id])
            yield rgetter

def get_assets_by_taxo(assets, tempname=None):
    """
    :param assets: an array of assets
    :param tempname: hdf5 file where the epsilons are (or None)
    :returns: assets_by_taxo with attributes eps and idxs
    """
    assets_by_taxo = AccumDict(group_array(assets, 'taxonomy'))
    assets_by_taxo.idxs = numpy.argsort(numpy.concatenate(
        [a['ordinal'] for a in assets_by_taxo.values()]))
    assets_by_taxo.eps = {}
    if tempname is None:  # no epsilons
        return assets_by_taxo
    # otherwise read the epsilons and group them by taxonomy
    with hdf5.File(tempname, 'r') as h5:
        dset = h5['epsilon_matrix']
        for taxo, assets in assets_by_taxo.items():
            lst = [dset[aid] for aid in assets['ordinal']]
            assets_by_taxo.eps[taxo] = numpy.array(lst)
    return assets_by_taxo

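# Sketch of the ordering trick above: argsort over the concatenated ordinals
# maps the taxonomy-grouped order back to the original asset order. The dtype
# is illustrative and the input is sorted by taxonomy for determinism.
import numpy
from openquake.baselib.general import group_array

asset_dt = numpy.dtype([('ordinal', numpy.uint32), ('taxonomy', numpy.uint16)])
assets = numpy.array([(1, 1), (0, 2), (2, 2)], asset_dt)
by_taxo = group_array(assets, 'taxonomy')
ordinals = numpy.concatenate([a['ordinal'] for a in by_taxo.values()])
print(ordinals, numpy.argsort(ordinals))  # [1 0 2] [1 0 2]
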
def __init__(self, imtls, ampl_funcs, amplevels=None):
    fname = getattr(ampl_funcs, 'fname', None)
    self.imtls = imtls
    self.periods, levels = check_same_levels(imtls)
    self.amplevels = levels if amplevels is None else amplevels
    self.midlevels = numpy.diff(levels) / 2 + levels[:-1]  # mid levels
    self.vs30_ref = ampl_funcs.vs30_ref
    has_levels = 'level' in ampl_funcs.dtype.names
    if has_levels:
        self.imls = imls = numpy.array(sorted(set(ampl_funcs['level'])))
        check_unique(ampl_funcs.array, ['ampcode', 'level'], fname)
    else:
        self.imls = imls = ()
        check_unique(ampl_funcs.array, ['ampcode'], fname)
    cols = (ampl_funcs.dtype.names[2:] if has_levels
            else ampl_funcs.dtype.names[1:])
    imts = [from_string(imt) for imt in cols
            if not imt.startswith('sigma_')]
    m_indices = digitize(
        'period', self.periods, [imt.period for imt in imts])
    if len(imls) <= 1:  # 1 level means same values for all levels
        l_indices = [0]
    else:
        l_indices = digitize('level', self.midlevels, imls)
    L = len(l_indices)
    self.imtdict = {imt: str(imts[m]) for m, imt in zip(m_indices, imtls)}
    self.alpha = {}  # code, imt -> alphas
    self.sigma = {}  # code, imt -> sigmas
    self.ampcodes = []
    for code, arr in group_array(ampl_funcs, 'ampcode').items():
        self.ampcodes.append(code)
        for m in set(m_indices):
            im = str(imts[m])
            self.alpha[code, im] = alpha = numpy.zeros(L)
            self.sigma[code, im] = sigma = numpy.zeros(L)
            idx = 0
            for rec in arr[l_indices]:
                alpha[idx] = rec[im]
                try:
                    sigma[idx] = rec['sigma_' + im]
                except ValueError:  # missing sigma
                    pass
                idx += 1

def export_gmf_data_csv(ekey, dstore):
    oq = dstore['oqparam']
    rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
    if 'scenario' in oq.calculation_mode:
        imtls = dstore['oqparam'].imtls
        gsims = [str(rlz.gsim_rlz) for rlz in rlzs_assoc.realizations]
        n_gmfs = oq.number_of_ground_motion_fields
        fields = ['%03d' % i for i in range(n_gmfs)]
        dt = numpy.dtype([(f, F32) for f in fields])
        etags, gmfs_ = calc.get_gmfs(dstore)
        sitemesh = get_mesh(dstore['sitecol'])
        writer = writers.CsvWriter(fmt='%.5f')
        for gsim, gmfa in zip(gsims, gmfs_):  # gmfa of shape (N, I, E)
            for imti, imt in enumerate(imtls):
                gmfs = numpy.zeros(len(gmfa), dt)
                for e, event in enumerate(dt.names):
                    gmfs[event] = gmfa[:, imti, e]
                dest = dstore.build_fname(
                    'gmf', '%s-%s' % (gsim, imt), 'csv')
                data = util.compose_arrays(sitemesh, gmfs)
                writer.save(data, dest)
        return writer.getsaved()
    else:  # event based
        eid = int(ekey[0].split('/')[1]) if '/' in ekey[0] else None
        gmfa = numpy.fromiter(
            GmfDataGetter.gen_gmfs(dstore['gmf_data'], rlzs_assoc, eid),
            gmf_data_dt)
        if eid is None:  # new format
            fname = dstore.build_fname('gmf', 'data', 'csv')
            gmfa.sort(order=['rlzi', 'sid', 'eid', 'imti'])
            writers.write_csv(fname, gmfa)
            return [fname]
        # old format for single eid
        fnames = []
        imts = list(oq.imtls)
        for rlzi, array in group_array(gmfa, 'rlzi').items():
            rlz = rlzs_assoc.realizations[rlzi]
            data, comment = _build_csv_data(
                array, rlz, dstore['sitecol'], imts, oq.investigation_time)
            fname = dstore.build_fname(
                'gmf', '%d-rlz-%03d' % (eid, rlzi), 'csv')
            writers.write_csv(fname, data, comment=comment)
            fnames.append(fname)
        return fnames

def get_assets_by_taxo(assets, epspath=None):
    """
    :param assets: an array of assets
    :param epspath: hdf5 file where the epsilons are (or None)
    :returns: assets_by_taxo with attributes eps and idxs
    """
    assets_by_taxo = AccumDict(group_array(assets, 'taxonomy'))
    assets_by_taxo.idxs = numpy.argsort(numpy.concatenate([
        a['ordinal'] for a in assets_by_taxo.values()]))
    assets_by_taxo.eps = {}
    if epspath is None:  # no epsilons
        return assets_by_taxo
    # otherwise read the epsilons and group them by taxonomy
    with hdf5.File(epspath, 'r') as h5:
        dset = h5['epsilon_matrix']
        for taxo, assets in assets_by_taxo.items():
            lst = [dset[aid] for aid in assets['ordinal']]
            assets_by_taxo.eps[taxo] = numpy.array(lst)
    return assets_by_taxo

def compute_gmfs_curves(self, monitor):
    """
    :returns: a dict with keys gmfdata, indices, hcurves
    """
    oq = self.oqparam
    with monitor('GmfGetter.init', measuremem=True):
        self.init()
    hcurves = {}  # key -> poes
    if oq.hazard_curves_from_gmfs:
        hc_mon = monitor('building hazard curves', measuremem=False)
        duration = oq.investigation_time * oq.ses_per_logic_tree_path
        with monitor('building hazard', measuremem=True):
            gmfdata = self.get_gmfdata()  # returned later
            hazard = self.get_hazard(data=gmfdata)
        for sid, hazardr in hazard.items():
            dic = general.group_array(hazardr, 'rlzi')
            for rlzi, array in dic.items():
                with hc_mon:
                    gmvs = array['gmv']
                    for imti, imt in enumerate(oq.imtls):
                        poes = _gmvs_to_haz_curve(
                            gmvs[:, imti], oq.imtls[imt],
                            oq.investigation_time, duration)
                        hcurves[rsi2str(rlzi, sid, imt)] = poes
    elif oq.ground_motion_fields:  # fast lane
        with monitor('building hazard', measuremem=True):
            gmfdata = self.get_gmfdata()
    else:
        return {}
    indices = []
    gmfdata.sort(order=('sid', 'rlzi', 'eid'))
    start = stop = 0
    for sid, rows in itertools.groupby(gmfdata['sid']):
        for row in rows:
            stop += 1
        indices.append((sid, start, stop))
        start = stop
    res = dict(gmfdata=gmfdata, hcurves=hcurves,
               indices=numpy.array(indices, (U32, 3)))
    return res

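# Stdlib-only sketch of the (sid, start, stop) index construction above,
# run on an already-sorted column of site IDs:
import itertools

sids = [0, 0, 2, 2, 2]
indices, start, stop = [], 0, 0
for sid, rows in itertools.groupby(sids):
    stop += sum(1 for _ in rows)
    indices.append((sid, start, stop))
    start = stop
print(indices)  # [(0, 0, 2), (2, 2, 5)]
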
def import_gmfs(dstore, fname, sids):
    """
    Import in the datastore a ground motion field CSV file.

    :param dstore: the datastore
    :param fname: the CSV file
    :param sids: the site IDs (complete)
    :returns: event_ids
    """
    array = writers.read_composite_array(fname).array
    # has header rlzi, sid, eid, gmv_PGA, ...
    imts = [name[4:] for name in array.dtype.names[3:]]
    n_imts = len(imts)
    gmf_data_dt = numpy.dtype(
        [('rlzi', U16), ('sid', U32), ('eid', U64),
         ('gmv', (F32, (n_imts,)))])
    # store the events
    eids = numpy.unique(array['eid'])
    eids.sort()
    E = len(eids)
    eid2idx = dict(zip(eids, range(E)))
    events = numpy.zeros(E, rupture.events_dt)
    events['id'] = eids
    dstore['events'] = events
    # store the GMFs
    dic = general.group_array(array.view(gmf_data_dt), 'sid')
    lst = []
    offset = 0
    for sid in sids:
        n = len(dic.get(sid, []))
        lst.append((offset, offset + n))
        if n:
            offset += n
            gmvs = dic[sid]
            gmvs['eid'] = get_idxs(gmvs, eid2idx)
            gmvs['rlzi'] = 0  # effectively there is only 1 realization
            dstore.extend('gmf_data/data', gmvs)
    dstore['gmf_data/indices'] = numpy.array(lst, U32)
    dstore['gmf_data/imts'] = ' '.join(imts)
    sig_eps_dt = [('eid', U64), ('sig', (F32, n_imts)),
                  ('eps', (F32, n_imts))]
    dstore['gmf_data/sigma_epsilon'] = numpy.zeros(0, sig_eps_dt)
    dstore['weights'] = numpy.ones(1)
    return eids

def compute_gmfs_curves(self, monitor):
    """
    :returns: a dict with keys gmfdata, indices, hcurves
    """
    oq = self.oqparam
    with monitor('GmfGetter.init', measuremem=True):
        self.init()
    hcurves = {}  # key -> poes
    if oq.hazard_curves_from_gmfs:
        hc_mon = monitor('building hazard curves', measuremem=False)
        with monitor('building hazard', measuremem=True):
            gmfdata = self.get_gmfdata()  # returned later
            hazard = self.get_hazard(data=gmfdata)
        for sid, hazardr in hazard.items():
            dic = general.group_array(hazardr, 'rlzi')
            for rlzi, array in dic.items():
                with hc_mon:
                    gmvs = array['gmv']
                    for imti, imt in enumerate(oq.imtls):
                        poes = _gmvs_to_haz_curve(
                            gmvs[:, imti], oq.imtls[imt],
                            oq.ses_per_logic_tree_path)
                        hcurves[rsi2str(rlzi, sid, imt)] = poes
    elif oq.ground_motion_fields:  # fast lane
        with monitor('building hazard', measuremem=True):
            gmfdata = self.get_gmfdata()
    else:
        return {}
    if len(gmfdata) == 0:
        return dict(gmfdata=[])
    indices = []
    gmfdata.sort(order=('sid', 'rlzi', 'eid'))
    start = stop = 0
    for sid, rows in itertools.groupby(gmfdata['sid']):
        for row in rows:
            stop += 1
        indices.append((sid, start, stop))
        start = stop
    res = dict(gmfdata=gmfdata, hcurves=hcurves,
               sig_eps=numpy.array(self.sig_eps, self.sig_eps_dt),
               indices=numpy.array(indices, (U32, 3)))
    return res

def gen_rgetters(dstore, slc=slice(None)):
    """
    :yields: unfiltered RuptureGetters
    """
    csm_info = dstore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    ct = dstore['oqparam'].concurrent_tasks or 1
    nr = len(dstore['ruptures'])
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim[grp_id]:  # the model has no sources
            continue
        for block in general.split_in_blocks(arr, len(arr) / nr * ct):
            rgetter = RuptureGetter(
                [RuptureProxy(rec) for rec in block], dstore.filename,
                grp_id, trt_by_grp[grp_id], samples[grp_id],
                rlzs_by_gsim[grp_id])
            yield rgetter

def export_gmf_scenario_npz(ekey, dstore):
    oq = dstore['oqparam']
    dic = {}
    fname = dstore.export_path('%s.%s' % ekey)
    if 'scenario' in oq.calculation_mode:
        # compute the GMFs on the fly from the stored rupture
        # NB: for visualization purposes we want to export the full mesh
        # of points, including the ones outside the maximum distance
        # NB2: in the future, I want to add a sitecol output, then the
        # visualization of the mesh will be possible even without the GMFs;
        # in the future, here we will change
        # sitemesh = get_mesh(dstore['sitecol'], complete=False)
        sitecol = dstore['sitecol'].complete
        sitemesh = get_mesh(sitecol)
        rlzs_assoc = dstore['csm_info'].get_rlzs_assoc()
        gsims = rlzs_assoc.gsims_by_grp_id[0]  # there is a single grp_id
        E = oq.number_of_ground_motion_fields
        correl_model = oq.get_correl_model()
        [ebrupture] = calc.get_ruptures(dstore, 0)
        computer = gmf.GmfComputer(
            ebrupture, sitecol, oq.imtls, gsims,
            oq.truncation_level, correl_model)
        gmf_dt = numpy.dtype([(imt, (F32, (E,))) for imt in oq.imtls])
        imts = list(oq.imtls)
        for gsim in gsims:
            arr = computer.compute(gsim, E, oq.random_seed)
            I, S, E = arr.shape  # #IMTs, #sites, #events
            gmfa = numpy.zeros(S, gmf_dt)
            for imti, imt in enumerate(imts):
                gmfa[imt] = arr[imti]
            dic[str(gsim)] = util.compose_arrays(sitemesh, gmfa)
    elif 'event_based' in oq.calculation_mode:
        dic['sitemesh'] = get_mesh(dstore['sitecol'])
        for grp in sorted(dstore['gmf_data']):
            data_by_rlzi = group_array(
                dstore['gmf_data/' + grp].value, 'rlzi')
            for rlzi in data_by_rlzi:
                dic['rlz-%03d' % rlzi] = data_by_rlzi[rlzi]
    else:  # nothing to export
        return []
    savez(fname, **dic)
    return [fname]

def view_dupl_sources_time(token, dstore):
    """
    Display the time spent computing duplicated sources
    """
    info = dstore['source_info']
    items = sorted(group_array(info[()], 'source_id').items())
    tbl = []
    tot_time = 0
    for source_id, records in items:
        if len(records) > 1:  # dupl
            calc_time = records['calc_time'].sum()
            tot_time += calc_time + records['split_time'].sum()
            tbl.append((source_id, calc_time, len(records)))
    if tbl and info.attrs.get('has_dupl_sources'):
        tot = info['calc_time'].sum() + info['split_time'].sum()
        percent = tot_time / tot * 100
        m = '\nTotal time in duplicated sources: %d/%d (%d%%)' % (
            tot_time, tot, percent)
        return rst_table(tbl, ['source_id', 'calc_time', 'num_dupl']) + m
    else:
        return 'There are no duplicated sources'

def export_gmf_txt(key, dest, sitecol, imts, ruptures, rlz,
                   investigation_time):
    """
    :param key: output_type and export_type
    :param dest: name of the exported file
    :param sitecol: the full site collection
    :param imts: the list of intensity measure types
    :param ruptures: an ordered list of ruptures
    :param rlz: a realization object
    :param investigation_time: investigation time (None for scenario)
    """
    # the csv file has the form
    # etag,indices,gmvs_imt_1,...,gmvs_imt_N
    rows = []
    for rupture in ruptures:
        indices = rupture.indices
        gmvs = [a['gmv']
                for a in group_array(rupture.gmfa, 'imti').values()]
        row = [rupture.etag, ' '.join(map(str, indices))] + gmvs
        rows.append(row)
    write_csv(dest, rows)
    return {key: [dest]}

def test_spatial_correlation(self):
    expected = {sc1: [0.99, 0.41],
                sc2: [0.99, 0.64],
                sc3: [0.99, 0.22]}
    for case in expected:
        self.run_calc(case.__file__, 'job.ini')
        oq = self.calc.oqparam
        self.assertEqual(list(oq.imtls), ['PGA'])
        dstore = read(self.calc.datastore.calc_id)
        gmf = group_array(dstore['gmf_data/data'], 'sid')
        gmvs_site_0 = gmf[0]['gmv']
        gmvs_site_1 = gmf[1]['gmv']
        joint_prob_0_5 = joint_prob_of_occurrence(
            gmvs_site_0, gmvs_site_1, 0.5, oq.investigation_time,
            oq.ses_per_logic_tree_path)
        joint_prob_1_0 = joint_prob_of_occurrence(
            gmvs_site_0, gmvs_site_1, 1.0, oq.investigation_time,
            oq.ses_per_logic_tree_path)
        p05, p10 = expected[case]
        numpy.testing.assert_almost_equal(joint_prob_0_5, p05, decimal=1)
        numpy.testing.assert_almost_equal(joint_prob_1_0, p10, decimal=1)

def import_gmfs(dstore, fname, sids):
    """
    Import in the datastore a ground motion field CSV file.

    :param dstore: the datastore
    :param fname: the CSV file
    :param sids: the site IDs (complete)
    :returns: event_ids
    """
    array = writers.read_composite_array(fname).array
    # has header rlzi, sid, eid, gmv_PGA, ...
    imts = [name[4:] for name in array.dtype.names[3:]]
    n_imts = len(imts)
    gmf_data_dt = numpy.dtype(
        [('rlzi', U16), ('sid', U32), ('eid', U64),
         ('gmv', (F32, (n_imts,)))])
    # store the events
    eids = numpy.unique(array['eid'])
    eids.sort()
    E = len(eids)
    eid2idx = dict(zip(eids, range(E)))
    events = numpy.zeros(E, rupture.events_dt)
    events['eid'] = eids
    dstore['events'] = events
    # store the GMFs
    dic = general.group_array(array.view(gmf_data_dt), 'sid')
    lst = []
    offset = 0
    for sid in sids:
        n = len(dic.get(sid, []))
        lst.append((offset, offset + n))
        if n:
            offset += n
            gmvs = dic[sid]
            gmvs['eid'] = get_idxs(gmvs, eid2idx)
            gmvs['rlzi'] = 0  # effectively there is only 1 realization
            dstore.extend('gmf_data/data', gmvs)
    dstore['gmf_data/indices'] = numpy.array(lst, U32)
    dstore['gmf_data/imts'] = ' '.join(imts)
    return eids

def view_dupl_sources(token, dstore):
    """
    Display the duplicated sources from source_info
    """
    info = dstore['source_info']
    items = sorted(group_array(info.value, 'source_id').items())
    tbl = []
    tot_calc_time = 0
    for source_id, records in items:
        if len(records) > 1:  # dupl
            calc_time = records['calc_time'].sum()
            tot_calc_time += calc_time
            grp_ids = sorted(rec['grp_id'] for rec in records)
            tbl.append((source_id, calc_time, grp_ids))
    if tbl and info.attrs['has_dupl_sources']:
        tot = info['calc_time'].sum()
        percent = tot_calc_time / tot * 100
        m = '\nTotal calc_time in duplicated sources: %d/%d (%d%%)' % (
            tot_calc_time, tot, percent)
        return rst_table(tbl, ['source_id', 'calc_time', 'src_group_ids']) + m
    else:
        return 'There are no duplicated sources'

def build(self, losses_by_event, stats=()):
    """
    :param losses_by_event: the aggregate loss table as an array
    :param stats: list of pairs [(statname, statfunc), ...]
    :returns:
        two arrays of dtype loss_dt, with shape (P, R) and (P, S)
    """
    P, R = len(self.return_periods), len(self.weights)
    array = numpy.zeros((P, R), self.loss_dt)
    dic = group_array(losses_by_event, 'rlzi')
    for r in dic:
        num_events = self.num_events[r]
        losses = dic[r]['loss']
        for lti, lt in enumerate(self.loss_dt.names):
            ls = losses[:, lti].flatten()  # flatten only in ucerf
            # NB: do not use squeeze or the gmf_ebrisk tests will break
            lbp = losses_by_period(
                ls, self.return_periods, num_events, self.eff_time)
            array[:, r][lt] = lbp
    return self.pair(array, stats)

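# Hedged sketch of the per-realization grouping that feeds losses_by_period
# above; the event-loss-table dtype is illustrative, with two loss types:
import numpy
from openquake.baselib.general import group_array

elt_dt = numpy.dtype([('eid', numpy.uint64), ('rlzi', numpy.uint16),
                      ('loss', (numpy.float32, 2))])
elt = numpy.array([(0, 0, [1., 2.]), (1, 0, [3., 4.]), (2, 1, [5., 6.])],
                  elt_dt)
for r, arr in group_array(elt, 'rlzi').items():
    print(r, arr['loss'].sum(axis=0))  # total loss per loss type
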
def save_gmf_data(dstore, sitecol, gmfs, eids=()):
    """
    :param dstore: a :class:`openquake.baselib.datastore.DataStore` instance
    :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param gmfs: an array of shape (R, N, E, M)
    :param eids: E event IDs or the empty tuple
    """
    offset = 0
    dstore['gmf_data/data'] = gmfa = get_gmv_data(sitecol.sids, gmfs)
    dic = general.group_array(gmfa, 'sid')
    lst = []
    for sid in sitecol.complete.sids:
        rows = dic.get(sid, ())
        n = len(rows)
        lst.append(numpy.array([(offset, offset + n)],
                               riskinput.indices_dt))
        offset += n
    dstore.save_vlen('gmf_data/indices', lst)
    dstore.set_attrs('gmf_data', num_gmfs=len(gmfs))
    if len(eids):  # store the events
        events = numpy.zeros(len(eids), readinput.stored_event_dt)
        events['eid'] = eids
        dstore['events'] = events

def __fromh5__(self, dic, attrs):
    # TODO: this is called more times than needed, maybe we should cache it
    sg_data = group_array(dic['sg_data'], 'sm_id')
    sm_data = dic['sm_data']
    vars(self).update(attrs)
    self.gsim_lt = dic['gsim_lt']
    self.source_models = []
    for sm_id, rec in enumerate(sm_data):
        tdata = sg_data[sm_id]
        srcgroups = [
            sourceconverter.SourceGroup(
                self.trts[data['trti']], id=data['grp_id'],
                name=get_field(data, 'name', ''),
                eff_ruptures=data['effrup'],
                tot_ruptures=get_field(data, 'totrup', 0))
            for data in tdata]
        path = tuple(str(decode(rec['path'])).split('_'))
        sm = logictree.LtSourceModel(
            rec['name'], rec['weight'], path, srcgroups,
            rec['num_rlzs'], sm_id, rec['samples'])
        self.source_models.append(sm)
    self.init()

def import_gmfs(dstore, fname, sids):
    """
    Import in the datastore a ground motion field CSV file.

    :param dstore: the datastore
    :param fname: the CSV file
    :param sids: the site IDs (complete)
    :returns: event_ids, num_rlzs
    """
    array = writers.read_composite_array(fname).array
    # has header rlzi, sid, eid, gmv_PGA, ...
    imts = [name[4:] for name in array.dtype.names[3:]]
    n_imts = len(imts)
    gmf_data_dt = numpy.dtype(
        [('rlzi', U16), ('sid', U32), ('eid', U64),
         ('gmv', (F32, (n_imts,)))])
    # store the events
    eids = numpy.unique(array['eid'])
    eids.sort()
    events = numpy.zeros(len(eids), rupture.events_dt)
    events['eid'] = eids
    dstore['events'] = events
    # store the GMFs
    dic = general.group_array(array.view(gmf_data_dt), 'sid')
    lst = []
    offset = 0
    for sid in sids:
        n = len(dic.get(sid, []))
        lst.append((offset, offset + n))
        if n:
            offset += n
            dstore.extend('gmf_data/data', dic[sid])
    dstore['gmf_data/indices'] = numpy.array(lst, U32)
    dstore['gmf_data/imts'] = ' '.join(imts)
    # FIXME: if there is no data for the maximum realization
    # the inferred number of realizations will be wrong
    num_rlzs = array['rlzi'].max() + 1
    return eids, num_rlzs

def view_dupl_sources(token, dstore):
    """
    Show the sources with the same ID and the truly duplicated sources
    """
    array = dstore['source_info']['source_id', 'checksum', 'num_ruptures']
    dic = group_array(array, 'source_id', 'checksum')
    dupl = []
    uniq = []
    muls = []
    nr = 0
    for (source_id, checksum), group in dic.items():
        mul = len(group)
        nr += group[0]['num_ruptures']
        if mul > 1:  # duplicate
            muls.append(mul)
            dupl.append(source_id)
        else:
            uniq.append(source_id)
    if not dupl:
        return String('', nr)
    u, d, m = len(uniq), len(dupl), sum(muls) / len(dupl)
    return String('Found %d unique sources and %d duplicate sources with'
                  ' multiplicity %.1f: %s' % (u, d, m, numpy.array(dupl)),
                  nr)

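# group_array also accepts multiple key fields, returning tuple keys, which
# is how ('source_id', 'checksum') is grouped above; a tiny hedged sketch
# with an illustrative dtype:
import numpy
from openquake.baselib.general import group_array

dt = numpy.dtype([('source_id', 'S10'), ('checksum', numpy.uint32)])
arr = numpy.array([(b'A', 1), (b'A', 1), (b'B', 2)], dt)
dic = group_array(arr, 'source_id', 'checksum')
print({k: len(v) for k, v in dic.items()})  # {(b'A', 1): 2, (b'B', 2): 1}
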
def gen_rupture_getters(dstore, slc=slice(None), concurrent_tasks=1,
                        filename=None):
    """
    :yields: RuptureGetters
    """
    try:
        e0s = dstore['eslices'][:, 0]
    except KeyError:
        e0s = None
    if dstore.parent:
        dstore = dstore.parent
    csm_info = dstore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    maxweight = numpy.ceil(len(rup_array) / (concurrent_tasks or 1))
    nr, ne = 0, 0
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        for block in general.block_splitter(arr, maxweight):
            if e0s is None:
                e0 = numpy.zeros(len(block), U32)
            else:
                e0 = e0s[nr:nr + len(block)]
            rgetter = RuptureGetter(
                numpy.array(block), filename or dstore.filename, grp_id,
                trt_by_grp[grp_id], samples[grp_id], rlzs_by_gsim[grp_id],
                e0)
            yield rgetter
            nr += len(block)
            ne += rgetter.num_events

def __fromh5__(self, dic, attrs):
    # TODO: this is called more times than needed, maybe we should cache it
    sg_data = group_array(dic['sg_data'], 'sm_id')
    sm_data = dic['sm_data']
    vars(self).update(attrs)
    self.gsim_fname = decode(self.gsim_fname)
    if self.gsim_fname.endswith('.xml'):
        # otherwise it would look in the current directory
        GMPETable.GMPE_DIR = os.path.dirname(self.gsim_fname)
        trts = sorted(self.trts)
        tmp = gettemp(self.gsim_lt_xml, suffix='.xml')
        self.gsim_lt = logictree.GsimLogicTree(tmp, trts)
    else:  # fake file with the name of the GSIM
        self.gsim_lt = logictree.GsimLogicTree.from_(self.gsim_fname)
    self.source_models = []
    for sm_id, rec in enumerate(sm_data):
        tdata = sg_data[sm_id]
        srcgroups = [
            sourceconverter.SourceGroup(
                self.trts[data['trti']], id=data['grp_id'],
                name=get_field(data, 'name', ''),
                eff_ruptures=data['effrup'],
                tot_ruptures=get_field(data, 'totrup', 0))
            for data in tdata if data['effrup']]
        path = tuple(str(decode(rec['path'])).split('_'))
        trts = set(sg.trt for sg in srcgroups)
        sm = logictree.LtSourceModel(
            rec['name'], rec['weight'], path, srcgroups,
            rec['num_rlzs'], sm_id, rec['samples'])
        self.source_models.append(sm)
    self.init()
    try:
        os.remove(tmp)  # gsim_lt file
    except NameError:  # tmp is defined only in the regular case, see above
        pass

def save_gmf_data(dstore, sitecol, gmfs, imts, events=()):
    """
    :param dstore: a :class:`openquake.baselib.datastore.DataStore` instance
    :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param gmfs: an array of shape (N, E, M)
    :param imts: a list of IMT strings
    :param events: E event IDs or the empty tuple
    """
    if len(events) == 0:
        E = gmfs.shape[1]
        events = numpy.zeros(E, rupture.events_dt)
        events['id'] = numpy.arange(E, dtype=U32)
    dstore['events'] = events
    offset = 0
    # convert an array of shape (N, E, M) into an array of type gmv_data_dt
    N, E, M = gmfs.shape
    lst = [(sitecol.sids[s], ei, gmfs[s, ei])
           for s in numpy.arange(N, dtype=U32)
           for ei, event in enumerate(events)]
    gmfa = numpy.array(lst, dstore['oqparam'].gmf_data_dt())
    dstore['gmf_data/sid'] = gmfa['sid']
    dstore['gmf_data/eid'] = gmfa['eid']
    cols = ['sid', 'eid']
    for m in range(M):
        col = f'gmv_{m}'
        cols.append(col)
        dstore['gmf_data/' + col] = gmfa['gmv'][:, m]
    dstore.getitem('gmf_data').attrs['__pdcolumns__'] = ' '.join(cols)
    dic = general.group_array(gmfa, 'sid')
    lst = []
    all_sids = sitecol.complete.sids
    for sid in all_sids:
        rows = dic.get(sid, ())
        n = len(rows)
        lst.append((offset, offset + n))
        offset += n
    dstore['gmf_data/imts'] = ' '.join(imts)

def compute_gmfs_and_curves(eb_ruptures, sitecol, imts, rlzs_assoc,
                            min_iml, monitor):
    """
    :param eb_ruptures:
        a list of blocks of EBRuptures of the same SESCollection
    :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param imts: a list of IMT strings
    :param rlzs_assoc: a RlzsAssoc instance
    :param min_iml: the minimum intensity measure levels
    :param monitor: a Monitor instance
    :returns: a dictionary (rlzi, imt) -> [gmfarray, haz_curves]
    """
    oq = monitor.oqparam
    # NB: by construction each block is a non-empty list with
    # ruptures of the same trt_model_id
    trunc_level = oq.truncation_level
    correl_model = readinput.get_correl_model(oq)
    gmfadict = create(GmfColl, eb_ruptures, sitecol, imts, rlzs_assoc,
                      trunc_level, correl_model, min_iml, monitor).by_rlzi()
    result = {rlzi: [gmfadict[rlzi], None]
              if oq.ground_motion_fields else [None, None]
              for rlzi in gmfadict}
    if oq.hazard_curves_from_gmfs:
        with monitor('building hazard curves', measuremem=False):
            duration = oq.investigation_time * oq.ses_per_logic_tree_path
            for rlzi in gmfadict:
                gmvs_by_sid = group_array(gmfadict[rlzi], 'sid')
                result[rlzi][POEMAP] = gmvs_to_poe_map(
                    gmvs_by_sid, oq.imtls, oq.investigation_time, duration)
    return result