def get_amplification(oqparam):
    """
    :returns: a DataFrame (ampcode, level, PGA, SA() ...)
    """
    fname = oqparam.inputs['amplification']
    df = hdf5.read_csv(fname, {'ampcode': site.ampcode_dt, None: F64},
                       index='ampcode')
    df.fname = fname
    return df
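# A hedged usage sketch (not engine code) of how an amplification CSV of the
# shape documented above can be read with the same read_csv call; the header
# and the four data rows are invented for illustration.
def _example_read_amplification():
    from openquake.baselib.general import gettemp
    from openquake.baselib.hdf5 import read_csv
    from openquake.hazardlib.site import ampcode_dt
    ampl_csv = gettemp('''\
ampcode,level,PGA,SA(1.0)
A,0.010,1.00,1.00
A,0.050,1.10,1.05
B,0.010,0.90,0.95
B,0.050,1.20,1.10
''')
    df = read_csv(ampl_csv, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    return df.loc[b'A']  # the amplification rows for site class A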
def _read_csv(self, errors=None):
    """
    :yields: asset nodes
    """
    expected_header = set(self._csv_header('', ''))
    floatfields = set()
    strfields = self.tagcol.tagnames + self.occupancy_periods.split()
    for fname in self.datafiles:
        with open(fname, encoding='utf-8-sig', errors=errors) as f:
            try:
                fields = next(csv.reader(f))
            except UnicodeDecodeError:
                msg = ("%s is not encoded as UTF-8\ntry oq shell "
                       "and then o.fix_latin1('%s')\nor set "
                       "ignore_encoding_errors=true" % (fname, fname))
                raise RuntimeError(msg)
            header = set(self.fieldmap.get(f, f) for f in fields)
            for field in fields:
                if field not in strfields:
                    floatfields.add(field)
            missing = expected_header - header - {'exposure', 'country'}
            if len(header) < len(fields):
                raise InvalidFile(
                    '%s: The header %s contains a duplicated field' %
                    (fname, header))
            elif missing:
                raise InvalidFile('%s: missing %s' % (fname, missing))
    conv = {'lon': float, 'lat': float, 'number': float, 'area': float,
            'retrofitted': float, None: object}
    for f in strfields:
        conv[f] = str
    revmap = {}  # oq -> inp
    for inp, oq in self.fieldmap.items():
        revmap[oq] = inp
        if oq in conv:
            conv[inp] = conv[oq]
    rename = self.fieldmap.copy()
    for field in self.cost_types['name']:
        f = revmap.get(field, field)
        conv[f] = float
        rename[f] = 'value-' + field
    for field in self.occupancy_periods.split():
        f = revmap.get(field, field)
        conv[f] = float
        rename[f] = 'occupants_' + field
    for fname in self.datafiles:
        array = hdf5.read_csv(fname, conv, rename, errors=errors).array
        array['lon'] = numpy.round(array['lon'], 5)
        array['lat'] = numpy.round(array['lat'], 5)
        yield from array
def get_site_model(oqparam):
    """
    Convert the NRML file into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(
                fname, {None: float, 'vs30measured': numpy.uint8}).array
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead '
                                  'of a site_model.csv file!' % fname)
            z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
            for name in z.dtype.names:  # reorder the fields
                z[name] = sm[name]
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if missing:
            raise InvalidFile('%s: missing parameter %s' %
                              (oqparam.inputs['site_model'],
                               ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there
        # is a heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([tuple(param[name] for name in site_model_dt.names)
                          for param in params], site_model_dt)
        dupl = "\n".join('%s %s' % loc for loc, n in
                         countby(sm, 'lon', 'lat').items() if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)
def test_dupl(self):
    fname = gettemp(dupl_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    with self.assertRaises(ValueError) as ctx:
        Amplifier(self.imtls, df, self.soil_levels)
    self.assertEqual(str(ctx.exception), "Found duplicates for b'A'")
def test_case_7a(self):
    # case with preimported exposure
    self.run_calc(case_7a.__file__, 'job_h.ini')
    self.run_calc(case_7a.__file__, 'job_r.ini',
                  hazard_calculation_id=str(self.calc.datastore.calc_id))
    [fname] = export(('losses_by_event', 'csv'), self.calc.datastore)
    self.assertEqualFiles('expected/agg_losses.csv', fname)
    rup_ids = set(read_csv(fname, {None: '<S50'})['rup_id'])

    [fname] = export(('tot_curves-rlzs', 'csv'), self.calc.datastore)
    self.assertEqualFiles('expected/agg_curves.csv', fname)

    # check that the IDs in losses_by_event.csv exist in ruptures.csv
    [fname] = export(('ruptures', 'csv'), self.calc.datastore)
    rupids = set(read_csv(fname, {None: '<S50'})['rup_id'])
    self.assertTrue(rup_ids <= rupids, 'There are non-existing rupture IDs'
                    ' in the event loss table!')
def import_gmfs(dstore, fname, sids):
    """
    Import in the datastore a ground motion field CSV file.

    :param dstore: the datastore
    :param fname: the CSV file
    :param sids: the site IDs (complete)
    :returns: event_ids
    """
    array = hdf5.read_csv(fname, {'sid': U32, 'eid': U32, None: F32}).array
    names = array.dtype.names
    if names[0] == 'rlzi':  # backward compatibility
        names = names[1:]  # discard the field rlzi
    imts = [name[4:] for name in names[2:]]
    oq = dstore['oqparam']
    missing = set(oq.imtls) - set(imts)
    if missing:
        raise ValueError('The calculation needs %s which is missing from %s'
                         % (', '.join(missing), fname))
    imt2idx = {imt: i for i, imt in enumerate(oq.imtls)}
    arr = numpy.zeros(len(array), oq.gmf_data_dt())
    for name in names:
        if name.startswith('gmv_'):
            try:
                m = imt2idx[name[4:]]
            except KeyError:  # the file contains more than enough IMTs
                pass
            else:
                arr['gmv'][:, m] = array[name]
        else:
            arr[name] = array[name]

    # store the events
    eids = numpy.unique(array['eid'])
    eids.sort()
    E = len(eids)
    events = numpy.zeros(E, rupture.events_dt)
    events['id'] = eids
    dstore['events'] = events

    # store the GMFs
    dic = general.group_array(arr, 'sid')
    lst = []
    offset = 0
    gmvlst = []
    for sid in sids:
        n = len(dic.get(sid, []))
        lst.append((offset, offset + n))
        if n:
            offset += n
            gmvs = dic[sid]
            gmvlst.append(gmvs)
    dstore['gmf_data/data'] = numpy.concatenate(gmvlst)
    dstore['gmf_data/indices'] = numpy.array(lst, U32)
    dstore['gmf_data/imts'] = ' '.join(imts)
    dstore['weights'] = numpy.ones(1)
    return eids
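# Hedged sketch of the CSV layout import_gmfs expects: optionally a leading
# ``rlzi`` column, then ``sid``, ``eid`` and one ``gmv_<IMT>`` column per
# intensity measure type. All values below are invented.
def _example_gmf_csv():
    from openquake.baselib.general import gettemp
    return gettemp('''\
sid,eid,gmv_PGA,gmv_SA(0.3)
0,0,0.012,0.030
0,1,0.045,0.101
1,0,0.008,0.021
''')
# import_gmfs(dstore, _example_gmf_csv(), sids) would then populate
# events, gmf_data/data and gmf_data/indices in an open datastore.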
def _get_site_model(fname, req_site_params):
    sm = hdf5.read_csv(fname, site.site_param_dt).array
    sm['lon'] = numpy.round(sm['lon'], 5)
    sm['lat'] = numpy.round(sm['lat'], 5)
    dupl = general.get_duplicates(sm, 'lon', 'lat')
    if dupl:
        raise InvalidFile('Found duplicate sites %s in %s' % (dupl, fname))
    z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
    for name in z.dtype.names:  # reorder the fields
        z[name] = sm[name]
    return z
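# The duplicate check above relies on an engine helper; a dependency-free
# sketch of the same idea (repeated (lon, lat) pairs after rounding), with
# a hypothetical helper name:
def find_duplicate_coords(lons, lats, digits=5):
    """Return the set of (lon, lat) pairs appearing more than once."""
    seen, dupl = set(), set()
    for pair in zip(numpy.round(lons, digits), numpy.round(lats, digits)):
        if pair in seen:
            dupl.add(pair)
        seen.add(pair)
    return dupl

# find_duplicate_coords([10., 10., 11.], [45., 45., 45.]) == {(10., 45.)}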
def test_case_26_land(self):  # cali landslide simplified
    self.run_calc(case_26.__file__, 'job_land.ini')
    df = self.calc.datastore.read_df('gmf_data', 'sid')
    pd_mean = df[df.prob_disp > 0].prob_disp.mean()
    nd_mean = df[df.newmark_disp > 0].newmark_disp.mean()
    self.assertGreater(pd_mean, 0)
    self.assertGreater(nd_mean, 0)
    [fname, _, _] = export(('gmf_data', 'csv'), self.calc.datastore)
    arr = read_csv(fname)[:2]
    self.assertEqual(arr.dtype.names, ('site_id', 'event_id', 'gmv_PGA'))
def compute_mean(fname, *keys):
    """
    :returns: a list of pairs (key, weighted mean of the poe columns)
    """
    keys = [k.lower() for k in keys]
    aw = hdf5.read_csv(fname, {'imt': str, 'poe': str, None: float})
    dframe = aw.to_dframe()
    out = []
    poecols = [col for col in dframe.columns if POECOL.match(col)]
    for key, df in dframe.groupby(keys):
        poes = [df[col].to_numpy() for col in poecols]
        [avg] = numpy.average(poes, weights=aw.weights, axis=0)
        out.append((key, avg))
    return out
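# compute_mean depends on engine helpers (POECOL, the weights attached to
# the ArrayWrapper); the core is just a weighted average across the poe
# columns, sketched here with plain pandas. Column names, weights and the
# helper name are invented.
def _example_weighted_mean():
    import pandas as pd
    df = pd.DataFrame({'imt': ['PGA', 'SA(1.0)'],
                       'poe-0': [0.9, 0.7],
                       'poe-1': [0.5, 0.3]})
    weights = [0.6, 0.4]  # one weight per poe column
    out = []
    for imt, grp in df.groupby('imt'):
        poes = [grp[col].to_numpy() for col in ('poe-0', 'poe-1')]
        [avg] = numpy.average(poes, weights=weights, axis=0)
        out.append((imt, avg))
    return out  # [('PGA', 0.74), ('SA(1.0)', 0.54)]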
@classmethod
def read_df(cls, csvfname):
    """
    :param csvfname: CSV file name
    :returns: a pandas DataFrame
    """
    df = hdf5.read_csv(csvfname, {'ampcode': ampcode_dt, None: float},
                       index='ampcode')
    df.fname = csvfname
    return df
def get_crmodel(oqparam):
    """
    Return a :class:`openquake.risklib.riskinput.CompositeRiskModel`
    instance

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    risklist = get_risk_functions(oqparam)
    if not oqparam.limit_states and risklist.limit_states:
        oqparam.limit_states = risklist.limit_states
    elif 'damage' in oqparam.calculation_mode and risklist.limit_states:
        assert oqparam.limit_states == risklist.limit_states
    loss_types = oqparam.loss_dt().names
    consdict = {}
    if 'consequence' in oqparam.inputs:
        # build consdict of the form consequence_by_tagname -> tag -> array
        for by, fnames in oqparam.inputs['consequence'].items():
            if isinstance(fnames, str):  # single file
                fnames = [fnames]
            dtypedict = {by: str, 'consequence': str, 'loss_type': str,
                         None: float}
            # i.e. files collapsed.csv, fatalities.csv, ... with headers
            # taxonomy,consequence,loss_type,slight,moderate,extensive
            arrays = []
            for fname in fnames:
                arr = hdf5.read_csv(fname, dtypedict).array
                arrays.append(arr)
                for no, row in enumerate(arr, 2):
                    if row['loss_type'] not in loss_types:
                        msg = '%s: %s is not a recognized loss type, line=%d'
                        raise InvalidFile(msg % (fname, row['loss_type'], no))
            array = numpy.concatenate(arrays)
            dic = group_array(array, 'consequence')
            for consequence, group in dic.items():
                if consequence not in scientific.KNOWN_CONSEQUENCES:
                    raise InvalidFile('Unknown consequence %s in %s' %
                                      (consequence, fnames))
                bytag = {
                    tag: _cons_coeffs(grp, loss_types, risklist.limit_states)
                    for tag, grp in group_array(group, by).items()}
                consdict['%s_by_%s' % (consequence, by)] = bytag
    # for instance consdict['collapsed_by_taxonomy']['W_LFM-DUM_H3'] is
    # [(0.05,), (0.2,), (0.6,), (1.,)], one tuple per damage state, here
    # for the structural loss type
    crm = riskmodels.CompositeRiskModel(oqparam, risklist, consdict)
    return crm
def get_amplification(oqparam):
    """
    :returns: a composite array (amplification, param, imt0, imt1, ...)
    """
    fname = oqparam.inputs['amplification']
    aw = hdf5.read_csv(fname, {'ampcode': 'S2', 'level': U8, None: F64})
    levels = numpy.arange(len(aw.imls))
    for records in group_array(aw, 'ampcode').values():
        if (records['level'] != levels).any():
            raise InvalidFile('%s: levels for %s %s instead of %s' %
                              (fname, records['ampcode'][0],
                               records['level'], levels))
    return aw
def import_gmfs(dstore, fname, sids):
    """
    Import in the datastore a ground motion field CSV file.

    :param dstore: the datastore
    :param fname: the CSV file
    :param sids: the site IDs (complete)
    :returns: event_ids
    """
    array = hdf5.read_csv(fname, {'sid': U32, 'eid': U64, None: F32}).array
    names = array.dtype.names
    if names[0] == 'rlzi':  # backward compatibility
        names = names[1:]  # discard the field rlzi
    imts = [name[4:] for name in names[2:]]
    gmf_data_dt = dstore['oqparam'].gmf_data_dt()
    arr = numpy.zeros(len(array), gmf_data_dt)
    col = 0
    for name in names:
        if name.startswith('gmv_'):
            arr['gmv'][:, col] = array[name]
            col += 1
        else:
            arr[name] = array[name]

    # store the events
    eids = numpy.unique(array['eid'])
    eids.sort()
    E = len(eids)
    eid2idx = dict(zip(eids, range(E)))
    events = numpy.zeros(E, rupture.events_dt)
    events['id'] = eids
    dstore['events'] = events

    # store the GMFs
    dic = general.group_array(arr, 'sid')
    lst = []
    offset = 0
    for sid in sids:
        n = len(dic.get(sid, []))
        lst.append((offset, offset + n))
        if n:
            offset += n
            gmvs = dic[sid]
            gmvs['eid'] = get_idxs(gmvs, eid2idx)
            dstore.extend('gmf_data/data', gmvs)
    dstore['gmf_data/indices'] = numpy.array(lst, U32)
    dstore['gmf_data/imts'] = ' '.join(imts)
    sig_eps = numpy.zeros(len(eids), getters.sig_eps_dt(imts))
    sig_eps['eid'] = eids
    dstore['gmf_data/sigma_epsilon'] = sig_eps
    dstore['weights'] = numpy.ones(1)
    return eids
def test_case_7a(self):
    # case with preimported exposure
    self.run_calc(case_7a.__file__, 'job_h.ini')
    self.run_calc(case_7a.__file__, 'job_r.ini',
                  hazard_calculation_id=str(self.calc.datastore.calc_id))
    [fname] = export(('agg_loss_table', 'csv'), self.calc.datastore)
    self.assertEqualFiles('expected/agg_losses.csv', fname, delta=1E-4)
    rup_ids = set(read_csv(fname, {None: '<S50'})['rup_id'])

    [fname] = export(('agg_curves-rlzs', 'csv'), self.calc.datastore)
    self.assertEqualFiles('expected/agg_curves.csv', fname, delta=1E-4)

    # check that the IDs in agg_loss_table.csv exist in ruptures.csv;
    # this is using extract/rupture_info internally
    [fname] = export(('ruptures', 'csv'), self.calc.datastore)
    rupids = set(read_csv(fname, {None: '<S50'})['rup_id'])
    self.assertTrue(rup_ids <= rupids, 'There are non-existing rupture IDs'
                    ' in the event loss table!')

    # check that the exported ruptures can be re-imported
    text = extract(self.calc.datastore, 'ruptures').array
    rups = readinput.get_ruptures(gettemp(text))
    aac(rups['n_occ'], [1, 1, 1, 1])
def get_ruptures(fname_csv):
    """
    Read ruptures in CSV format and return an ArrayWrapper.

    :param fname_csv: path to the CSV file
    """
    if not rupture.BaseRupture._code:
        rupture.BaseRupture.init()  # initialize rupture codes
    code = rupture.BaseRupture.str2code
    aw = hdf5.read_csv(fname_csv, rupture.rupture_dt)
    rups = []
    geoms = []
    n_occ = 1
    for u, row in enumerate(aw.array):
        hypo = row['lon'], row['lat'], row['dep']
        dic = json.loads(row['extra'])
        meshes = F32(json.loads(row['mesh']))  # num_surfaces 3D arrays
        num_surfaces = len(meshes)
        shapes = []
        points = []
        minlons = []
        maxlons = []
        minlats = []
        maxlats = []
        for mesh in meshes:
            shapes.extend(mesh.shape[1:])
            points.extend(mesh.flatten())  # lons + lats + deps
            minlons.append(mesh[0].min())
            minlats.append(mesh[1].min())
            maxlons.append(mesh[0].max())
            maxlats.append(mesh[1].max())
        rec = numpy.zeros(1, rupture_dt)[0]
        rec['seed'] = row['seed']
        rec['minlon'] = minlon = min(minlons)
        rec['minlat'] = minlat = min(minlats)
        rec['maxlon'] = maxlon = max(maxlons)
        rec['maxlat'] = maxlat = max(maxlats)
        rec['mag'] = row['mag']
        rec['hypo'] = hypo
        rate = dic.get('occurrence_rate', numpy.nan)
        tup = (u, row['seed'], 'no-source', aw.trts.index(row['trt']),
               code[row['kind']], n_occ, row['mag'], row['rake'], rate,
               minlon, minlat, maxlon, maxlat, hypo, u, 0)
        rups.append(tup)
        geoms.append(numpy.concatenate([[num_surfaces], shapes, points]))
    if not rups:
        return ()
    dic = dict(geom=numpy.array(geoms, object))
    # NB: PMFs for nonparametric ruptures are missing
    return hdf5.ArrayWrapper(numpy.array(rups, rupture_dt), dic)
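# The geometry encoding above is [num_surfaces, shape pairs..., points...];
# a hypothetical decoder (not engine API) that inverts it:
def decode_geom(geom):
    """Split a flat geometry record into per-surface (3, s1, s2) meshes."""
    num_surfaces = int(geom[0])
    shapes = geom[1:1 + 2 * num_surfaces].astype(int)
    points = geom[1 + 2 * num_surfaces:]
    meshes, offset = [], 0
    for i in range(num_surfaces):
        s1, s2 = shapes[2 * i], shapes[2 * i + 1]
        n = 3 * s1 * s2  # lons + lats + deps
        meshes.append(points[offset:offset + n].reshape(3, s1, s2))
        offset += n
    return meshes

# round trip on an invented single-surface 2x2 mesh:
# mesh = numpy.arange(12.).reshape(3, 2, 2)
# geom = numpy.concatenate([[1], mesh.shape[1:], mesh.flatten()])
# numpy.array_equal(decode_geom(geom)[0], mesh)  # -> True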
def import_gmfs(dstore, fname, sids):
    """
    Import in the datastore a ground motion field CSV file.

    :param dstore: the datastore
    :param fname: the CSV file
    :param sids: the site IDs (complete)
    :returns: event_ids
    """
    array = hdf5.read_csv(fname, {'sid': U32, 'eid': U64, None: F32}).array
    imts = [name[4:] for name in array.dtype.names[2:]]
    n_imts = len(imts)
    gmf_data_dt = numpy.dtype(
        [('rlzi', U16), ('sid', U32), ('eid', U64),
         ('gmv', (F32, (n_imts,)))])
    arr = numpy.zeros(len(array), gmf_data_dt)
    col = 0
    for name in array.dtype.names:
        if name.startswith('gmv_'):
            arr['gmv'][:, col] = array[name]
            col += 1
        else:
            arr[name] = array[name]

    # store the events
    eids = numpy.unique(array['eid'])
    eids.sort()
    E = len(eids)
    eid2idx = dict(zip(eids, range(E)))
    events = numpy.zeros(E, rupture.events_dt)
    events['id'] = eids
    dstore['events'] = events

    # store the GMFs
    dic = general.group_array(arr, 'sid')
    lst = []
    offset = 0
    for sid in sids:
        n = len(dic.get(sid, []))
        lst.append((offset, offset + n))
        if n:
            offset += n
            gmvs = dic[sid]
            gmvs['eid'] = get_idxs(gmvs, eid2idx)
            dstore.extend('gmf_data/data', gmvs)
    dstore['gmf_data/indices'] = numpy.array(lst, U32)
    dstore['gmf_data/imts'] = ' '.join(imts)
    sig_eps = numpy.zeros(len(eids), getters.sig_eps_dt(imts))
    sig_eps['eid'] = eids
    dstore['gmf_data/sigma_epsilon'] = sig_eps
    dstore['weights'] = numpy.ones(1)
    return eids
@classmethod
def from_csv(cls, fname):
    """
    :param fname:
        path to a CSV file with header (lon, lat, dep) and 4 x P rows
        describing planes in terms of corner points in the order
        topleft, topright, bottomright, bottomleft
    :returns:
        a MultiSurface made of P planar surfaces
    """
    surfaces = []
    array = read_csv(fname).array.reshape(4, -1)  # shape (4, P)
    for plane in array.T:
        arr = plane.view((float, 3))  # shape (4, 3)
        surfaces.append(PlanarSurface.from_ucerf(arr))
    return cls(surfaces)
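# Hedged usage sketch with an invented single-plane (P=1) file: four corner
# points in the documented order, header lon,lat,dep.
def _example_plane_csv():
    from openquake.baselib.general import gettemp
    plane_csv = gettemp('''\
lon,lat,dep
10.0,45.0,0.0
10.1,45.0,0.0
10.1,44.9,10.0
10.0,44.9,10.0
''')
    # MultiSurface.from_csv(plane_csv) would build a single PlanarSurface
    return plane_csv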
def test_case_9(self):
    # case with noDamageLimit==0 that had NaNs in the past
    self.run_calc(case_9.__file__, 'job.ini')

    # export/import dmg_by_event and check the total nodamage
    [fname] = export(('dmg_by_event', 'csv'), self.calc.datastore)
    df = read_csv(fname, index='event_id')
    nodamage = df[df['rlz_id'] == 0]['structural~no_damage'].sum()
    self.assertEqual(nodamage, 1068763.0)

    [fname] = export(('avg_damages-stats', 'csv'), self.calc.datastore)
    self.assertEqualFiles('expected/avg_damages.csv', fname)

    [fname] = export(('avg_losses-stats', 'csv'), self.calc.datastore)
    self.assertEqualFiles('expected/losses_asset.csv', fname)
def _read_csv(self):
    """
    :yields: asset nodes
    """
    expected_header = set(self._csv_header('', ''))
    floatfields = set()
    strfields = self.tagcol.tagnames + self.occupancy_periods.split()
    for fname in self.datafiles:
        with open(fname, encoding='utf-8-sig') as f:
            fields = next(csv.reader(f))
            header = set(self.fieldmap.get(f, f) for f in fields)
            for field in fields:
                if field not in strfields:
                    floatfields.add(field)
            missing = expected_header - header - {'exposure', 'country'}
            if len(header) < len(fields):
                raise InvalidFile(
                    '%s: The header %s contains a duplicated field' %
                    (fname, header))
            elif missing:
                raise InvalidFile('%s: missing %s' % (fname, missing))
    conv = {'lon': float, 'lat': float, 'number': float, 'area': float,
            'retrofitted': float, None: object}
    revmap = {}  # oq -> inp
    for inp, oq in self.fieldmap.items():
        revmap[oq] = inp
        if oq in conv:
            conv[inp] = conv[oq]
    rename = self.fieldmap.copy()
    for field in self.cost_types['name']:
        f = revmap.get(field, field)
        conv[f] = float
        rename[f] = 'value-' + field
    for field in self.occupancy_periods.split():
        f = revmap.get(field, field)
        conv[f] = float
        rename[f] = 'occupants_' + field
    for fname in self.datafiles:
        array = hdf5.read_csv(fname, conv, rename).array
        array['lon'] = numpy.round(array['lon'], 5)
        array['lat'] = numpy.round(array['lat'], 5)
        yield from array
def get_site_model(oqparam):
    """
    Convert the NRML file into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(
                fname, {None: float, 'vs30measured': bool}).array
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead '
                                  'of a site_model.csv file!' % fname)
            z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
            for name in z.dtype.names:  # reorder the fields
                z[name] = sm[name]
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if missing:
            raise InvalidFile('%s: missing parameter %s' %
                              (oqparam.inputs['site_model'],
                               ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there
        # is a heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([tuple(param[name] for name in site_model_dt.names)
                          for param in params], site_model_dt)
        arrays.append(sm)
    return numpy.concatenate(arrays)
def test_gmf_with_uncertainty(self):
    fname = gettemp(gmf_ampl_func)
    aw = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64})
    imtls = {'PGA': self.imls}
    a = Amplifier(imtls, aw, self.soil_levels)
    res = []
    nsim = 10000
    numpy.random.seed(42)  # must be fixed
    for i in range(nsim):
        gmvs = a._amplify_gmvs(b'A', numpy.array([.1, .2, .3]), 'PGA')
        res.append(list(gmvs))
    res = numpy.array(res)
    dat = numpy.reshape(numpy.tile([.1, .2, .3], nsim), (nsim, 3))
    computed = numpy.std(numpy.log(res / dat), axis=0)
    expected = numpy.array([0.3, 0.3, 0.3])
    msg = "Computed and expected std do not match"
    numpy.testing.assert_almost_equal(computed, expected, 2, err_msg=msg)
def test01(self):
    fname = gettemp(ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    af = AmplFunction.from_dframe(df)
    truncation_level = 3
    sitecode = b'A'
    imls_soil = numpy.log([0.012, 0.052, 0.12, 0.22, 0.52])
    imls_soil = numpy.log(numpy.logspace(-2, 0, num=20))
    imtls_soil = DictArray({'PGA': imls_soil, 'SA(1.0)': imls_soil})

    # The output in this case will be (1, x, 2) i.e. 1 site, the number of
    # intensity measure levels times 2, and 2 GMMs
    tmp = _get_poes(self.meastd, imtls_soil, truncation_level)

    # This function is rather slow at the moment
    res = _get_poes_site(self.meastd, imtls_soil, truncation_level,
                         af, self.mag, sitecode, self.rrup[0],
                         squeeze=False)

    if False:  # flip to True to plot soil vs rock curves
        import matplotlib.pyplot as plt
        plt.plot(numpy.exp(imls_soil), res[0, 0:len(imls_soil), 0],
                 '-o', label='soil')
        plt.plot(numpy.exp(imls_soil), tmp[0, 0:len(imls_soil), 0],
                 '-o', label='rock')
        plt.legend()
        plt.xscale('log')
        plt.yscale('log')
        plt.grid(which='both')
        plt.show()
def get_amplification(oqparam):
    """
    :returns: a composite array (amplification, param, imt0, imt1, ...)
    """
    fname = oqparam.inputs['amplification']
    aw = hdf5.read_csv(fname, {'ampcode': site.ampcode_dt, None: F64})
    imls = ()
    if 'level' in aw.dtype.names:
        for records in group_array(aw, 'ampcode').values():
            if len(imls) == 0:
                imls = numpy.sort(records['level'])
            elif len(records['level']) != len(imls) or (
                    records['level'] != imls).any():
                raise InvalidFile('%s: levels for %s %s instead of %s' %
                                  (fname, records['ampcode'][0],
                                   records['level'], imls))
    return aw
def test01(self):
    fname = gettemp(ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    sitecode = b'A'
    imls_soil = numpy.log([0.012, 0.052, 0.12, 0.22, 0.52])
    imls_soil = numpy.log(numpy.logspace(-2, 0, num=20))
    self.cmaker.loglevels = ll = DictArray({'PGA': imls_soil,
                                            'SA(1.0)': imls_soil})
    self.cmaker.af = AmplFunction.from_dframe(df)
    self.cmaker.trunclevel = tl = 3

    # The output in this case will be (1, x, 2) i.e. 1 site, the number of
    # intensity measure levels times 2, and 2 GMMs
    tmp = _get_poes(self.meastd, ll, tl)

    # This function is rather slow at the moment
    ctx = unittest.mock.Mock(mag=self.mag, rrup=self.rrup, sids=[0],
                             sites=dict(ampcode=[sitecode]))
    res = get_poes_site(self.meastd, self.cmaker, ctx)

    if False:  # flip to True to plot soil vs rock curves
        import matplotlib.pyplot as plt
        plt.plot(numpy.exp(imls_soil), res[0, 0:len(imls_soil), 0],
                 '-o', label='soil')
        plt.plot(numpy.exp(imls_soil), tmp[0, 0:len(imls_soil), 0],
                 '-o', label='rock')
        plt.legend()
        plt.xscale('log')
        plt.yscale('log')
        plt.grid(which='both')
        plt.show()
def test_gmf_cata(self):
    fname = gettemp(cata_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    imtls = DictArray({'PGA': [numpy.nan]})
    a = Amplifier(imtls, df)

    numpy.random.seed(42)  # must be fixed
    gmvs1 = a._amplify_gmvs(b'z1', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs1, [0.217124, 0.399295, 0.602515], atol=1E-5)
    gmvs2 = a._amplify_gmvs(b'z2', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs2, [0.266652, 0.334187, 0.510845], atol=1E-5)

    numpy.random.seed(43)  # changing the seed changes the results a lot
    gmvs1 = a._amplify_gmvs(b'z1', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs1, [0.197304, 0.293422, 0.399669], atol=1E-5)
    gmvs2 = a._amplify_gmvs(b'z2', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs2, [0.117069, 0.517284, 0.475571], atol=1E-5)
def test(self):
    inputdir = os.path.dirname(case_16.__file__)
    output = gettemp(suffix='.csv')
    grid_spacing = 50
    exposure_xml = os.path.join(inputdir, 'exposure.xml')
    vs30_csv = os.path.join(inputdir, 'vs30.csv')
    sitecol = prepare_site_model([exposure_xml], [], [vs30_csv],
                                 True, True, True, grid_spacing, 5, output)
    sm = read_csv(output, {None: float, 'vs30measured': numpy.uint8})
    self.assertEqual(sm['vs30measured'].sum(), 0)
    self.assertEqual(len(sitecol), 84)  # 84 non-empty grid points
    self.assertEqual(len(sitecol), len(sm))

    # test no grid
    sc = prepare_site_model([exposure_xml], [], [vs30_csv],
                            True, True, False, 0, 5, output)
    self.assertEqual(len(sc), 148)  # 148 sites within 5 km from the params

    # test sites_csv == vs30_csv
    sc = prepare_site_model([], [vs30_csv], [vs30_csv],
                            True, True, False, 0, 5, output)
def load_insurance_data(self, ins_types, ins_files):
    """
    Read the insurance files and populate the policy_dict
    """
    for loss_type, fname in zip(ins_types, ins_files):
        array = hdf5.read_csv(
            fname, {'insurance_limit': float, 'deductible': float,
                    None: object}).array
        policy_name = array.dtype.names[0]
        policy_idx = getattr(self.assetcol.tagcol, policy_name + '_idx')
        insurance = numpy.zeros((len(policy_idx), 2))
        for pol, ded, lim in array[
                [policy_name, 'deductible', 'insurance_limit']]:
            insurance[policy_idx[pol]] = ded, lim
        self.policy_dict[loss_type] = insurance
        if self.policy_name and policy_name != self.policy_name:
            raise ValueError(
                'The file %s contains %s as policy field, but we were '
                'expecting %s' % (fname, policy_name, self.policy_name))
        else:
            self.policy_name = policy_name
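# Hedged sketch of the expected insurance file: the first column carries
# the policy tag (its name must match the exposure policy field), followed
# by ``deductible`` and ``insurance_limit``; the values are invented.
def _example_insurance_csv():
    from openquake.baselib.general import gettemp
    ins_csv = gettemp('''\
policy,deductible,insurance_limit
pol_A,100,1000
pol_B,200,2000
''')
    array = hdf5.read_csv(
        ins_csv, {'insurance_limit': float, 'deductible': float,
                  None: object}).array
    return array.dtype.names  # ('policy', 'deductible', 'insurance_limit')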
def get_crmodel(oqparam):
    """
    Return a :class:`openquake.risklib.riskinput.CompositeRiskModel`
    instance

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    risklist = get_risk_functions(oqparam)
    consdict = {}
    if 'consequence' in oqparam.inputs:
        # build consdict of the form cname_by_tagname -> tag -> array
        for by, fname in oqparam.inputs['consequence'].items():
            dtypedict = {by: str, 'cname': str, 'loss_type': str,
                         None: float}
            dic = group_array(
                hdf5.read_csv(fname, dtypedict).array, 'cname')
            for cname, group in dic.items():
                bytag = {tag: _cons_coeffs(grp, risklist.limit_states)
                         for tag, grp in group_array(group, by).items()}
                consdict['%s_by_%s' % (cname, by)] = bytag
    crm = riskmodels.CompositeRiskModel(oqparam, risklist, consdict)
    return crm
def get_rupture(oqparam):
    """
    Read the `rupture_model` file and build the corresponding rupture.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        a hazardlib rupture
    """
    rup_model = oqparam.inputs['rupture_model']
    if rup_model.endswith('.csv'):
        return rupture.from_array(hdf5.read_csv(rup_model))
    if rup_model.endswith('.xml'):
        [rup_node] = nrml.read(rup_model)
        conv = sourceconverter.RuptureConverter(
            oqparam.rupture_mesh_spacing,
            oqparam.complex_fault_mesh_spacing)
        rup = conv.convert_node(rup_node)
    else:
        raise ValueError('Unrecognized rupture model %s' % rup_model)
    rup.tectonic_region_type = '*'  # there is no TRT for scenario ruptures
    rup.rup_id = oqparam.ses_seed
    return rup
def test(self):
    inputdir = os.path.dirname(case_16.__file__)
    output = gettemp(suffix='.csv')
    grid_spacing = 50
    exposure_xml = os.path.join(inputdir, 'exposure.xml')
    vs30_csv = os.path.join(inputdir, 'vs30.csv')
    sitecol = prepare_site_model([exposure_xml], [], [vs30_csv],
                                 True, True, True, grid_spacing, 5, output)
    sm = read_csv(output, {None: float, 'vs30measured': bool})
    self.assertEqual(sm['vs30measured'].sum(), 0)
    self.assertEqual(len(sitecol), 84)  # 84 non-empty grid points
    self.assertEqual(len(sitecol), len(sm))

    # test no grid
    sc = prepare_site_model([exposure_xml], [], [vs30_csv],
                            True, True, False, 0, 5, output)
    self.assertEqual(len(sc), 148)  # 148 sites within 5 km from the params

    # test sites_csv
    sc = prepare_site_model([], [output], [vs30_csv],
                            True, True, False, 0, 5, output)
def _read_csv(self):
    """
    :yields: asset nodes
    """
    expected_header = set(self._csv_header('', ''))
    for fname in self.datafiles:
        with open(fname, encoding='utf-8-sig') as f:
            fields = next(csv.reader(f))
            header = set(fields)
            missing = expected_header - header - {'exposure', 'country'}
            if len(header) < len(fields):
                raise InvalidFile(
                    '%s: The header %s contains a duplicated field' %
                    (fname, header))
            elif missing:
                msg = ('Unexpected header in %s\nExpected: %s\nGot: %s\n'
                       'Missing: %s')
                raise InvalidFile(msg % (fname, sorted(expected_header),
                                         sorted(header), missing))
    conv = {'lon': float, 'lat': float, 'number': float, 'area': float,
            'retrofitted': float, None: object}
    rename = {}
    for field in self.cost_types['name']:
        conv[field] = float
        rename[field] = 'value-' + field
    for field in self.occupancy_periods.split():
        conv[field] = float
        rename[field] = 'occupants_' + field
    for fname in self.datafiles:
        array = hdf5.read_csv(fname, conv, rename).array
        array['lon'] = numpy.round(array['lon'], 5)
        array['lat'] = numpy.round(array['lat'], 5)
        yield from array
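# The conv/rename pair is the contract of hdf5.read_csv: conv maps a column
# name (None = fallback) to a converter, rename maps input names to
# canonical ones. A dependency-free stand-in illustrating just that
# contract (mini_read_csv is hypothetical, not the real implementation):
def mini_read_csv(text, conv, rename):
    import io
    out = []
    for row in csv.DictReader(io.StringIO(text)):
        rec = {}
        for name, value in row.items():
            func = conv.get(name, conv.get(None, str))
            rec[rename.get(name, name)] = func(value)
        out.append(rec)
    return out

# mini_read_csv('lon,lat,structural\n10.0,45.0,1000\n',
#               {'lon': float, 'lat': float, 'structural': float,
#                None: object},
#               {'structural': 'value-structural'})
# -> [{'lon': 10.0, 'lat': 45.0, 'value-structural': 1000.0}]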
def get_ruptures(fname_csv):
    """
    Read ruptures in CSV format and return an ArrayWrapper
    """
    if not rupture.BaseRupture._code:
        rupture.BaseRupture.init()  # initialize rupture codes
    code = rupture.BaseRupture.str2code
    aw = hdf5.read_csv(fname_csv, rupture.rupture_dt)
    trts = aw.trts
    rups = []
    geoms = []
    n_occ = 1
    for u, row in enumerate(aw.array):
        hypo = row['lon'], row['lat'], row['dep']
        dic = json.loads(row['extra'])
        mesh = F32(json.loads(row['mesh']))
        s1, s2 = mesh.shape[1:]
        rec = numpy.zeros(1, rupture_dt)[0]
        rec['seed'] = row['seed']
        rec['minlon'] = minlon = mesh[0].min()
        rec['minlat'] = minlat = mesh[1].min()
        rec['maxlon'] = maxlon = mesh[0].max()
        rec['maxlat'] = maxlat = mesh[1].max()
        rec['mag'] = row['mag']
        rec['hypo'] = hypo
        rate = dic.get('occurrence_rate', numpy.nan)
        tup = (u, row['seed'], 'no-source', trts.index(row['trt']),
               code[row['kind']], n_occ, row['mag'], row['rake'], rate,
               minlon, minlat, maxlon, maxlat, hypo, u, 0, 0)
        rups.append(tup)
        points = mesh.flatten()  # lons + lats + deps
        # FIXME: extend to MultiSurfaces
        geoms.append(numpy.concatenate([[1], [s1, s2], points]))
    if not rups:
        return ()
    dic = dict(geom=numpy.array(geoms, object))
    # NB: PMFs for nonparametric ruptures are missing
    return hdf5.ArrayWrapper(numpy.array(rups, rupture_dt), dic)
def test_resampling(self):
    path = os.path.dirname(os.path.abspath(__file__))

    # read the amplification function
    f_af = os.path.join(path, 'data', 'convolution', 'amplification.csv')
    df_af = read_csv(f_af, {'ampcode': ampcode_dt, None: numpy.float64},
                     index='ampcode')

    # read the hazard curve
    f_hc = os.path.join(path, 'data', 'convolution', 'hazard_curve.csv')
    df_hc = pd.read_csv(f_hc, skiprows=1)

    # get the imls from the hazard curve
    imls = []
    pattern = 'poe-(\\d*\\.\\d*)'
    for k in df_hc.columns:
        m = re.match(pattern, k)
        if m:
            imls.append(float(m.group(1)))
    imtls = DictArray({'PGA': imls})

    # create a list with one ProbabilityCurve instance
    poes = numpy.squeeze(df_hc.iloc[0, 3:].to_numpy())
    tmp = numpy.expand_dims(poes, 1)
    pcurves = [ProbabilityCurve(tmp)]

    soil_levels = numpy.array(list(numpy.geomspace(0.001, 2, 50)))
    a = Amplifier(imtls, df_af, soil_levels)
    res = a.amplify(b'MQ15', pcurves)

    tmp = 'hazard_curve_expected.csv'
    fname_expected = os.path.join(path, 'data', 'convolution', tmp)
    expected = numpy.loadtxt(fname_expected)
    numpy.testing.assert_allclose(numpy.squeeze(res[0].array), expected)