def test_case_7(self):
    # 2 models x 3 GMPEs, 1000 samples * 10 SES
    expected = [
        'hazard_curve-mean.csv',
    ]
    out = self.run_calc(case_7.__file__, 'job.ini', exports='csv')
    aw = extract(self.calc.datastore, 'realizations')
    dic = countby(aw.array, 'branch_path')
    self.assertEqual({b'b11~BA': 332,  # w = .6 * .5 = .30
                      b'b11~CB': 169,  # w = .6 * .3 = .18
                      b'b11~CY': 108,  # w = .6 * .2 = .12
                      b'b12~BA': 193,  # w = .4 * .5 = .20
                      b'b12~CB': 115,  # w = .4 * .3 = .12
                      b'b12~CY': 83},  # w = .4 * .2 = .08
                     dic)
    fnames = out['hcurves', 'csv']
    mean_eb = get_mean_curves(self.calc.datastore)
    for exp, got in zip(expected, fnames):
        self.assertEqualFiles('expected/%s' % exp, got)
    mean_cl = get_mean_curves(self.calc.cl.datastore)
    reldiff, _index = max_rel_diff_index(mean_cl, mean_eb, min_value=0.1)
    self.assertLess(reldiff, 0.07)
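# Every snippet in this file relies on countby from openquake.baselib.general.
# What follows is a minimal sketch of its assumed behavior, not the real
# implementation: count the records of a structured array, grouped by the
# values of one or more fields.
import collections
import numpy

def countby(array, *kfields):
    """
    :returns: a dict field value(s) -> number of records with those values
    """
    counter = collections.Counter()
    for rec in array:
        key = (rec[kfields[0]] if len(kfields) == 1
               else tuple(rec[k] for k in kfields))
        counter[key] += 1
    return dict(counter)

# usage sketch: count events per realization
events = numpy.array([(0, 0), (1, 0), (2, 1)],
                     [('id', numpy.uint32), ('rlz', numpy.uint16)])
assert countby(events, 'rlz') == {0: 2, 1: 1}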
def get_site_model(oqparam):
    """
    Convert the NRML file into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(
                fname, {None: float, 'vs30measured': numpy.uint8}).array
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead '
                                  'of a site_model.csv file!' % fname)
            z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
            for name in z.dtype.names:  # reorder the fields
                z[name] = sm[name]
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if missing:
            raise InvalidFile('%s: missing parameter %s' %
                              (oqparam.inputs['site_model'],
                               ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there
        # is a heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([tuple(param[name] for name in site_model_dt.names)
                          for param in params], site_model_dt)
        dupl = "\n".join('%s %s' % loc for loc, n in countby(
            sm, 'lon', 'lat').items() if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)
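# A usage sketch of the duplicated-sites check above, reusing the countby
# sketch given earlier: sites sharing the same (lon, lat) pair show up
# with a count greater than 1.
sm = numpy.array([(10.0, 45.0), (10.1, 45.0), (10.0, 45.0)],
                 [('lon', float), ('lat', float)])
dupl = [loc for loc, n in countby(sm, 'lon', 'lat').items() if n > 1]
assert dupl == [(10.0, 45.0)]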
def view_ruptures_events(token, dstore):
    num_ruptures = len(dstore['ruptures'])
    num_events = len(dstore['events'])
    events_by_rlz = countby(dstore['events'][()], 'rlz')
    mult = round(num_events / num_ruptures, 3)
    lst = [('Total number of ruptures', num_ruptures),
           ('Total number of events', num_events),
           ('Rupture multiplicity', mult),
           ('Events by rlz', events_by_rlz.values())]
    return rst_table(lst)
def view_ruptures_events(token, dstore):
    num_ruptures = len(dstore['ruptures'])
    num_events = len(dstore['events'])
    events_by_rlz = countby(dstore['events'][()], 'rlz_id')
    mult = round(num_events / num_ruptures, 3)
    lst = [('Total number of ruptures', num_ruptures),
           ('Total number of events', num_events),
           ('Rupture multiplicity', mult),
           ('Events by rlz', events_by_rlz.values())]
    return text_table(lst)
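# The two variants above differ in the realization field name ('rlz' vs
# 'rlz_id') and in the renderer (rst_table vs text_table). A minimal sketch
# of such a renderer, assuming a reStructuredText simple-table layout; the
# real helpers in openquake.baselib may have a different signature.
def rst_table(rows):
    rows = [[str(col) for col in row] for row in rows]
    widths = [max(len(row[i]) for row in rows) for i in range(len(rows[0]))]
    sep = ' '.join('=' * w for w in widths)
    body = [' '.join(c.ljust(w) for c, w in zip(row, widths)) for row in rows]
    return '\n'.join([sep, body[0], sep] + body[1:] + [sep])

# usage sketch with a header row plus data rows
print(rst_table([['src_id', 'grp_id', 'act_ruptures'],
                 ['A', 0, 12], ['B', 0, 7]]))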
def view_act_ruptures_by_src(token, dstore):
    """
    Display the actual number of ruptures by source in event based
    calculations
    """
    data = dstore['ruptures'][('source_id', 'grp_id', 'rup_id')]
    counts = sorted(countby(data, 'source_id').items(),
                    key=operator.itemgetter(1), reverse=True)
    # map each source to its group (the ruptures of a source are assumed
    # to belong to a single grp_id)
    grp_by_src = dict(zip(data['source_id'], data['grp_id']))
    table = [['src_id', 'grp_id', 'act_ruptures']]
    for source_id, act_ruptures in counts:
        table.append([source_id, grp_by_src[source_id], act_ruptures])
    return rst_table(table)
def view_act_ruptures_by_src(token, dstore):
    """
    Display the actual number of ruptures by source in event based
    calculations
    """
    data = dstore['ruptures'][('srcidx', 'serial')]
    counts = sorted(countby(data, 'srcidx').items(),
                    key=operator.itemgetter(1), reverse=True)
    src_info = dstore['source_info'][('grp_id', 'source_id')]
    table = [['src_id', 'grp_id', 'act_ruptures']]
    for srcidx, act_ruptures in counts:
        src = src_info[srcidx]
        table.append([src['source_id'], src['grp_id'], act_ruptures])
    return rst_table(table)
def get_loss_builder(dstore, return_periods=None, loss_dt=None):
    """
    :param dstore: datastore for an event based risk calculation
    :returns: a LossesByPeriodBuilder instance
    """
    oq = dstore['oqparam']
    weights = dstore['csm_info'].rlzs['weight']
    eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
    num_events = countby(dstore['events'].value, 'rlz')
    periods = return_periods or oq.return_periods or scientific.return_periods(
        eff_time, max(num_events.values()))
    return scientific.LossesByPeriodBuilder(
        numpy.array(periods), loss_dt or oq.loss_dt(), weights, num_events,
        eff_time, oq.risk_investigation_time)
def view_act_ruptures_by_src(token, dstore):
    """
    Display the actual number of ruptures by source in event based
    calculations
    """
    data = dstore['ruptures'].value[['srcidx', 'serial']]
    counts = sorted(countby(data, 'srcidx').items(),
                    key=operator.itemgetter(1), reverse=True)
    src_info = dstore['source_info'].value[['grp_id', 'source_id']]
    table = [['src_id', 'grp_id', 'act_ruptures']]
    for srcidx, act_ruptures in counts:
        src = src_info[srcidx]
        table.append([src['source_id'], src['grp_id'], act_ruptures])
    return rst_table(table)
def get_loss_builder(dstore, return_periods=None, loss_dt=None):
    """
    :param dstore: datastore for an event based risk calculation
    :returns: a LossCurvesMapsBuilder instance
    """
    oq = dstore['oqparam']
    weights = dstore['weights'][()]
    eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
    num_events = countby(dstore['events'][()], 'rlz')
    periods = return_periods or oq.return_periods or scientific.return_periods(
        eff_time, max(num_events.values()))
    return scientific.LossCurvesMapsBuilder(
        oq.conditional_loss_poes, numpy.array(periods),
        loss_dt or oq.loss_dt(), weights, num_events,
        eff_time, oq.risk_investigation_time)
def get_loss_builder(dstore, return_periods=None, loss_dt=None):
    """
    :param dstore: datastore for an event based risk calculation
    :returns: a LossCurvesMapsBuilder instance
    """
    oq = dstore['oqparam']
    weights = dstore['weights'][()]
    eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
    num_events = general.countby(dstore['events'][()], 'rlz_id')
    periods = return_periods or oq.return_periods or scientific.return_periods(
        eff_time, max(num_events.values()))
    return scientific.LossCurvesMapsBuilder(
        oq.conditional_loss_poes, numpy.array(periods),
        loss_dt or oq.loss_dt(), weights, num_events,
        eff_time, oq.risk_investigation_time)
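# get_loss_builder falls back on scientific.return_periods(eff_time, n) for
# the default return periods. A sketch of the assumed behavior: pick the
# 1-2-5 ladder of periods between eff_time/n and eff_time.
import numpy

def return_periods(eff_time, num_losses):
    min_time = eff_time / num_losses
    periods = []
    period = 1
    while period <= eff_time:
        for val in (1, 2, 5):
            time = period * val
            if min_time <= time <= eff_time:
                periods.append(time)
        period *= 10
    return numpy.array(periods, numpy.uint32)

assert list(return_periods(10000, 2000)) == [
    5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000]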
def test_supertask(self):
    # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
    # and 18 outputs (1 output does not produce a subtask)
    allargs = [('aaaaeeeeiii',),
               ('uuuuaaaaeeeeiii',),
               ('aaaaaaaaeeeeiii',),
               ('aaaaeeeeiiiiiooooooo',)]
    numchars = sum(len(arg) for arg, in allargs)  # 61
    tmp = pathlib.Path(tempfile.mkdtemp(), 'calc_1.hdf5')
    with hdf5.File(tmp) as h5:
        monitor = performance.Monitor(hdf5=h5)
        res = parallel.Starmap(supertask, allargs, monitor).reduce()
    self.assertEqual(res, {'n': numchars})
    # check that the correct information is stored in the hdf5 file
    with hdf5.File(tmp) as h5:
        num = general.countby(h5['performance_data'].value, 'operation')
        self.assertEqual(num[b'waiting'], 4)
        self.assertEqual(num[b'total supertask'], 18)  # outputs
        self.assertEqual(num[b'total get_length'], 17)  # subtasks
        self.assertGreater(len(h5['task_info/supertask']), 0)
    shutil.rmtree(tmp.parent)
def test_supertask(self):
    # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
    # and 5 real outputs (one from the yield {})
    allargs = [('aaaaeeeeiii',),
               ('uuuuaaaaeeeeiii',),
               ('aaaaaaaaeeeeiii',),
               ('aaaaeeeeiiiiiooooooo',)]
    numchars = sum(len(arg) for arg, in allargs)  # 61
    tmpdir = tempfile.mkdtemp()
    tmp = os.path.join(tmpdir, 'calc_1.hdf5')
    hdf5.File(tmp, 'w').close()  # the file must exist
    smap = parallel.Starmap(supertask, allargs, hdf5path=tmp)
    res = smap.reduce()
    self.assertEqual(res, {'n': numchars})
    # check that the correct information is stored in the hdf5 file
    with hdf5.File(tmp, 'r') as h5:
        num = general.countby(h5['performance_data'][()], 'operation')
        self.assertEqual(num[b'waiting'], 4)
        self.assertEqual(num[b'total supertask'], 5)  # outputs
        self.assertEqual(num[b'total get_length'], 17)  # subtasks
        self.assertGreater(len(h5['task_info']), 0)
    shutil.rmtree(tmpdir)
def test_supertask(self):
    # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
    allargs = [('aaaaeeeeiii',),
               ('uuuuaaaaeeeeiii',),
               ('aaaaaaaaeeeeiii',),
               ('aaaaeeeeiiiiiooooooo',)]
    numchars = sum(len(arg) for arg, in allargs)  # 61
    tmpdir = tempfile.mkdtemp()
    tmp = os.path.join(tmpdir, 'calc_1.hdf5')
    performance.init_performance(tmp, swmr=True)
    smap = parallel.Starmap(supertask, allargs, h5=hdf5.File(tmp, 'a'))
    res = smap.reduce()
    smap.h5.close()
    self.assertEqual(res, {'n': numchars})
    # check that the correct information is stored in the hdf5 file
    with hdf5.File(tmp, 'r') as h5:
        num = general.countby(h5['performance_data'][()], 'operation')
        self.assertEqual(num[b'waiting'], 4)
        self.assertEqual(num[b'total supertask'], 4)  # tasks
        self.assertEqual(num[b'total get_length'], 17)  # subtasks
        info = h5['task_info'][()]
        dic = dict(general.fast_agg3(info, 'taskname', ['received']))
        self.assertGreater(dic[b'get_length'], 0)
        self.assertGreater(dic[b'supertask'], 0)
    shutil.rmtree(tmpdir)
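# The last variant aggregates task_info with general.fast_agg3. A minimal
# pure-numpy sketch of its assumed behavior: group a structured array by a
# key field and sum the given value fields, one output record per key.
import numpy

def fast_agg3(sarray, kfield, vfields):
    out_dt = [(kfield, sarray.dtype[kfield])] + [
        (v, sarray.dtype[v]) for v in vfields]
    keys, inverse = numpy.unique(sarray[kfield], return_inverse=True)
    out = numpy.zeros(len(keys), out_dt)
    out[kfield] = keys
    for v in vfields:
        for i, val in zip(inverse, sarray[v]):
            out[v][i] += val
    return out

# usage sketch mirroring dict(general.fast_agg3(info, 'taskname', ['received']))
info = numpy.array([(b'supertask', 10), (b'get_length', 4), (b'get_length', 6)],
                   [('taskname', 'S20'), ('received', numpy.int64)])
assert dict(fast_agg3(info, 'taskname', ['received'])) == {
    b'get_length': 10, b'supertask': 10}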
def get_site_model(oqparam):
    """
    Convert the NRML file into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    if 'amplification' in oqparam.inputs:
        req_site_params.add('ampcode')
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(fname, site.site_param_dt).array
            sm['lon'] = numpy.round(sm['lon'], 5)
            sm['lat'] = numpy.round(sm['lat'], 5)
            dupl = get_duplicates(sm, 'lon', 'lat')
            if dupl:
                raise InvalidFile(
                    'Found duplicate sites %s in %s' % (dupl, fname))
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead '
                                  'of a site_model.csv file!' % fname)
            params = sorted(set(sm.dtype.names) | req_site_params)
            z = numpy.zeros(
                len(sm), [(p, site.site_param_dt[p]) for p in params])
            for name in z.dtype.names:
                try:
                    z[name] = sm[name]
                except ValueError:  # missing, use the global parameter
                    # exercised in the test classical/case_28
                    value = getattr(oqparam, site.param[name])
                    if name == 'vs30measured':  # special case
                        value = value == 'measured'
                    z[name] = value
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if 'ampcode' in missing:  # use a default of b''
            missing -= {'ampcode'}
            for param in params:
                param['ampcode'] = b''
        if missing:
            raise InvalidFile('%s: missing parameter %s' %
                              (oqparam.inputs['site_model'],
                               ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there
        # is a heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([tuple(param[name] for name in site_model_dt.names)
                          for param in params], site_model_dt)
        dupl = "\n".join('%s %s' % loc for loc, n in countby(
            sm, 'lon', 'lat').items() if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)
def _read_risk_data(self):
    # read the exposure (if any), the risk model (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_crmodel()  # must be called first
    if oq.hazard_calculation_id:
        with util.read(oq.hazard_calculation_id) as dstore:
            haz_sitecol = dstore['sitecol'].complete
    else:
        haz_sitecol = readinput.get_site_collection(oq)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
            haz_sitecol.make_complete()
        if 'site_model' in oq.inputs:
            self.datastore['site_model'] = readinput.get_site_model(oq)
    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        exposure = self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['cost_calculator'] = exposure.cost_calculator
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = (
                numpy.array(exposure.exposures, hdf5.vstr))
    elif 'assetcol' in self.datastore.parent:
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(oq.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
            nsites = len(self.sitecol)
            if (oq.spatial_correlation != 'no' and
                    nsites > MAXSITES):  # hard-coded, heuristic
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
    if self.sitecol:
        logging.info('Read N=%d hazard sites and L=%d hazard levels',
                     len(self.sitecol), len(oq.imtls.array))
    if oq_hazard:
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' % (
                    oq.time_event, oq_hazard.time_event))
    if oq.job_type == 'risk':
        tmap_arr, tmap_lst = logictree.taxonomy_mapping(
            self.oqparam.inputs.get('taxonomy_mapping'),
            self.assetcol.tagcol.taxonomy)
        self.crmodel.tmap = tmap_lst
        if len(tmap_arr):
            self.datastore['taxonomy_mapping'] = tmap_arr
        taxonomies = set(taxo for items in self.crmodel.tmap
                         for taxo, weight in items if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.crmodel.taxonomies)
        if self.crmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        if len(self.crmodel.taxonomies) > len(taxonomies):
            logging.info('Reducing risk model from %d to %d taxonomies',
                         len(self.crmodel.taxonomies), len(taxonomies))
            self.crmodel = self.crmodel.reduce(taxonomies)
            self.crmodel.tmap = tmap_lst
        self.crmodel.vectorize_cons_model(self.assetcol.tagcol)
    if hasattr(self, 'sitecol') and self.sitecol:
        if 'site_model' in oq.inputs:
            assoc_dist = (oq.region_grid_spacing * 1.414
                          if oq.region_grid_spacing else 5)  # Graeme's 5km
            sm = readinput.get_site_model(oq)
            self.sitecol.complete.assoc(sm, assoc_dist)
        self.datastore['sitecol'] = self.sitecol.complete
    # used in the risk calculators
    self.param = dict(individual_curves=oq.individual_curves,
                      avg_losses=oq.avg_losses)
    # compute exposure stats
    if hasattr(self, 'assetcol'):
        arr = self.assetcol.array
        num_assets = list(general.countby(arr, 'site_id').values())
        self.datastore['assets_by_site'] = get_stats(num_assets)
        num_taxos = self.assetcol.num_taxonomies_by_site()
        self.datastore['taxonomies_by_site'] = get_stats(num_taxos)
        save_exposed_values(
            self.datastore, self.assetcol, oq.loss_names, oq.aggregate_by)
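# The exposure stats above go through a get_stats helper. A minimal sketch
# of its assumed behavior: summarize a sequence of per-site counts into a
# single structured record; the field layout here is an illustration.
import numpy

stats_dt = numpy.dtype([('mean', numpy.float32), ('std', numpy.float32),
                        ('min', numpy.float32), ('max', numpy.float32),
                        ('len', numpy.uint16)])

def get_stats(seq):
    # sample standard deviation; a single value has no spread
    std = numpy.nan if len(seq) == 1 else numpy.std(seq, ddof=1)
    return numpy.array(
        (numpy.mean(seq), std, numpy.min(seq), numpy.max(seq), len(seq)),
        stats_dt)

s = get_stats([3, 1, 2])  # e.g. assets per site
assert s['len'] == 3 and s['max'] == 3.0 and s['mean'] == 2.0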