def test_double(self):
    fname = gettemp(double_ampl_func)
    aw = read_csv(fname, {'ampcode': 'S2', 'level': numpy.uint8,
                          None: numpy.float64})
    a = Amplifier(self.imtls, aw)
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.989, 0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79,
               0.69, 0.09, 0.09], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.989, 0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79,
               0.69, 0.09, 0.09], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.989, 0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79,
               0.69, 0.09, 0.09], atol=1E-6)
def test_resampling(self):
    path = os.path.dirname(os.path.abspath(__file__))

    # Read the amplification function (AF)
    f_af = os.path.join(path, 'data', 'convolution', 'amplification.csv')
    df_af = read_csv(f_af, {'ampcode': ampcode_dt, None: numpy.float64},
                     index='ampcode')

    # Read the hazard curve (hc)
    f_hc = os.path.join(path, 'data', 'convolution', 'hazard_curve.csv')
    df_hc = pd.read_csv(f_hc, skiprows=1)

    # Get the IMLs from the hc column names
    imls = []
    pattern = 'poe-(\\d*\\.\\d*)'
    for k in df_hc.columns:
        m = re.match(pattern, k)
        if m:
            imls.append(float(m.group(1)))
    imtls = DictArray({'PGA': imls})

    # Create a ProbabilityCurve instance
    poes = numpy.squeeze(df_hc.iloc[0, 3:].to_numpy())
    tmp = numpy.expand_dims(poes, 1)
    pcurve = ProbabilityCurve(tmp)

    soil_levels = numpy.array(list(numpy.geomspace(0.001, 2, 50)))
    a = Amplifier(imtls, df_af, soil_levels)
    res = a.amplify(b'MQ15', pcurve)

    tmp = 'hazard_curve_expected.csv'
    fname_expected = os.path.join(path, 'data', 'convolution', tmp)
    expected = numpy.loadtxt(fname_expected)
    numpy.testing.assert_allclose(numpy.squeeze(res.array), expected)
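# Illustrative sketch (not part of the original tests): how the IMLs are
# recovered from hazard-curve headers such as 'poe-0.001' in test_resampling
# above; the column names used here are hypothetical.
def _imls_from_headers(columns=('lon', 'lat', 'depth',
                                'poe-0.001', 'poe-0.01', 'poe-0.1')):
    import re
    # keep the numeric part of every 'poe-<float>' column;
    # -> [0.001, 0.01, 0.1] for the hypothetical defaults above
    return [float(m.group(1)) for c in columns
            for m in [re.match(r'poe-(\d*\.\d*)', c)] if m]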
def test_simple(self):
    fname = gettemp(simple_ampl_func)
    aw = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64})
    a = Amplifier(self.imtls, aw, self.soil_levels)
    a.check(self.vs30, vs30_tolerance=1)
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985002, 0.979997, 0.970004, 0.940069, 0.889961,
               0.79, 0.690037], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985002, 0.979997, 0.970004, 0.940069, 0.889961,
               0.79, 0.690037], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985002, 0.979996, 0.969991, 0.940012, 0.889958,
               0.79, 0.690037], atol=1E-6)

    # amplify GMFs with sigmas
    numpy.random.seed(42)
    gmvs = a._amplify_gmvs(b'A', numpy.array([.005, .010, .015]), 'PGA')
    numpy.testing.assert_allclose(gmvs, [0.005307, 0.010093, 0.016804],
                                  atol=1E-5)
def test_dupl(self):
    fname = gettemp(dupl_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    with self.assertRaises(ValueError) as ctx:
        Amplifier(self.imtls, df, self.soil_levels)
    self.assertEqual(str(ctx.exception), "Found duplicates for b'A'")
def test_gmf_with_uncertainty(self):
    fname = gettemp(gmf_ampl_func)
    aw = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64})
    imtls = {'PGA': self.imls}
    a = Amplifier(imtls, aw, self.soil_levels)
    res = []
    nsim = 10000
    numpy.random.seed(42)  # must be fixed
    for i in range(nsim):
        gmvs = a._amplify_gmvs(b'A', numpy.array([.1, .2, .3]), 'PGA')
        res.append(list(gmvs))
    res = numpy.array(res)
    dat = numpy.reshape(numpy.tile([.1, .2, .3], nsim), (nsim, 3))
    computed = numpy.std(numpy.log(res / dat), axis=0)
    expected = numpy.array([0.3, 0.3, 0.3])
    msg = "Computed and expected std do not match"
    numpy.testing.assert_almost_equal(computed, expected, 2, err_msg=msg)
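# Hedged illustration (not part of the original suite): the estimator used in
# test_gmf_with_uncertainty recovers the amplification sigma because, under a
# multiplicative lognormal model, amplified = gmv * median_af * exp(sigma * eps);
# here median_af == 1 is assumed for simplicity.
def _recover_sigma(nsim=10000, sigma=0.3, seed=42):
    import numpy
    numpy.random.seed(seed)
    gmvs = numpy.array([.1, .2, .3])
    eps = numpy.random.normal(size=(nsim, 3))
    res = gmvs * numpy.exp(sigma * eps)  # simulated amplified GMVs
    return numpy.std(numpy.log(res / gmvs), axis=0)  # ~ [0.3, 0.3, 0.3]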
def test_trivial(self):
    # Using the heaviside function, i.e. `amplify_one` has contributions
    # only for soil_intensity < a * mid_intensity with a=1.
    # In this case the minimum mid_intensity is 0.0015, which is smaller
    # than the minimum soil intensity 0.0020, so some contribution is
    # lost: this is the reason why the first poe is 0.985 instead of 0.989.
    fname = gettemp(trivial_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    a = Amplifier(self.imtls, df, self.soil_levels)
    a.check(self.vs30, 0)
    numpy.testing.assert_allclose(
        a.midlevels, [0.0015, 0.0035, 0.0075, 0.015, 0.035, 0.075,
                      0.15, 0.35, 0.75, 1.1])
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985, 0.98, 0.97, 0.94, 0.89, 0.79, 0.69], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985, 0.98, 0.97, 0.94, 0.89, 0.79, 0.69], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985, 0.98, 0.97, 0.94, 0.89, 0.79, 0.69], atol=1E-6)
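# Hedged note (the imls below are an assumption about self.imtls, not stated
# in this file): the midlevels checked in test_trivial match the midpoints of
# consecutive hazard IMLs.
def _midlevels(imls=(.001, .002, .005, .01, .02, .05, .1, .2, .5, 1., 1.2)):
    # midpoints of consecutive levels, e.g. (.001 + .002) / 2 == 0.0015
    return [(x + y) / 2 for x, y in zip(imls, imls[1:])]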
def test_simple(self):
    fname = gettemp(simple_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    a = Amplifier(self.imtls, df, self.soil_levels)
    # a.check(self.vs30, vs30_tolerance=1)
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.981141, 0.975771, 0.964955, 0.935616, 0.882413,
               0.785659, 0.636667], atol=1e-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.981141, 0.975771, 0.964955, 0.935616, 0.882413,
               0.785659, 0.636667], atol=1e-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.981681, 0.976563, 0.967238, 0.940109, 0.890456,
               0.799286, 0.686047], atol=1e-6)

    # Amplify GMFs with sigmas
    numpy.random.seed(42)
    gmvs = a._amplify_gmvs(b'A', numpy.array([.005, .010, .015]), 'PGA')
    numpy.testing.assert_allclose(gmvs, [0.005401, 0.010356, 0.016704],
                                  atol=1E-5)
def test_double(self):
    fname = gettemp(double_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    a = Amplifier(self.imtls, df, self.soil_levels)
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985122, 0.979701, 0.975965, 0.96634, 0.922497,
               0.886351, 0.790249], atol=1E-6)
    # poes, [0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985122, 0.979701, 0.975965, 0.96634, 0.922497,
               0.886351, 0.790249], atol=1E-6)
    # poes, [0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985122, 0.979701, 0.975965, 0.96634, 0.922497,
               0.886351, 0.790249], atol=1E-6)
    # poes, [0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79], atol=1E-6)

    # amplify GMFs without sigmas
    gmvs = a._amplify_gmvs(b'A', numpy.array([.1, .2, .3]), 'SA(0.5)')
    numpy.testing.assert_allclose(gmvs, [.2, .4, .6])
def test_double(self):
    fname = gettemp(double_ampl_func)
    aw = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64})
    a = Amplifier(self.imtls, aw)
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.989, 0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79,
               0.69, 0.09, 0.09], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.989, 0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79,
               0.69, 0.09, 0.09], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.989, 0.989, 0.985, 0.98, 0.97, 0.94, 0.89, 0.79,
               0.69, 0.09, 0.09], atol=1E-6)

    # amplify GMFs without sigmas
    gmvs = a._amplify_gmvs(b'A', numpy.array([.1, .2, .3]), 'SA(0.5)')
    numpy.testing.assert_allclose(gmvs, [.2, .4, .6])
def calc_stats(self):
    oq = self.oqparam
    hstats = oq.hazard_stats()
    # initialize datasets
    N = len(self.sitecol.complete)
    P = len(oq.poes)
    M = len(oq.imtls)
    if oq.soil_intensities is not None:
        L = M * len(oq.soil_intensities)
    else:
        L = len(oq.imtls.array)
    R = len(self.rlzs_assoc.realizations)
    S = len(hstats)
    if R > 1 and oq.individual_curves or not hstats:
        self.datastore.create_dset('hcurves-rlzs', F32, (N, R, L))
        if oq.poes:
            self.datastore.create_dset('hmaps-rlzs', F32, (N, R, M, P))
    if hstats:
        self.datastore.create_dset('hcurves-stats', F32, (N, S, L))
        if oq.poes:
            self.datastore.create_dset('hmaps-stats', F32, (N, S, M, P))
    ct = oq.concurrent_tasks
    logging.info('Building hazard statistics with %d concurrent_tasks', ct)
    weights = [rlz.weight for rlz in self.rlzs_assoc.realizations]
    if 'amplification' in oq.inputs:
        amplifier = Amplifier(oq.imtls, self.datastore['amplification'],
                              oq.soil_intensities)
        amplifier.check(self.sitecol.vs30, oq.vs30_tolerance)
    else:
        amplifier = None
    allargs = [  # this list is very fast to generate
        (getters.PmapGetter(self.datastore, weights, t.sids, oq.poes),
         N, hstats, oq.individual_curves, oq.max_sites_disagg, amplifier)
        for t in self.sitecol.split_in_tiles(ct)]
    self.datastore.swmr_on()
    parallel.Starmap(
        build_hazard, allargs, h5=self.datastore.hdf5
    ).reduce(self.save_hazard)
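# Hedged note (illustrative, not from the original code): when an
# amplification function is given, calc_stats stores the curves on the soil
# intensity levels, so the last dimension is L = M * len(soil_intensities)
# rather than len(imtls.array). With hypothetical sizes:
def _num_levels(M=2, soil_intensities=(0.01, 0.02, 0.05, 0.1)):
    return M * len(soil_intensities)  # -> 8, the L of the 'hcurves-*' dsets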
def test_simple(self):
    # MP: checked some of the poes computed considering uncertainty
    # against hand calculations
    fname = gettemp(simple_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    a = Amplifier(self.imtls, df, self.soil_levels)
    a.check(self.vs30, vs30_tolerance=1)
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985008, 0.980001, 0.970019, 0.94006, 0.890007,
               0.790198, 0.690201], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985008, 0.980001, 0.970019, 0.94006, 0.890007,
               0.790198, 0.690201], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985109, 0.980022, 0.970272, 0.940816, 0.890224,
               0.792719, 0.692719], atol=1E-6)

    # Amplify GMFs with sigmas
    numpy.random.seed(42)
    gmvs = a._amplify_gmvs(b'A', numpy.array([.005, .010, .015]), 'PGA')
    numpy.testing.assert_allclose(gmvs, [0.005401, 0.010356, 0.016704],
                                  atol=1E-5)
def test_gmf_cata(self):
    fname = gettemp(cata_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    imtls = DictArray({'PGA': [numpy.nan]})
    a = Amplifier(imtls, df)

    numpy.random.seed(42)  # must be fixed
    gmvs1 = a._amplify_gmvs(b'z1', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs1, [0.217124, 0.399295, 0.602515], atol=1E-5)
    gmvs2 = a._amplify_gmvs(b'z2', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs2, [0.266652, 0.334187, 0.510845], atol=1E-5)

    numpy.random.seed(43)  # changing the seed the results change a lot
    gmvs1 = a._amplify_gmvs(b'z1', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs1, [0.197304, 0.293422, 0.399669], atol=1E-5)
    gmvs2 = a._amplify_gmvs(b'z2', numpy.array([.1, .2, .3]), 'PGA')
    aac(gmvs2, [0.117069, 0.517284, 0.475571], atol=1E-5)
def test_simple(self):
    fname = gettemp(simple_ampl_func)
    aw = read_csv(fname, {'ampcode': 'S2', 'level': numpy.uint8,
                          None: numpy.float64})
    a = Amplifier(self.imtls, aw, self.soil_levels)
    a.check(self.vs30, 1)
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985002, 0.979997, 0.970004, 0.940069, 0.889961,
               0.79, 0.690037], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985002, 0.979997, 0.970004, 0.940069, 0.889961,
               0.79, 0.690037], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985002, 0.979996, 0.969991, 0.940012, 0.889958,
               0.79, 0.690037], atol=1E-6)
def _read_risk_data(self):
    # read the exposure (if any), the risk model (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_crmodel()  # must be called first
    if oq.hazard_calculation_id:
        with util.read(oq.hazard_calculation_id) as dstore:
            haz_sitecol = dstore['sitecol'].complete
            if ('amplification' in oq.inputs and
                    'ampcode' not in haz_sitecol.array.dtype.names):
                haz_sitecol.add_col('ampcode', site.ampcode_dt)
    else:
        haz_sitecol = readinput.get_site_collection(oq)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
            haz_sitecol.make_complete()
        if 'site_model' in oq.inputs:
            self.datastore['site_model'] = readinput.get_site_model(oq)

    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        exposure = self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['cost_calculator'] = exposure.cost_calculator
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = (
                numpy.array(exposure.exposures, hdf5.vstr))
    elif 'assetcol' in self.datastore.parent:
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(oq.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets', len(self.assetcol),
                         len(assetcol))
            nsites = len(self.sitecol)
            if (oq.spatial_correlation != 'no' and
                    nsites > MAXSITES):  # hard-coded, heuristic
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets', len(self.assetcol),
                         len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
    if self.sitecol:
        logging.info('Read N=%d hazard sites and L=%d hazard levels',
                     len(self.sitecol), len(oq.imtls.array))

    if oq_hazard:
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' % (
                    oq.time_event, oq_hazard.time_event))

    if oq.job_type == 'risk':
        tmap_arr, tmap_lst = logictree.taxonomy_mapping(
            self.oqparam.inputs.get('taxonomy_mapping'),
            self.assetcol.tagcol.taxonomy)
        self.crmodel.tmap = tmap_lst
        if len(tmap_arr):
            self.datastore['taxonomy_mapping'] = tmap_arr
        taxonomies = set(taxo for items in self.crmodel.tmap
                         for taxo, weight in items if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.crmodel.taxonomies)
        if self.crmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        if len(self.crmodel.taxonomies) > len(taxonomies):
            logging.info('Reducing risk model from %d to %d taxonomies',
                         len(self.crmodel.taxonomies), len(taxonomies))
            self.crmodel = self.crmodel.reduce(taxonomies)
            self.crmodel.tmap = tmap_lst
        self.crmodel.vectorize_cons_model(self.assetcol.tagcol)

    if hasattr(self, 'sitecol') and self.sitecol:
        if 'site_model' in oq.inputs:
            assoc_dist = (oq.region_grid_spacing * 1.414
                          if oq.region_grid_spacing else 5)  # Graeme's 5km
            sm = readinput.get_site_model(oq)
            self.sitecol.complete.assoc(sm, assoc_dist)
        self.datastore['sitecol'] = self.sitecol.complete

    # store amplification functions if any
    if 'amplification' in oq.inputs:
        logging.info('Reading %s', oq.inputs['amplification'])
        self.datastore['amplification'] = readinput.get_amplification(oq)
        check_amplification(self.datastore)
        self.amplifier = Amplifier(
            oq.imtls, self.datastore['amplification'], oq.soil_intensities)
        self.amplifier.check(self.sitecol.vs30, oq.vs30_tolerance)
    else:
        self.amplifier = None

    # used in the risk calculators
    self.param = dict(individual_curves=oq.individual_curves,
                      avg_losses=oq.avg_losses,
                      amplifier=self.amplifier)

    # compute exposure stats
    if hasattr(self, 'assetcol'):
        save_exposed_values(
            self.datastore, self.assetcol, oq.loss_names, oq.aggregate_by)
class HazardCalculator(BaseCalculator):
    """
    Base class for hazard calculators based on source models
    """
    def src_filter(self, filename=None):
        """
        :returns: a SourceFilter/UcerfFilter
        """
        oq = self.oqparam
        if getattr(self, 'sitecol', None):
            sitecol = self.sitecol.complete
        else:  # can happen to the ruptures-only calculator
            sitecol = None
            filename = None
        if 'ucerf' in oq.calculation_mode:
            return UcerfFilter(sitecol, oq.maximum_distance, filename)
        return SourceFilter(sitecol, oq.maximum_distance, filename)

    @property
    def E(self):
        """
        :returns: the number of stored events
        """
        try:
            return len(self.datastore['events'])
        except KeyError:
            return 0

    @property
    def N(self):
        """
        :returns: the total number of sites
        """
        return len(self.sitecol.complete) if self.sitecol else None

    def check_overflow(self):
        """Overridden in event based"""

    def check_floating_spinning(self):
        f, s = self.csm.get_floating_spinning_factors()
        if f != 1:
            logging.info('Rupture floating factor = %s', f)
        if s != 1:
            logging.info('Rupture spinning factor = %s', s)

    def read_inputs(self):
        """
        Read risk data and sources if any
        """
        oq = self.oqparam
        self._read_risk_data()
        self.check_overflow()  # check if self.sitecol is too large
        if getattr(self, 'sitecol', None):
            # can be None for the ruptures-only calculator
            with hdf5.File(self.datastore.tempname, 'w') as tmp:
                tmp['sitecol'] = self.sitecol
        if ('source_model_logic_tree' in oq.inputs and
                oq.hazard_calculation_id is None):
            with self.monitor('composite source model', measuremem=True):
                self.csm = csm = readinput.get_composite_source_model(
                    oq, self.datastore.hdf5)
                ns = len(csm.get_sources())
                if oq.disagg_by_src and ns > 1000:
                    j = oq.inputs['job_ini']
                    raise InvalidFile(
                        '%s: disagg_by_src can be set only if there are '
                        '<=1000 sources, but %d were found in the model'
                        % (j, ns))
                self.csm_info = csm.info
                self.datastore['source_model_lt'] = csm.source_model_lt
                res = views.view('dupl_sources', self.datastore)
                logging.info(f'The composite source model has {res.val:_d} '
                             'ruptures')
                if res:
                    logging.info(res)
        self.init()  # do this at the end of pre-execute
        if not oq.hazard_calculation_id:
            self.gzip_inputs()

    def save_multi_peril(self):
        """Defined in MultiRiskCalculator"""

    def pre_execute(self):
        """
        Check if there is a previous calculation ID.
        If yes, read the inputs by retrieving the previous calculation;
        if not, read the inputs directly.
""" oq = self.oqparam if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs: # read hazard from files assert not oq.hazard_calculation_id, ( 'You cannot use --hc together with gmfs_file') self.read_inputs() if 'gmfs' in oq.inputs: if not oq.inputs['gmfs'].endswith('.csv'): raise NotImplementedError( 'Importer for %s' % oq.inputs['gmfs']) E = len(import_gmfs(self.datastore, oq.inputs['gmfs'], self.sitecol.complete.sids)) if hasattr(oq, 'number_of_ground_motion_fields'): if oq.number_of_ground_motion_fields != E: raise RuntimeError( 'Expected %d ground motion fields, found %d' % (oq.number_of_ground_motion_fields, E)) else: # set the number of GMFs from the file oq.number_of_ground_motion_fields = E else: self.save_multi_peril() self.save_crmodel() elif 'hazard_curves' in oq.inputs: # read hazard from file assert not oq.hazard_calculation_id, ( 'You cannot use --hc together with hazard_curves') haz_sitecol = readinput.get_site_collection(oq) self.load_crmodel() # must be after get_site_collection self.read_exposure(haz_sitecol) # define .assets_by_site self.datastore['poes/grp-00'] = fix_ones(readinput.pmap) self.datastore['sitecol'] = self.sitecol self.datastore['assetcol'] = self.assetcol self.datastore['csm_info'] = fake = source.CompositionInfo.fake() self.rlzs_assoc = fake.get_rlzs_assoc() self.datastore['rlzs_by_grp'] = self.rlzs_assoc.by_grp() self.save_crmodel() elif oq.hazard_calculation_id: parent = util.read(oq.hazard_calculation_id) self.check_precalc(parent['oqparam'].calculation_mode) self.datastore.parent = parent # copy missing parameters from the parent if 'concurrent_tasks' not in vars(self.oqparam): self.oqparam.concurrent_tasks = ( self.oqparam.__class__.concurrent_tasks.default) params = {name: value for name, value in vars(parent['oqparam']).items() if name not in vars(self.oqparam)} self.save_params(**params) self.read_inputs() oqp = parent['oqparam'] if oqp.investigation_time != oq.investigation_time: raise ValueError( 'The parent calculation was using investigation_time=%s' ' != %s' % (oqp.investigation_time, oq.investigation_time)) if not consistent(oqp.minimum_intensity, oq.minimum_intensity): raise ValueError( 'The parent calculation was using minimum_intensity=%s' ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity)) hstats, rstats = list(oqp.hazard_stats()), list(oq.hazard_stats()) if hstats != rstats: raise ValueError( 'The parent calculation had stats %s != %s' % (hstats, rstats)) missing_imts = set(oq.risk_imtls) - set(oqp.imtls) if missing_imts: raise ValueError( 'The parent calculation is missing the IMT(s) %s' % ', '.join(missing_imts)) self.save_crmodel() elif self.__class__.precalc: calc = calculators[self.__class__.precalc]( self.oqparam, self.datastore.calc_id) calc.run(remove=False) for name in ('csm param sitecol assetcol crmodel rlzs_assoc ' 'policy_name policy_dict csm_info').split(): if hasattr(calc, name): setattr(self, name, getattr(calc, name)) else: self.read_inputs() self.save_crmodel() def init(self): """ To be overridden to initialize the datasets needed by the calculation """ oq = self.oqparam if not oq.risk_imtls: if self.datastore.parent: oq.risk_imtls = ( self.datastore.parent['oqparam'].risk_imtls) if 'precalc' in vars(self): self.rlzs_assoc = self.precalc.rlzs_assoc elif 'csm_info' in self.datastore: csm_info = self.datastore['csm_info'] if oq.hazard_calculation_id and 'gsim_logic_tree' in oq.inputs: # redefine the realizations by reading the weights from the # gsim_logic_tree_file that could be different from the parent 
                csm_info.gsim_lt = logictree.GsimLogicTree(
                    oq.inputs['gsim_logic_tree'], set(csm_info.trts))
            self.rlzs_assoc = csm_info.get_rlzs_assoc()
        elif hasattr(self, 'csm'):
            self.check_floating_spinning()
            self.rlzs_assoc = self.csm.info.get_rlzs_assoc()
        else:  # build a fake; used by risk-from-file calculators
            self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
            self.rlzs_assoc = fake.get_rlzs_assoc()

    @general.cached_property
    def R(self):
        """
        :returns: the number of realizations
        """
        try:
            return self.csm.info.get_num_rlzs()
        except AttributeError:  # no self.csm
            return self.datastore['csm_info'].get_num_rlzs()

    def read_exposure(self, haz_sitecol):  # after load_risk_model
        """
        Read the exposure, the risk models and update the attributes
        .sitecol, .assetcol
        """
        oq = self.oqparam
        with self.monitor('reading exposure'):
            self.sitecol, self.assetcol, discarded = (
                readinput.get_sitecol_assetcol(
                    oq, haz_sitecol, self.crmodel.loss_types))
            if len(discarded):
                self.datastore['discarded'] = discarded
                if hasattr(self, 'rup'):
                    # this is normal for the case of scenario from rupture
                    logging.info('%d assets were discarded because too far '
                                 'from the rupture; use `oq show discarded` '
                                 'to show them and `oq plot_assets` to plot '
                                 'them' % len(discarded))
                elif not oq.discard_assets:  # raise an error
                    self.datastore['sitecol'] = self.sitecol
                    self.datastore['assetcol'] = self.assetcol
                    raise RuntimeError(
                        '%d assets were discarded; use `oq show discarded` to'
                        ' show them and `oq plot_assets` to plot them' %
                        len(discarded))
        self.policy_name = ''
        self.policy_dict = {}
        if oq.inputs.get('insurance'):
            k, v = zip(*oq.inputs['insurance'].items())
            self.load_insurance_data(k, v)
        return readinput.exposure

    def load_insurance_data(self, ins_types, ins_files):
        """
        Read the insurance files and populate the policy_dict
        """
        for loss_type, fname in zip(ins_types, ins_files):
            array = hdf5.read_csv(
                fname, {'insurance_limit': float, 'deductible': float,
                        None: object}).array
            policy_name = array.dtype.names[0]
            policy_idx = getattr(self.assetcol.tagcol, policy_name + '_idx')
            insurance = numpy.zeros((len(policy_idx), 2))
            for pol, ded, lim in array[
                    [policy_name, 'deductible', 'insurance_limit']]:
                insurance[policy_idx[pol]] = ded, lim
            self.policy_dict[loss_type] = insurance
            if self.policy_name and policy_name != self.policy_name:
                raise ValueError(
                    'The file %s contains %s as policy field, but we were '
                    'expecting %s' % (fname, policy_name, self.policy_name))
            else:
                self.policy_name = policy_name

    def load_crmodel(self):
        # to be called before read_exposure
        # NB: this is called even if there is no risk model
        """
        Read the risk models and set the attribute .crmodel.
        The crmodel can be empty for hazard calculations.
        Save the loss ratios (if any) in the datastore.
""" logging.info('Reading the risk model if present') self.crmodel = readinput.get_crmodel(self.oqparam) if not self.crmodel: parent = self.datastore.parent if 'risk_model' in parent: self.crmodel = riskmodels.CompositeRiskModel.read(parent) return if self.oqparam.ground_motion_fields and not self.oqparam.imtls: raise InvalidFile('No intensity_measure_types specified in %s' % self.oqparam.inputs['job_ini']) self.save_params() # re-save oqparam def save_crmodel(self): """ Save the risk models in the datastore """ if len(self.crmodel): self.datastore['risk_model'] = rm = self.crmodel attrs = self.datastore.getitem('risk_model').attrs attrs['min_iml'] = hdf5.array_of_vstr(sorted(rm.min_iml.items())) def _read_risk_data(self): # read the exposure (if any), the risk model (if any) and then the # site collection, possibly extracted from the exposure. oq = self.oqparam self.load_crmodel() # must be called first if oq.hazard_calculation_id: with util.read(oq.hazard_calculation_id) as dstore: haz_sitecol = dstore['sitecol'].complete if ('amplification' in oq.inputs and 'ampcode' not in haz_sitecol.array.dtype.names): haz_sitecol.add_col('ampcode', site.ampcode_dt) else: haz_sitecol = readinput.get_site_collection(oq) if hasattr(self, 'rup'): # for scenario we reduce the site collection to the sites # within the maximum distance from the rupture haz_sitecol, _dctx = self.cmaker.filter( haz_sitecol, self.rup) haz_sitecol.make_complete() if 'site_model' in oq.inputs: self.datastore['site_model'] = readinput.get_site_model(oq) oq_hazard = (self.datastore.parent['oqparam'] if self.datastore.parent else None) if 'exposure' in oq.inputs: exposure = self.read_exposure(haz_sitecol) self.datastore['assetcol'] = self.assetcol self.datastore['cost_calculator'] = exposure.cost_calculator if hasattr(readinput.exposure, 'exposures'): self.datastore['assetcol/exposures'] = ( numpy.array(exposure.exposures, hdf5.vstr)) elif 'assetcol' in self.datastore.parent: assetcol = self.datastore.parent['assetcol'] if oq.region: region = wkt.loads(oq.region) self.sitecol = haz_sitecol.within(region) if oq.shakemap_id or 'shakemap' in oq.inputs: self.sitecol, self.assetcol = self.read_shakemap( haz_sitecol, assetcol) self.datastore['assetcol'] = self.assetcol logging.info('Extracted %d/%d assets', len(self.assetcol), len(assetcol)) nsites = len(self.sitecol) if (oq.spatial_correlation != 'no' and nsites > MAXSITES): # hard-coded, heuristic raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites) elif hasattr(self, 'sitecol') and general.not_equal( self.sitecol.sids, haz_sitecol.sids): self.assetcol = assetcol.reduce(self.sitecol) self.datastore['assetcol'] = self.assetcol logging.info('Extracted %d/%d assets', len(self.assetcol), len(assetcol)) else: self.assetcol = assetcol else: # no exposure self.sitecol = haz_sitecol if self.sitecol: logging.info('Read N=%d hazard sites and L=%d hazard levels', len(self.sitecol), len(oq.imtls.array)) if oq_hazard: parent = self.datastore.parent if 'assetcol' in parent: check_time_event(oq, parent['assetcol'].occupancy_periods) elif oq.job_type == 'risk' and 'exposure' not in oq.inputs: raise ValueError('Missing exposure both in hazard and risk!') if oq_hazard.time_event and oq_hazard.time_event != oq.time_event: raise ValueError( 'The risk configuration file has time_event=%s but the ' 'hazard was computed with time_event=%s' % ( oq.time_event, oq_hazard.time_event)) if oq.job_type == 'risk': tmap_arr, tmap_lst = logictree.taxonomy_mapping( self.oqparam.inputs.get('taxonomy_mapping'), 
                self.assetcol.tagcol.taxonomy)
            self.crmodel.tmap = tmap_lst
            if len(tmap_arr):
                self.datastore['taxonomy_mapping'] = tmap_arr
            taxonomies = set(taxo for items in self.crmodel.tmap
                             for taxo, weight in items if taxo != '?')
            # check that we are covering all the taxonomies in the exposure
            missing = taxonomies - set(self.crmodel.taxonomies)
            if self.crmodel and missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' %
                                   missing)
            if len(self.crmodel.taxonomies) > len(taxonomies):
                logging.info('Reducing risk model from %d to %d taxonomies',
                             len(self.crmodel.taxonomies), len(taxonomies))
                self.crmodel = self.crmodel.reduce(taxonomies)
                self.crmodel.tmap = tmap_lst
            self.crmodel.vectorize_cons_model(self.assetcol.tagcol)

        if hasattr(self, 'sitecol') and self.sitecol:
            if 'site_model' in oq.inputs:
                assoc_dist = (oq.region_grid_spacing * 1.414
                              if oq.region_grid_spacing
                              else 5)  # Graeme's 5km
                sm = readinput.get_site_model(oq)
                self.sitecol.complete.assoc(sm, assoc_dist)
            self.datastore['sitecol'] = self.sitecol.complete

        # store amplification functions if any
        if 'amplification' in oq.inputs:
            logging.info('Reading %s', oq.inputs['amplification'])
            self.datastore['amplification'] = readinput.get_amplification(oq)
            check_amplification(self.datastore)
            self.amplifier = Amplifier(
                oq.imtls, self.datastore['amplification'],
                oq.soil_intensities)
            self.amplifier.check(self.sitecol.vs30, oq.vs30_tolerance)
        else:
            self.amplifier = None

        # used in the risk calculators
        self.param = dict(individual_curves=oq.individual_curves,
                          avg_losses=oq.avg_losses,
                          amplifier=self.amplifier)

        # compute exposure stats
        if hasattr(self, 'assetcol'):
            save_exposed_values(
                self.datastore, self.assetcol, oq.loss_names,
                oq.aggregate_by)

    def store_rlz_info(self, eff_ruptures=None):
        """
        Save info about the composite source model inside the csm_info
        dataset
        """
        if hasattr(self, 'csm_info'):  # no scenario
            self.csm_info.update_eff_ruptures(eff_ruptures)
            self.rlzs_assoc = self.csm_info.get_rlzs_assoc(
                self.oqparam.sm_lt_path)
            if not self.rlzs_assoc.realizations:
                raise RuntimeError('Empty logic tree: too much filtering?')
            # sanity check that eff_ruptures have been set, i.e.
            # are not -1
            for sm in self.csm_info.source_models:
                for sg in sm.src_groups:
                    assert sg.eff_ruptures != -1, sg
            self.datastore['csm_info'] = self.csm_info
        R = len(self.rlzs_assoc.realizations)
        logging.info('There are %d realization(s)', R)
        rlzs_by_grp = self.rlzs_assoc.by_grp()
        if self.oqparam.imtls:
            self.datastore['weights'] = arr = build_weights(
                self.rlzs_assoc.realizations, self.oqparam.imt_dt())
            self.datastore.set_attrs('weights', nbytes=arr.nbytes)
        if ('event_based' in self.oqparam.calculation_mode and
                R >= TWO16 or R >= TWO32):
            # rlzi is 16 bit integer in the GMFs and 32 bit in rlzs_by_grp
            raise ValueError(
                'The logic tree has too many realizations (%d), use sampling '
                'instead' % R)
        elif R > 10000:
            logging.warning(
                'The logic tree has %d realizations(!), please consider '
                'sampling it', R)
        # save vlen-arrays of rlz indices, one per group
        if rlzs_by_grp:
            self.datastore['rlzs_by_grp'] = rlzs_by_grp

    def store_source_info(self, calc_times):
        """
        Save (weight, num_sites, calc_time) inside the source_info dataset
        """
        if calc_times:
            source_info = self.datastore['source_info']
            arr = numpy.zeros((len(source_info), 3), F32)
            # NB: the zip magic is needed for performance,
            # looping would be too slow
            ids, vals = zip(*calc_times.items())
            arr[numpy.array(ids)] = vals
            source_info['eff_ruptures'] += arr[:, 0]
            source_info['num_sites'] += arr[:, 1]
            source_info['calc_time'] += arr[:, 2]

    def post_process(self):
        """For compatibility with the engine"""
def test_dupl(self):
    fname = gettemp(dupl_ampl_func)
    aw = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64})
    with self.assertRaises(ValueError):
        Amplifier(self.imtls, aw)
def _read_risk_data(self):
    # read the risk model (if any), the exposure (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_crmodel()  # must be called first
    if (not oq.imtls and 'shakemap' not in oq.inputs
            and oq.ground_motion_fields):
        raise InvalidFile('There are no intensity measure types in %s' %
                          oq.inputs['job_ini'])
    if oq.hazard_calculation_id:
        with util.read(oq.hazard_calculation_id) as dstore:
            haz_sitecol = dstore['sitecol'].complete
            if ('amplification' in oq.inputs and
                    'ampcode' not in haz_sitecol.array.dtype.names):
                haz_sitecol.add_col('ampcode', site.ampcode_dt)
    else:
        haz_sitecol = readinput.get_site_collection(oq, self.datastore)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
            haz_sitecol.make_complete()
        if 'site_model' in oq.inputs:
            self.datastore['site_model'] = readinput.get_site_model(oq)

    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        exposure = self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['cost_calculator'] = exposure.cost_calculator
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = (numpy.array(
                exposure.exposures, hdf5.vstr))
    elif 'assetcol' in self.datastore.parent:
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(oq.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['sitecol'] = self.sitecol
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets', len(self.assetcol),
                         len(assetcol))
            nsites = len(self.sitecol)
            if (oq.spatial_correlation != 'no' and
                    nsites > MAXSITES):  # hard-coded, heuristic
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets', len(self.assetcol),
                         len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
    if self.sitecol and oq.imtls:
        logging.info('Read N=%d hazard sites and L=%d hazard levels',
                     len(self.sitecol), oq.imtls.size)

    if oq_hazard:
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' %
                (oq.time_event, oq_hazard.time_event))

    if oq.job_type == 'risk':
        tmap_arr, tmap_lst = logictree.taxonomy_mapping(
            self.oqparam.inputs.get('taxonomy_mapping'),
            self.assetcol.tagcol.taxonomy)
        self.crmodel.tmap = tmap_lst
        if len(tmap_arr):
            self.datastore['taxonomy_mapping'] = tmap_arr
        taxonomies = set(taxo for items in self.crmodel.tmap
                         for taxo, weight in items if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.crmodel.taxonomies)
        if self.crmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        if len(self.crmodel.taxonomies) > len(taxonomies):
            logging.info('Reducing risk model from %d to %d taxonomies',
                         len(self.crmodel.taxonomies), len(taxonomies))
            self.crmodel = self.crmodel.reduce(taxonomies)
            self.crmodel.tmap = tmap_lst
        self.crmodel.reduce_cons_model(self.assetcol.tagcol)

    if hasattr(self, 'sitecol') and self.sitecol:
        if 'site_model' in oq.inputs:
            assoc_dist = (oq.region_grid_spacing * 1.414
                          if oq.region_grid_spacing else 5)  # Graeme's 5km
            sm = readinput.get_site_model(oq)
            self.sitecol.complete.assoc(sm, assoc_dist)
        self.datastore['sitecol'] = self.sitecol

    # store amplification functions if any
    self.af = None
    if 'amplification' in oq.inputs:
        logging.info('Reading %s', oq.inputs['amplification'])
        df = readinput.get_amplification(oq)
        check_amplification(df, self.sitecol)
        self.amplifier = Amplifier(oq.imtls, df, oq.soil_intensities)
        if oq.amplification_method == 'kernel':
            # TODO: need to add additional checks on the main calculation
            # methodology since the kernel method is currently tested only
            # for classical PSHA
            self.af = AmplFunction.from_dframe(df)
            self.amplifier = None
    else:
        self.amplifier = None

    # manage secondary perils
    sec_perils = oq.get_sec_perils()
    for sp in sec_perils:
        sp.prepare(self.sitecol)  # add columns as needed

    mal = {lt: getdefault(oq.minimum_asset_loss, lt)
           for lt in oq.loss_names}
    if mal:
        logging.info('minimum_asset_loss=%s', mal)
    self.param = dict(individual_curves=oq.individual_curves,
                      ps_grid_spacing=oq.ps_grid_spacing,
                      collapse_level=oq.collapse_level,
                      split_sources=oq.split_sources,
                      avg_losses=oq.avg_losses,
                      amplifier=self.amplifier,
                      sec_perils=sec_perils,
                      ses_seed=oq.ses_seed,
                      minimum_asset_loss=mal)

    # compute exposure stats
    if hasattr(self, 'assetcol'):
        save_agg_values(self.datastore, self.assetcol,
                        oq.loss_names, oq.aggregate_by)
class HazardCalculator(BaseCalculator):
    """
    Base class for hazard calculators based on source models
    """
    def src_filter(self, filename=None):
        """
        :returns: a SourceFilter/UcerfFilter
        """
        oq = self.oqparam
        if getattr(self, 'sitecol', None):
            sitecol = self.sitecol.complete
        else:  # can happen to the ruptures-only calculator
            sitecol = None
            filename = None
        if oq.is_ucerf():
            return UcerfFilter(sitecol, oq.maximum_distance, filename)
        return SourceFilter(sitecol, oq.maximum_distance, filename)

    @property
    def E(self):
        """
        :returns: the number of stored events
        """
        try:
            return len(self.datastore['events'])
        except KeyError:
            return 0

    @property
    def N(self):
        """
        :returns: the total number of sites
        """
        return len(self.sitecol.complete) if self.sitecol else None

    @property
    def few_sites(self):
        """
        :returns: True if there are fewer sites than max_sites_disagg
        """
        return len(self.sitecol.complete) <= self.oqparam.max_sites_disagg

    def check_overflow(self):
        """Overridden in event based"""

    def check_floating_spinning(self):
        f, s = self.csm.get_floating_spinning_factors()
        if f != 1:
            logging.info('Rupture floating factor = %s', f)
        if s != 1:
            logging.info('Rupture spinning factor = %s', s)
        if (f * s >= 1.5 and self.oqparam.pointsource_distance is None
                and 'classical' in self.oqparam.calculation_mode):
            logging.info(
                'You are not using the pointsource_distance approximation:\n'
                'https://docs.openquake.org/oq-engine/advanced/'
                'common-mistakes.html#pointsource-distance')

    def read_inputs(self):
        """
        Read risk data and sources if any
        """
        oq = self.oqparam
        self._read_risk_data()
        self.check_overflow()  # check if self.sitecol is too large
        if ('amplification' in oq.inputs and
                oq.amplification_method == 'kernel'):
            logging.info('Reading %s', oq.inputs['amplification'])
            df = readinput.get_amplification(oq)
            check_amplification(df, self.sitecol)
            self.af = AmplFunction.from_dframe(df)
        if getattr(self, 'sitecol', None):
            # can be None for the ruptures-only calculator
            with hdf5.File(self.datastore.tempname, 'w') as tmp:
                tmp['sitecol'] = self.sitecol
        elif (oq.calculation_mode == 'disaggregation' and
                oq.max_sites_disagg < len(self.sitecol)):
            raise ValueError('Please set max_sites_disagg=%d in %s' %
                             (len(self.sitecol), oq.inputs['job_ini']))
        elif oq.disagg_by_src and len(self.sitecol) > oq.max_sites_disagg:
            raise ValueError(
                'There are too many sites to use disagg_by_src=true')
        if ('source_model_logic_tree' in oq.inputs and
                oq.hazard_calculation_id is None):
            with self.monitor('composite source model', measuremem=True):
                self.csm = csm = readinput.get_composite_source_model(
                    oq, self.datastore.hdf5)
                srcs = [src for sg in csm.src_groups for src in sg]
                if not srcs:
                    raise RuntimeError('All sources were discarded!?')
                logging.info('Checking the sources bounding box')
                sids = self.src_filter().within_bbox(srcs)
                if len(sids) == 0:
                    raise RuntimeError('All sources were discarded!?')
                self.full_lt = csm.full_lt
        self.init()  # do this at the end of pre-execute
        if (not oq.hazard_calculation_id
                and oq.calculation_mode != 'preclassical'
                and not oq.save_disk_space):
            self.gzip_inputs()

    def save_multi_peril(self):
        """Defined in MultiRiskCalculator"""

    def pre_execute(self):
        """
        Check if there is a previous calculation ID.
        If yes, read the inputs by retrieving the previous calculation;
        if not, read the inputs directly.
""" oq = self.oqparam if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs: # read hazard from files assert not oq.hazard_calculation_id, ( 'You cannot use --hc together with gmfs_file') self.read_inputs() if 'gmfs' in oq.inputs: if not oq.inputs['gmfs'].endswith('.csv'): raise NotImplementedError('Importer for %s' % oq.inputs['gmfs']) E = len( import_gmfs(self.datastore, oq.inputs['gmfs'], self.sitecol.complete.sids)) if hasattr(oq, 'number_of_ground_motion_fields'): if oq.number_of_ground_motion_fields != E: raise RuntimeError( 'Expected %d ground motion fields, found %d' % (oq.number_of_ground_motion_fields, E)) else: # set the number of GMFs from the file oq.number_of_ground_motion_fields = E else: self.save_multi_peril() self.save_crmodel() elif 'hazard_curves' in oq.inputs: # read hazard from file assert not oq.hazard_calculation_id, ( 'You cannot use --hc together with hazard_curves') haz_sitecol = readinput.get_site_collection(oq) self.load_crmodel() # must be after get_site_collection self.read_exposure(haz_sitecol) # define .assets_by_site self.datastore['poes/grp-00'] = fix_ones(readinput.pmap) self.datastore['sitecol'] = self.sitecol self.datastore['assetcol'] = self.assetcol self.datastore['full_lt'] = fake = logictree.FullLogicTree.fake() self.realizations = fake.get_realizations() self.save_crmodel() elif oq.hazard_calculation_id: parent = util.read(oq.hazard_calculation_id) self.check_precalc(parent['oqparam'].calculation_mode) self.datastore.parent = parent # copy missing parameters from the parent if 'concurrent_tasks' not in vars(self.oqparam): self.oqparam.concurrent_tasks = ( self.oqparam.__class__.concurrent_tasks.default) params = { name: value for name, value in vars(parent['oqparam']).items() if name not in vars(self.oqparam) } self.save_params(**params) self.read_inputs() oqp = parent['oqparam'] if oqp.investigation_time != oq.investigation_time: raise ValueError( 'The parent calculation was using investigation_time=%s' ' != %s' % (oqp.investigation_time, oq.investigation_time)) if not consistent(oqp.minimum_intensity, oq.minimum_intensity): raise ValueError( 'The parent calculation was using minimum_intensity=%s' ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity)) hstats, rstats = list(oqp.hazard_stats()), list(oq.hazard_stats()) if hstats != rstats: raise ValueError('The parent calculation had stats %s != %s' % (hstats, rstats)) missing_imts = set(oq.risk_imtls) - set(oqp.imtls) if missing_imts: raise ValueError( 'The parent calculation is missing the IMT(s) %s' % ', '.join(missing_imts)) self.save_crmodel() elif self.__class__.precalc: calc = calculators[self.__class__.precalc](self.oqparam, self.datastore.calc_id) calc.run(remove=False) for name in ('csm param sitecol assetcol crmodel realizations ' 'policy_name policy_dict full_lt').split(): if hasattr(calc, name): setattr(self, name, getattr(calc, name)) else: self.read_inputs() self.save_crmodel() def init(self): """ To be overridden to initialize the datasets needed by the calculation """ oq = self.oqparam if not oq.risk_imtls: if self.datastore.parent: oq.risk_imtls = (self.datastore.parent['oqparam'].risk_imtls) if 'precalc' in vars(self): self.realizations = self.precalc.realizations elif 'full_lt' in self.datastore: full_lt = self.datastore['full_lt'] self.realizations = full_lt.get_realizations() if oq.hazard_calculation_id and 'gsim_logic_tree' in oq.inputs: # redefine the realizations by reading the weights from the # gsim_logic_tree_file that could be different from the parent full_lt.gsim_lt 
                full_lt.gsim_lt = logictree.GsimLogicTree(
                    oq.inputs['gsim_logic_tree'], set(full_lt.trts))
        elif hasattr(self, 'csm'):
            self.check_floating_spinning()
            self.realizations = self.csm.full_lt.get_realizations()
        else:  # build a fake; used by risk-from-file calculators
            self.datastore['full_lt'] = fake = logictree.FullLogicTree.fake()
            self.realizations = fake.get_realizations()

    @general.cached_property
    def R(self):
        """
        :returns: the number of realizations
        """
        try:
            return self.csm.full_lt.get_num_rlzs()
        except AttributeError:  # no self.csm
            return self.datastore['full_lt'].get_num_rlzs()

    def read_exposure(self, haz_sitecol):  # after load_risk_model
        """
        Read the exposure, the risk models and update the attributes
        .sitecol, .assetcol
        """
        oq = self.oqparam
        with self.monitor('reading exposure'):
            self.sitecol, self.assetcol, discarded = (
                readinput.get_sitecol_assetcol(
                    oq, haz_sitecol, self.crmodel.loss_types))
            if len(discarded):
                self.datastore['discarded'] = discarded
                if 'scenario' in oq.calculation_mode:
                    # this is normal for the case of scenario from rupture
                    logging.info('%d assets were discarded because too far '
                                 'from the rupture; use `oq show discarded` '
                                 'to show them and `oq plot_assets` to plot '
                                 'them' % len(discarded))
                elif not oq.discard_assets:  # raise an error
                    self.datastore['sitecol'] = self.sitecol
                    self.datastore['assetcol'] = self.assetcol
                    raise RuntimeError(
                        '%d assets were discarded; use `oq show discarded` to'
                        ' show them and `oq plot_assets` to plot them' %
                        len(discarded))
        self.policy_name = ''
        self.policy_dict = {}
        if oq.inputs.get('insurance'):
            k, v = zip(*oq.inputs['insurance'].items())
            self.load_insurance_data(k, v)
        return readinput.exposure

    def load_insurance_data(self, ins_types, ins_files):
        """
        Read the insurance files and populate the policy_dict
        """
        for loss_type, fname in zip(ins_types, ins_files):
            array = hdf5.read_csv(
                fname, {'insurance_limit': float, 'deductible': float,
                        None: object}).array
            policy_name = array.dtype.names[0]
            policy_idx = getattr(self.assetcol.tagcol, policy_name + '_idx')
            insurance = numpy.zeros((len(policy_idx), 2))
            for pol, ded, lim in array[
                    [policy_name, 'deductible', 'insurance_limit']]:
                insurance[policy_idx[pol]] = ded, lim
            self.policy_dict[loss_type] = insurance
            if self.policy_name and policy_name != self.policy_name:
                raise ValueError(
                    'The file %s contains %s as policy field, but we were '
                    'expecting %s' % (fname, policy_name, self.policy_name))
            else:
                self.policy_name = policy_name

    def load_crmodel(self):
        # to be called before read_exposure
        # NB: this is called even if there is no risk model
        """
        Read the risk models and set the attribute .crmodel.
        The crmodel can be empty for hazard calculations.
        Save the loss ratios (if any) in the datastore.
""" logging.info('Reading the risk model if present') self.crmodel = readinput.get_crmodel(self.oqparam) if not self.crmodel: parent = self.datastore.parent if 'risk_model' in parent: self.crmodel = riskmodels.CompositeRiskModel.read(parent) return if self.oqparam.ground_motion_fields and not self.oqparam.imtls: raise InvalidFile('No intensity_measure_types specified in %s' % self.oqparam.inputs['job_ini']) self.save_params() # re-save oqparam def save_crmodel(self): """ Save the risk models in the datastore """ if len(self.crmodel): self.datastore['risk_model'] = rm = self.crmodel attrs = self.datastore.getitem('risk_model').attrs attrs['min_iml'] = hdf5.array_of_vstr(sorted(rm.min_iml.items())) def _read_risk_data(self): # read the exposure (if any), the risk model (if any) and then the # site collection, possibly extracted from the exposure. oq = self.oqparam self.load_crmodel() # must be called first if oq.hazard_calculation_id: with util.read(oq.hazard_calculation_id) as dstore: haz_sitecol = dstore['sitecol'].complete if ('amplification' in oq.inputs and 'ampcode' not in haz_sitecol.array.dtype.names): haz_sitecol.add_col('ampcode', site.ampcode_dt) else: haz_sitecol = readinput.get_site_collection(oq, self.datastore) if hasattr(self, 'rup'): # for scenario we reduce the site collection to the sites # within the maximum distance from the rupture haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup) haz_sitecol.make_complete() if 'site_model' in oq.inputs: self.datastore['site_model'] = readinput.get_site_model(oq) oq_hazard = (self.datastore.parent['oqparam'] if self.datastore.parent else None) if 'exposure' in oq.inputs: exposure = self.read_exposure(haz_sitecol) self.datastore['assetcol'] = self.assetcol self.datastore['cost_calculator'] = exposure.cost_calculator if hasattr(readinput.exposure, 'exposures'): self.datastore['assetcol/exposures'] = (numpy.array( exposure.exposures, hdf5.vstr)) elif 'assetcol' in self.datastore.parent: assetcol = self.datastore.parent['assetcol'] if oq.region: region = wkt.loads(oq.region) self.sitecol = haz_sitecol.within(region) if oq.shakemap_id or 'shakemap' in oq.inputs: self.sitecol, self.assetcol = self.read_shakemap( haz_sitecol, assetcol) self.datastore['assetcol'] = self.assetcol logging.info('Extracted %d/%d assets', len(self.assetcol), len(assetcol)) nsites = len(self.sitecol) if (oq.spatial_correlation != 'no' and nsites > MAXSITES): # hard-coded, heuristic raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites) elif hasattr(self, 'sitecol') and general.not_equal( self.sitecol.sids, haz_sitecol.sids): self.assetcol = assetcol.reduce(self.sitecol) self.datastore['assetcol'] = self.assetcol logging.info('Extracted %d/%d assets', len(self.assetcol), len(assetcol)) else: self.assetcol = assetcol else: # no exposure self.sitecol = haz_sitecol if self.sitecol: logging.info('Read N=%d hazard sites and L=%d hazard levels', len(self.sitecol), len(oq.imtls.array)) if oq_hazard: parent = self.datastore.parent if 'assetcol' in parent: check_time_event(oq, parent['assetcol'].occupancy_periods) elif oq.job_type == 'risk' and 'exposure' not in oq.inputs: raise ValueError('Missing exposure both in hazard and risk!') if oq_hazard.time_event and oq_hazard.time_event != oq.time_event: raise ValueError( 'The risk configuration file has time_event=%s but the ' 'hazard was computed with time_event=%s' % (oq.time_event, oq_hazard.time_event)) if oq.job_type == 'risk': tmap_arr, tmap_lst = logictree.taxonomy_mapping( 
                self.oqparam.inputs.get('taxonomy_mapping'),
                self.assetcol.tagcol.taxonomy)
            self.crmodel.tmap = tmap_lst
            if len(tmap_arr):
                self.datastore['taxonomy_mapping'] = tmap_arr
            taxonomies = set(taxo for items in self.crmodel.tmap
                             for taxo, weight in items if taxo != '?')
            # check that we are covering all the taxonomies in the exposure
            missing = taxonomies - set(self.crmodel.taxonomies)
            if self.crmodel and missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' %
                                   missing)
            if len(self.crmodel.taxonomies) > len(taxonomies):
                logging.info('Reducing risk model from %d to %d taxonomies',
                             len(self.crmodel.taxonomies), len(taxonomies))
                self.crmodel = self.crmodel.reduce(taxonomies)
                self.crmodel.tmap = tmap_lst
            self.crmodel.vectorize_cons_model(self.assetcol.tagcol)

        if hasattr(self, 'sitecol') and self.sitecol:
            if 'site_model' in oq.inputs:
                assoc_dist = (oq.region_grid_spacing * 1.414
                              if oq.region_grid_spacing
                              else 5)  # Graeme's 5km
                sm = readinput.get_site_model(oq)
                self.sitecol.complete.assoc(sm, assoc_dist)
            self.datastore['sitecol'] = self.sitecol.complete

        # store amplification functions if any
        self.af = None
        if 'amplification' in oq.inputs:
            logging.info('Reading %s', oq.inputs['amplification'])
            df = readinput.get_amplification(oq)
            check_amplification(df, self.sitecol)
            self.amplifier = Amplifier(oq.imtls, df, oq.soil_intensities)
            self.amplifier.check(self.sitecol.vs30, oq.vs30_tolerance)
            if oq.amplification_method == 'kernel':
                # TODO: need to add additional checks on the main calculation
                # methodology since the kernel method is currently tested
                # only for classical PSHA
                self.af = AmplFunction.from_dframe(df)
                self.amplifier = None
        else:
            self.amplifier = None

        # used in the risk calculators
        self.param = dict(individual_curves=oq.individual_curves,
                          collapse_level=oq.collapse_level,
                          avg_losses=oq.avg_losses,
                          amplifier=self.amplifier)

        # compute exposure stats
        if hasattr(self, 'assetcol'):
            save_exposed_values(self.datastore, self.assetcol,
                                oq.loss_names, oq.aggregate_by)

    def store_rlz_info(self, eff_ruptures):
        """
        Save info about the composite source model inside the full_lt dataset
        """
        oq = self.oqparam
        if hasattr(self, 'full_lt'):  # no scenario
            self.realizations = self.full_lt.get_realizations()
            if not self.realizations:
                raise RuntimeError('Empty logic tree: too much filtering?')
            self.datastore['full_lt'] = self.full_lt
        else:  # scenario
            self.full_lt = self.datastore['full_lt']
        R = self.R
        logging.info('There are %d realization(s)', R)
        if oq.imtls:
            self.datastore['weights'] = arr = build_weights(
                self.realizations, oq.imt_dt())
            self.datastore.set_attrs('weights', nbytes=arr.nbytes)
        if ('event_based' in oq.calculation_mode and
                R >= TWO16 or R >= TWO32):
            raise ValueError(
                'The logic tree has too many realizations (%d), use sampling '
                'instead' % R)
        elif R > 10000:
            logging.warning(
                'The logic tree has %d realizations(!), please consider '
                'sampling it', R)
        # check for gsim logic tree reduction
        discard_trts = []
        for trt in self.full_lt.gsim_lt.values:
            if eff_ruptures.get(trt, 0) == 0:
                discard_trts.append(trt)
        if (discard_trts and 'scenario' not in oq.calculation_mode
                and not oq.is_ucerf()):
            msg = ('No sources for some TRTs: you should set\n'
                   'discard_trts = %s\nin %s') % (', '.join(discard_trts),
                                                  oq.inputs['job_ini'])
            logging.warning(msg)

    def store_source_info(self, calc_times):
        """
        Save (eff_ruptures, num_sites, calc_time) inside the source_info
        """
        for src_id, arr in calc_times.items():
            src_id = re.sub(r':\d+$', '', src_id)
            row = self.csm.source_info[src_id]
            row[EFF_RUPTURES] += arr[0]
            row[NUM_SITES] += arr[1]
            row[CALC_TIME] += arr[2]
        rows = self.csm.source_info.values()
        recs = [tuple(row) for row in rows]
        hdf5.extend(self.datastore['source_info'],
                    numpy.array(recs, readinput.source_info_dt))

    def post_process(self):
        """For compatibility with the engine"""