def get_risk_functions(oqparam, kind='vulnerability fragility consequence ' 'vulnerability_retrofitted'): """ :param oqparam: an OqParam instance :param kind: a space-separated string with the kinds of risk models to read :returns: a list of risk functions """ kinds = kind.split() rmodels = AccumDict() for kind in kinds: for key in sorted(oqparam.inputs): mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key) if mo: loss_type = mo.group(1) # the cost_type in the key # can be occupants, structural, nonstructural, ... rmodel = nrml.to_python(oqparam.inputs[key]) if len(rmodel) == 0: raise InvalidFile('%s is empty!' % oqparam.inputs[key]) rmodels[loss_type, kind] = rmodel if rmodel.lossCategory is None: # NRML 0.4 continue cost_type = str(rmodel.lossCategory) rmodel_kind = rmodel.__class__.__name__ kind_ = kind.replace('_retrofitted', '') # strip retrofitted if not rmodel_kind.lower().startswith(kind_): raise ValueError( 'Error in the file "%s_file=%s": is ' 'of kind %s, expected %s' % ( key, oqparam.inputs[key], rmodel_kind, kind.capitalize() + 'Model')) if cost_type != loss_type: raise ValueError( 'Error in the file "%s_file=%s": lossCategory is of ' 'type "%s", expected "%s"' % (key, oqparam.inputs[key], rmodel.lossCategory, loss_type)) cl_risk = oqparam.calculation_mode in ('classical', 'classical_risk') rlist = RiskFuncList() rlist.limit_states = [] for (loss_type, kind), rm in sorted(rmodels.items()): if kind == 'fragility': for (imt, riskid), ffl in sorted(rm.items()): if not rlist.limit_states: rlist.limit_states.extend(rm.limitStates) # we are rejecting the case of loss types with different # limit states; this may change in the future assert rlist.limit_states == rm.limitStates, ( rlist.limit_states, rm.limitStates) ffl.loss_type = loss_type ffl.kind = kind rlist.append(ffl) elif kind == 'consequence': for riskid, cf in sorted(rm.items()): rf = hdf5.ArrayWrapper( cf, dict(id=riskid, loss_type=loss_type, kind=kind)) rlist.append(rf) else: # vulnerability, vulnerability_retrofitted # only for classical_risk reduce the loss_ratios # to make sure they are strictly increasing for (imt, riskid), rf in sorted(rm.items()): rf = rf.strictly_increasing() if cl_risk else rf rf.loss_type = loss_type rf.kind = kind rlist.append(rf) return rlist
def _read_scenario_ruptures(self): oq = self.oqparam gsim_lt = readinput.get_gsim_lt(self.oqparam) G = gsim_lt.get_num_paths() if oq.calculation_mode.startswith('scenario'): ngmfs = oq.number_of_ground_motion_fields if oq.inputs['rupture_model'].endswith('.xml'): # check the number of branchsets bsets = len(gsim_lt._ltnode) if bsets > 1: raise InvalidFile( '%s for a scenario calculation must contain a single ' 'branchset, found %d!' % (oq.inputs['job_ini'], bsets)) [(trt, rlzs_by_gsim)] = gsim_lt.get_rlzs_by_gsim_trt().items() self.cmaker = ContextMaker(trt, rlzs_by_gsim, oq) rup = readinput.get_rupture(oq) if self.N > oq.max_sites_disagg: # many sites, split rupture ebrs = [ EBRupture(copyobj(rup, rup_id=rup.rup_id + i), 'NA', 0, G, e0=i * G, scenario=True) for i in range(ngmfs) ] else: # keep a single rupture with a big occupation number ebrs = [ EBRupture(rup, 'NA', 0, G * ngmfs, rup.rup_id, scenario=True) ] srcfilter = SourceFilter(self.sitecol, oq.maximum_distance(trt)) aw = get_rup_array(ebrs, srcfilter) if len(aw) == 0: raise RuntimeError( 'The rupture is too far from the sites! Please check the ' 'maximum_distance and the position of the rupture') elif oq.inputs['rupture_model'].endswith('.csv'): aw = get_ruptures(oq.inputs['rupture_model']) if len(gsim_lt.values) == 1: # fix for scenario_damage/case_12 aw['trt_smr'] = 0 # a single TRT if oq.calculation_mode.startswith('scenario'): # rescale n_occ by ngmfs and nrlzs aw['n_occ'] *= ngmfs * gsim_lt.get_num_paths() else: raise InvalidFile("Something wrong in %s" % oq.inputs['job_ini']) rup_array = aw.array hdf5.extend(self.datastore['rupgeoms'], aw.geom) if len(rup_array) == 0: raise RuntimeError( 'There are no sites within the maximum_distance' ' of %s km from the rupture' % oq.maximum_distance(rup.tectonic_region_type)(rup.mag)) fake = logictree.FullLogicTree.fake(gsim_lt) self.realizations = fake.get_realizations() self.datastore['full_lt'] = fake self.store_rlz_info({}) # store weights self.save_params() imp = calc.RuptureImporter(self.datastore) imp.import_rups_events(rup_array, get_rupture_getters)
def get_sitecol_assetcol(oqparam, haz_sitecol=None, cost_types=()): """ :param oqparam: calculation parameters :param haz_sitecol: the hazard site collection :param cost_types: the expected cost types :returns: (site collection, asset collection) instances """ global exposure if exposure is None: # haz_sitecol not extracted from the exposure exposure = get_exposure(oqparam) if haz_sitecol is None: haz_sitecol = get_site_collection(oqparam) missing = set(cost_types) - set(exposure.cost_types['name']) - set( ['occupants']) # TODO: remove occupants and fragility special cases if missing and not oqparam.calculation_mode.endswith('damage'): expo = oqparam.inputs.get('exposure', '') raise InvalidFile( 'Expected cost types %s but the exposure %r contains %s' % ( cost_types, expo, exposure.cost_types['name'])) if oqparam.region_grid_spacing and not oqparam.region: # extract the hazard grid from the exposure poly = exposure.mesh.get_convex_hull() mesh = poly.dilate(oqparam.region_grid_spacing).discretize( oqparam.region_grid_spacing) if len(mesh) > len(haz_sitecol): raise LargeExposureGrid(mesh, haz_sitecol.mesh, oqparam.region_grid_spacing) haz_sitecol = get_site_collection(oqparam, mesh) # redefine haz_distance = oqparam.region_grid_spacing if haz_distance != oqparam.asset_hazard_distance: logging.info('Using asset_hazard_distance=%d km instead of %d km', haz_distance, oqparam.asset_hazard_distance) else: haz_distance = oqparam.asset_hazard_distance if haz_sitecol.mesh != exposure.mesh: # associate the assets to the hazard sites tot_assets = sum(len(assets) for assets in exposure.assets_by_site) mode = 'strict' if oqparam.region_grid_spacing else 'filter' sitecol, assets_by = geo.utils.assoc( exposure.assets_by_site, haz_sitecol, haz_distance, mode) assets_by_site = [[] for _ in sitecol.complete.sids] num_assets = 0 for sid, assets in zip(sitecol.sids, assets_by): assets_by_site[sid] = assets num_assets += len(assets) logging.info( 'Associated %d assets to %d sites', num_assets, len(sitecol)) if num_assets < tot_assets: logging.warn('Discarded %d assets outside the ' 'asset_hazard_distance of %d km', tot_assets - num_assets, haz_distance) else: # asset sites and hazard sites are the same sitecol = haz_sitecol assets_by_site = exposure.assets_by_site asset_refs = numpy.array( [exposure.asset_refs[asset.ordinal] for assets in assets_by_site for asset in assets]) assetcol = asset.AssetCollection( asset_refs, assets_by_site, exposure.tagcol, exposure.cost_calculator, oqparam.time_event, exposure.occupancy_periods) return sitecol, assetcol
def _read_risk_data(self): # read the risk model (if any), the exposure (if any) and then the # site collection, possibly extracted from the exposure. oq = self.oqparam self.load_crmodel() # must be called first if (not oq.imtls and 'shakemap' not in oq.inputs and oq.ground_motion_fields): raise InvalidFile('There are no intensity measure types in %s' % oq.inputs['job_ini']) if oq.hazard_calculation_id: with util.read(oq.hazard_calculation_id) as dstore: haz_sitecol = dstore['sitecol'].complete if ('amplification' in oq.inputs and 'ampcode' not in haz_sitecol.array.dtype.names): haz_sitecol.add_col('ampcode', site.ampcode_dt) else: haz_sitecol = readinput.get_site_collection(oq, self.datastore) if hasattr(self, 'rup'): # for scenario we reduce the site collection to the sites # within the maximum distance from the rupture haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup) haz_sitecol.make_complete() if 'site_model' in oq.inputs: self.datastore['site_model'] = readinput.get_site_model(oq) oq_hazard = (self.datastore.parent['oqparam'] if self.datastore.parent else None) if 'exposure' in oq.inputs: exposure = self.read_exposure(haz_sitecol) self.datastore['assetcol'] = self.assetcol self.datastore['cost_calculator'] = exposure.cost_calculator if hasattr(readinput.exposure, 'exposures'): self.datastore['assetcol/exposures'] = (numpy.array( exposure.exposures, hdf5.vstr)) elif 'assetcol' in self.datastore.parent: assetcol = self.datastore.parent['assetcol'] if oq.region: region = wkt.loads(oq.region) self.sitecol = haz_sitecol.within(region) if oq.shakemap_id or 'shakemap' in oq.inputs: self.sitecol, self.assetcol = self.read_shakemap( haz_sitecol, assetcol) self.datastore['sitecol'] = self.sitecol self.datastore['assetcol'] = self.assetcol logging.info('Extracted %d/%d assets', len(self.assetcol), len(assetcol)) nsites = len(self.sitecol) if (oq.spatial_correlation != 'no' and nsites > MAXSITES): # hard-coded, heuristic raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites) elif hasattr(self, 'sitecol') and general.not_equal( self.sitecol.sids, haz_sitecol.sids): self.assetcol = assetcol.reduce(self.sitecol) self.datastore['assetcol'] = self.assetcol logging.info('Extracted %d/%d assets', len(self.assetcol), len(assetcol)) else: self.assetcol = assetcol else: # no exposure self.sitecol = haz_sitecol if self.sitecol and oq.imtls: logging.info('Read N=%d hazard sites and L=%d hazard levels', len(self.sitecol), oq.imtls.size) if oq_hazard: parent = self.datastore.parent if 'assetcol' in parent: check_time_event(oq, parent['assetcol'].occupancy_periods) elif oq.job_type == 'risk' and 'exposure' not in oq.inputs: raise ValueError('Missing exposure both in hazard and risk!') if oq_hazard.time_event and oq_hazard.time_event != oq.time_event: raise ValueError( 'The risk configuration file has time_event=%s but the ' 'hazard was computed with time_event=%s' % (oq.time_event, oq_hazard.time_event)) if oq.job_type == 'risk': tmap_arr, tmap_lst = logictree.taxonomy_mapping( self.oqparam.inputs.get('taxonomy_mapping'), self.assetcol.tagcol.taxonomy) self.crmodel.tmap = tmap_lst if len(tmap_arr): self.datastore['taxonomy_mapping'] = tmap_arr taxonomies = set(taxo for items in self.crmodel.tmap for taxo, weight in items if taxo != '?') # check that we are covering all the taxonomies in the exposure missing = taxonomies - set(self.crmodel.taxonomies) if self.crmodel and missing: raise RuntimeError('The exposure contains the taxonomies %s ' 'which are not in the risk model' % missing) if len(self.crmodel.taxonomies) > len(taxonomies): logging.info('Reducing risk model from %d to %d taxonomies', len(self.crmodel.taxonomies), len(taxonomies)) self.crmodel = self.crmodel.reduce(taxonomies) self.crmodel.tmap = tmap_lst self.crmodel.reduce_cons_model(self.assetcol.tagcol) if hasattr(self, 'sitecol') and self.sitecol: if 'site_model' in oq.inputs: assoc_dist = (oq.region_grid_spacing * 1.414 if oq.region_grid_spacing else 5 ) # Graeme's 5km sm = readinput.get_site_model(oq) self.sitecol.complete.assoc(sm, assoc_dist) self.datastore['sitecol'] = self.sitecol # store amplification functions if any self.af = None if 'amplification' in oq.inputs: logging.info('Reading %s', oq.inputs['amplification']) df = readinput.get_amplification(oq) check_amplification(df, self.sitecol) self.amplifier = Amplifier(oq.imtls, df, oq.soil_intensities) if oq.amplification_method == 'kernel': # TODO: need to add additional checks on the main calculation # methodology since the kernel method is currently tested only # for classical PSHA self.af = AmplFunction.from_dframe(df) self.amplifier = None else: self.amplifier = None # manage secondary perils sec_perils = oq.get_sec_perils() for sp in sec_perils: sp.prepare(self.sitecol) # add columns as needed mal = { lt: getdefault(oq.minimum_asset_loss, lt) for lt in oq.loss_names } if mal: logging.info('minimum_asset_loss=%s', mal) self.param = dict(individual_curves=oq.individual_curves, ps_grid_spacing=oq.ps_grid_spacing, collapse_level=oq.collapse_level, split_sources=oq.split_sources, avg_losses=oq.avg_losses, amplifier=self.amplifier, sec_perils=sec_perils, ses_seed=oq.ses_seed, minimum_asset_loss=mal) # compute exposure stats if hasattr(self, 'assetcol'): save_agg_values(self.datastore, self.assetcol, oq.loss_names, oq.aggregate_by)
def get_site_model(oqparam): """ Convert the NRML file into an array of site parameters. :param oqparam: an :class:`openquake.commonlib.oqvalidation.OqParam` instance :returns: an array with fields lon, lat, vs30, ... """ req_site_params = get_gsim_lt(oqparam).req_site_params if 'amplification' in oqparam.inputs: req_site_params.add('ampcode') arrays = [] dtypedic = { None: float, 'vs30measured': numpy.uint8, 'ampcode': site.ampcode_dt } for fname in oqparam.inputs['site_model']: if isinstance(fname, str) and fname.endswith('.csv'): sm = hdf5.read_csv(fname, dtypedic).array sm['lon'] = numpy.round(sm['lon'], 5) sm['lat'] = numpy.round(sm['lat'], 5) dupl = get_duplicates(sm, 'lon', 'lat') if dupl: raise InvalidFile('Found duplicate sites %s in %s' % (dupl, fname)) if 'site_id' in sm.dtype.names: raise InvalidFile('%s: you passed a sites.csv file instead of ' 'a site_model.csv file!' % fname) z = numpy.zeros(len(sm), sorted(sm.dtype.descr)) for name in z.dtype.names: # reorder the fields z[name] = sm[name] arrays.append(z) continue nodes = nrml.read(fname).siteModel params = [valid.site_param(node.attrib) for node in nodes] missing = req_site_params - set(params[0]) if 'vs30measured' in missing: # use a default of False missing -= {'vs30measured'} for param in params: param['vs30measured'] = False if 'backarc' in missing: # use a default of False missing -= {'backarc'} for param in params: param['backarc'] = False if 'ampcode' in missing: # use a default of b'' missing -= {'ampcode'} for param in params: param['ampcode'] = b'' if missing: raise InvalidFile( '%s: missing parameter %s' % (oqparam.inputs['site_model'], ', '.join(missing))) # NB: the sorted in sorted(params[0]) is essential, otherwise there is # an heisenbug in scenario/test_case_4 site_model_dt = numpy.dtype([(p, site.site_param_dt[p]) for p in sorted(params[0])]) sm = numpy.array([ tuple(param[name] for name in site_model_dt.names) for param in params ], site_model_dt) dupl = "\n".join('%s %s' % loc for loc, n in countby(sm, 'lon', 'lat').items() if n > 1) if dupl: raise InvalidFile('There are duplicated sites in %s:\n%s' % (fname, dupl)) arrays.append(sm) return numpy.concatenate(arrays)
def __init__(self, **names_vals): super(OqParam, self).__init__(**names_vals) job_ini = self.inputs['job_ini'] if 'calculation_mode' not in names_vals: raise InvalidFile('Missing calculation_mode in %s' % job_ini) self.risk_investigation_time = (self.risk_investigation_time or self.investigation_time) if ('intensity_measure_types_and_levels' in names_vals and 'intensity_measure_types' in names_vals): logging.warn('Ignoring intensity_measure_types since ' 'intensity_measure_types_and_levels is set') if 'iml_disagg' in names_vals: self.hazard_imtls = self.iml_disagg if 'intensity_measure_types_and_levels' in names_vals: raise InvalidFile( 'Please remove the intensity_measure_types_and_levels ' 'from %s: they will be inferred from the iml_disagg ' 'dictionary' % job_ini) elif 'intensity_measure_types_and_levels' in names_vals: self.hazard_imtls = self.intensity_measure_types_and_levels delattr(self, 'intensity_measure_types_and_levels') elif 'intensity_measure_types' in names_vals: self.hazard_imtls = dict.fromkeys(self.intensity_measure_types) delattr(self, 'intensity_measure_types') self._file_type, self._risk_files = get_risk_files(self.inputs) self.check_source_model() if self.hazard_precomputed(): self.check_missing('site_model', 'error') self.check_missing('gsim_logic_tree', 'error') self.check_missing('source_model_logic_tree', 'error') # check the gsim_logic_tree if self.inputs.get('gsim_logic_tree'): if self.gsim: raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there' ' must be no `gsim` key' % job_ini) path = os.path.join(self.base_path, self.inputs['gsim_logic_tree']) gsim_lt = logictree.GsimLogicTree(path, ['*']) # check the number of branchsets branchsets = len(gsim_lt._ltnode) if 'scenario' in self.calculation_mode and branchsets > 1: raise InvalidFile( '%s: %s for a scenario calculation must contain a single ' 'branchset, found %d!' % (job_ini, path, branchsets)) # check the IMTs vs the GSIMs self._gsims_by_trt = gsim_lt.values for gsims in self._gsims_by_trt.values(): self.check_gsims(gsims) elif self.gsim is not None: self.check_gsims([self.gsim]) # checks for disaggregation if self.calculation_mode == 'disaggregation': if not self.poes_disagg and not self.iml_disagg: raise InvalidFile('poes_disagg or iml_disagg must be set ' 'in %(job_ini)s' % self.inputs) elif self.poes_disagg and self.iml_disagg: raise InvalidFile( '%s: iml_disagg and poes_disagg cannot be set ' 'at the same time' % job_ini) for k in ('mag_bin_width', 'distance_bin_width', 'coordinate_bin_width', 'num_epsilon_bins'): if k not in vars(self): raise InvalidFile('%s must be set in %s' % (k, job_ini)) # checks for classical_damage if self.calculation_mode == 'classical_damage': if self.conditional_loss_poes: raise InvalidFile('%s: conditional_loss_poes are not defined ' 'for classical_damage calculations' % job_ini) # checks for event_based_risk if (self.calculation_mode == 'event_based_risk' and self.asset_correlation not in (0, 1)): raise ValueError('asset_correlation != {0, 1} is no longer' ' supported') elif (self.calculation_mode == 'event_based_risk' and self.conditional_loss_poes and not self.asset_loss_table): raise InvalidFile( '%s: asset_loss_table is not set, probably you want to remove' ' conditional_loss_poes' % job_ini) # check for GMFs from file if (self.inputs.get('gmfs', '').endswith('.csv') and not self.sites and 'sites' not in self.inputs): raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini) # checks for ucerf if 'ucerf' in self.calculation_mode: if self.ses_per_logic_tree_path >= TWO16: raise ValueError('ses_per_logic_tree_path too big: %d' % self.ses_per_logic_tree_path) if self.number_of_logic_tree_samples >= TWO16: raise ValueError('number_of_logic_tree_samples too big: %d' % self.number_of_logic_tree_samples)
def get_risk_models(oqparam, kind='vulnerability fragility consequence ' 'vulnerability_retrofitted'): """ :param oqparam: an OqParam instance :param kind: a space-separated string with the kinds of risk models to read :returns: a dictionary riskid -> loss_type, kind -> function """ kinds = kind.split() rmodels = AccumDict() for kind in kinds: for key in sorted(oqparam.inputs): mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key) if mo: loss_type = mo.group(1) # the cost_type in the key # can be occupants, structural, nonstructural, ... rmodel = nrml.to_python(oqparam.inputs[key]) if len(rmodel) == 0: raise InvalidFile('%s is empty!' % oqparam.inputs[key]) rmodels[loss_type, kind] = rmodel if rmodel.lossCategory is None: # NRML 0.4 continue cost_type = str(rmodel.lossCategory) rmodel_kind = rmodel.__class__.__name__ kind_ = kind.replace('_retrofitted', '') # strip retrofitted if not rmodel_kind.lower().startswith(kind_): raise ValueError('Error in the file "%s_file=%s": is ' 'of kind %s, expected %s' % (key, oqparam.inputs[key], rmodel_kind, kind.capitalize() + 'Model')) if cost_type != loss_type: raise ValueError( 'Error in the file "%s_file=%s": lossCategory is of ' 'type "%s", expected "%s"' % (key, oqparam.inputs[key], rmodel.lossCategory, loss_type)) rdict = AccumDict(accum={}) rdict.limit_states = [] for (loss_type, kind), rm in sorted(rmodels.items()): if kind == 'fragility': # build a copy of the FragilityModel with different IM levels newfm = rm.build(oqparam.continuous_fragility_discretization, oqparam.steps_per_interval) for (imt, riskid), ffl in sorted(newfm.items()): if not rdict.limit_states: rdict.limit_states.extend(rm.limitStates) # we are rejecting the case of loss types with different # limit states; this may change in the future assert rdict.limit_states == rm.limitStates, ( rdict.limit_states, rm.limitStates) rdict[riskid][loss_type, kind] = ffl # TODO: see if it is possible to remove the attribute # below, used in classical_damage ffl.steps_per_interval = oqparam.steps_per_interval elif kind == 'consequence': for riskid, cf in sorted(rm.items()): rdict[riskid][loss_type, kind] = cf else: # vulnerability, vulnerability_retrofitted cl_risk = oqparam.calculation_mode in ('classical', 'classical_risk') # only for classical_risk reduce the loss_ratios # to make sure they are strictly increasing for (imt, riskid), rf in sorted(rm.items()): rdict[riskid][loss_type, kind] = (rf.strictly_increasing() if cl_risk else rf) return rdict
def __init__(self, **names_vals): # support legacy names for name in list(names_vals): if name == 'quantile_hazard_curves': names_vals['quantiles'] = names_vals.pop(name) elif name == 'mean_hazard_curves': names_vals['mean'] = names_vals.pop(name) elif name == 'max': names_vals['max'] = names_vals.pop(name) super().__init__(**names_vals) job_ini = self.inputs['job_ini'] if 'calculation_mode' not in names_vals: raise InvalidFile('Missing calculation_mode in %s' % job_ini) if 'region_constraint' in names_vals: if 'region' in names_vals: raise InvalidFile('You cannot have both region and ' 'region_constraint in %s' % job_ini) logging.warning( 'region_constraint is obsolete, use region instead') self.region = valid.wkt_polygon( names_vals.pop('region_constraint')) self.risk_investigation_time = (self.risk_investigation_time or self.investigation_time) if ('intensity_measure_types_and_levels' in names_vals and 'intensity_measure_types' in names_vals): logging.warning('Ignoring intensity_measure_types since ' 'intensity_measure_types_and_levels is set') if 'iml_disagg' in names_vals: self.iml_disagg.pop('default') # normalize things like SA(0.10) -> SA(0.1) self.iml_disagg = { str(from_string(imt)): val for imt, val in self.iml_disagg.items() } self.hazard_imtls = self.iml_disagg if 'intensity_measure_types_and_levels' in names_vals: raise InvalidFile( 'Please remove the intensity_measure_types_and_levels ' 'from %s: they will be inferred from the iml_disagg ' 'dictionary' % job_ini) elif 'intensity_measure_types_and_levels' in names_vals: self.hazard_imtls = self.intensity_measure_types_and_levels delattr(self, 'intensity_measure_types_and_levels') lens = set(map(len, self.hazard_imtls.values())) if len(lens) > 1: dic = {imt: len(ls) for imt, ls in self.hazard_imtls.items()} warnings.warn( 'Each IMT must have the same number of levels, instead ' 'you have %s' % dic, DeprecationWarning) elif 'intensity_measure_types' in names_vals: self.hazard_imtls = dict.fromkeys(self.intensity_measure_types) delattr(self, 'intensity_measure_types') self._risk_files = get_risk_files(self.inputs) self.check_source_model() if self.hazard_precomputed() and self.job_type == 'risk': self.check_missing('site_model', 'debug') self.check_missing('gsim_logic_tree', 'debug') self.check_missing('source_model_logic_tree', 'debug') # check the gsim_logic_tree if self.inputs.get('gsim_logic_tree'): if self.gsim != '[FromFile]': raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there' ' must be no `gsim` key' % job_ini) path = os.path.join(self.base_path, self.inputs['gsim_logic_tree']) gsim_lt = logictree.GsimLogicTree(path, ['*']) # check the number of branchsets branchsets = len(gsim_lt._ltnode) if 'scenario' in self.calculation_mode and branchsets > 1: raise InvalidFile( '%s: %s for a scenario calculation must contain a single ' 'branchset, found %d!' % (job_ini, path, branchsets)) # check the IMTs vs the GSIMs self._gsims_by_trt = gsim_lt.values for gsims in gsim_lt.values.values(): self.check_gsims(gsims) elif self.gsim is not None: self.check_gsims([valid.gsim(self.gsim, self.base_path)]) # check inputs unknown = set(self.inputs) - self.KNOWN_INPUTS if unknown: raise ValueError('Unknown key %s_file in %s' % (unknown.pop(), self.inputs['job_ini'])) # checks for disaggregation if self.calculation_mode == 'disaggregation': if not self.poes_disagg and not self.iml_disagg: raise InvalidFile('poes_disagg or iml_disagg must be set ' 'in %(job_ini)s' % self.inputs) elif self.poes_disagg and self.iml_disagg: raise InvalidFile( '%s: iml_disagg and poes_disagg cannot be set ' 'at the same time' % job_ini) for k in ('mag_bin_width', 'distance_bin_width', 'coordinate_bin_width', 'num_epsilon_bins'): if k not in vars(self): raise InvalidFile('%s must be set in %s' % (k, job_ini)) # checks for classical_damage if self.calculation_mode == 'classical_damage': if self.conditional_loss_poes: raise InvalidFile('%s: conditional_loss_poes are not defined ' 'for classical_damage calculations' % job_ini) # checks for event_based_risk if (self.calculation_mode == 'event_based_risk' and self.asset_correlation not in (0, 1)): raise ValueError('asset_correlation != {0, 1} is no longer' ' supported') # checks for ebrisk if self.calculation_mode == 'ebrisk': if self.risk_investigation_time is None: raise InvalidFile('Please set the risk_investigation_time in' ' %s' % job_ini) # check for GMFs from file if (self.inputs.get('gmfs', '').endswith('.csv') and 'sites' not in self.inputs and self.sites is None): raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini) elif self.inputs.get('gmfs', '').endswith('.xml'): raise InvalidFile('%s: GMFs in XML are not supported anymore' % job_ini) # checks for event_based if 'event_based' in self.calculation_mode: if self.ses_per_logic_tree_path >= TWO32: raise ValueError('ses_per_logic_tree_path too big: %d' % self.ses_per_logic_tree_path) if self.number_of_logic_tree_samples >= TWO16: raise ValueError('number_of_logic_tree_samples too big: %d' % self.number_of_logic_tree_samples) # check grid + sites if self.region_grid_spacing and ('sites' in self.inputs or self.sites): raise ValueError('You are specifying grid and sites at the same ' 'time: which one do you want?') # check for amplification if ('amplification' in self.inputs and self.imtls and self.calculation_mode in ['classical', 'classical_risk', 'disaggregation']): check_same_levels(self.imtls)
def get_source_models(oqparam, gsim_lt, source_model_lt, in_memory=True): """ Build all the source models generated by the logic tree. :param oqparam: an :class:`openquake.commonlib.oqvalidation.OqParam` instance :param gsim_lt: a :class:`openquake.commonlib.logictree.GsimLogicTree` instance :param source_model_lt: a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance :param in_memory: if True, keep in memory the sources, else just collect the TRTs :returns: an iterator over :class:`openquake.commonlib.logictree.SourceModel` tuples """ converter = sourceconverter.SourceConverter( oqparam.investigation_time, oqparam.rupture_mesh_spacing, oqparam.complex_fault_mesh_spacing, oqparam.width_of_mfd_bin, oqparam.area_source_discretization) psr = nrml.SourceModelParser(converter) # consider only the effective realizations for sm in source_model_lt.gen_source_models(gsim_lt): src_groups = [] for name in sm.name.split(): fname = possibly_gunzip( os.path.abspath(os.path.join(oqparam.base_path, name))) if in_memory: apply_unc = source_model_lt.make_apply_uncertainties(sm.path) try: logging.info('Parsing %s', fname) src_groups.extend(psr.parse_src_groups(fname, apply_unc)) except ValueError as e: if str(e) in ('Surface does not conform with Aki & ' 'Richards convention', 'Edges points are not in the right order'): raise InvalidFile('''\ %s: %s. Probably you are using an obsolete model. In that case you can fix the file with the command python -m openquake.engine.tools.correct_complex_sources %s ''' % (fname, e, fname)) else: raise else: # just collect the TRT models smodel = nrml.read(fname).sourceModel if smodel[0].tag.endswith('sourceGroup'): # NRML 0.5 format for sg_node in smodel: sg = sourceconverter.SourceGroup( sg_node['tectonicRegion']) sg.sources = sg_node.nodes src_groups.append(sg) else: # NRML 0.4 format: smodel is a list of source nodes src_groups.extend( sourceconverter.SourceGroup.collect(smodel)) num_sources = sum(len(sg.sources) for sg in src_groups) sm.src_groups = src_groups trts = [mod.trt for mod in src_groups] source_model_lt.tectonic_region_types.update(trts) logging.info( 'Processed source model %d with %d potential gsim path(s) and %d ' 'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources) gsim_file = oqparam.inputs.get('gsim_logic_tree') if gsim_file: # check TRTs for src_group in src_groups: if src_group.trt not in gsim_lt.values: raise ValueError( "Found in %r a tectonic region type %r inconsistent " "with the ones in %r" % (sm, src_group.trt, gsim_file)) yield sm # log if some source file is being used more than once for fname, hits in psr.fname_hits.items(): if hits > 1: logging.info('%s has been considered %d times', fname, hits)
def get_input_files(oqparam, hazard=False): """ :param oqparam: an OqParam instance :param hazard: if True, consider only the hazard files :returns: input path names in a specific order """ fnames = set() # files entering in the checksum uri = oqparam.shakemap_uri if isinstance(uri, dict) and uri: if uri['kind'] == 'shapefile': fname = os.path.join(oqparam.base_path, uri['fname']) fnames.update(get_shapefiles(os.path.dirname(fname))) else: # xml local files for key, val in uri.items(): if key == 'fname' or (key.endswith('_url') and os.path.exists(val)): fname = os.path.join(oqparam.base_path, val) uri[key] = fname fnames.add(fname) for key in oqparam.inputs: fname = oqparam.inputs[key] if hazard and key not in ('source_model_logic_tree', 'gsim_logic_tree', 'source'): continue # collect .hdf5 tables for the GSIMs, if any elif key == 'gsim_logic_tree': gsim_lt = get_gsim_lt(oqparam) for gsims in gsim_lt.values.values(): for gsim in gsims: for k, v in gsim.kwargs.items(): if k.endswith(('_file', '_table')): fnames.add(v) fnames.add(fname) elif key == 'source_model': # UCERF f = oqparam.inputs['source_model'] fnames.add(f) fname = nrml.read(f).sourceModel.UCERFSource['filename'] fnames.add(os.path.join(os.path.dirname(f), fname)) elif key == 'exposure': # fname is a list for exp in asset.Exposure.read_headers(fname): fnames.update(exp.datafiles) fnames.update(fname) elif isinstance(fname, dict): fnames.update(fname.values()) elif isinstance(fname, list): for f in fname: if f == oqparam.input_dir: raise InvalidFile('%s there is an empty path in %s' % (oqparam.inputs['job_ini'], key)) fnames.update(fname) elif key == 'source_model_logic_tree': args = (fname, oqparam.random_seed, oqparam.number_of_logic_tree_samples, oqparam.sampling_method) try: smlt = smlt_cache[args] except KeyError: smlt = smlt_cache[args] = logictree.SourceModelLogicTree(*args) fnames.update(smlt.hdf5_files) fnames.update(smlt.info.smpaths) fnames.add(fname) else: fnames.add(fname) return sorted(fnames)
def pre_execute(self): """ Check if there is a previous calculation ID. If yes, read the inputs by retrieving the previous calculation; if not, read the inputs directly. """ oq = self.oqparam if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs: # read hazard from files assert not oq.hazard_calculation_id, ( 'You cannot use --hc together with gmfs_file') self.read_inputs() if 'gmfs' in oq.inputs: if not oq.inputs['gmfs'].endswith('.csv'): raise NotImplementedError('Importer for %s' % oq.inputs['gmfs']) E = len( import_gmfs(self.datastore, oq.inputs['gmfs'], self.sitecol.complete.sids)) if hasattr(oq, 'number_of_ground_motion_fields'): if oq.number_of_ground_motion_fields != E: raise RuntimeError( 'Expected %d ground motion fields, found %d' % (oq.number_of_ground_motion_fields, E)) else: # set the number of GMFs from the file oq.number_of_ground_motion_fields = E else: self.save_multi_peril() elif 'hazard_curves' in oq.inputs: # read hazard from file assert not oq.hazard_calculation_id, ( 'You cannot use --hc together with hazard_curves') haz_sitecol = readinput.get_site_collection(oq) # NB: horrible: get_site_collection calls get_pmap_from_nrml # that sets oq.investigation_time, so it must be called first self.load_riskmodel() # must be after get_site_collection self.read_exposure(haz_sitecol) # define .assets_by_site self.datastore['poes/grp-00'] = fix_ones(readinput.pmap) self.datastore['sitecol'] = self.sitecol self.datastore['assetcol'] = self.assetcol self.datastore['csm_info'] = fake = source.CompositionInfo.fake() self.rlzs_assoc = fake.get_rlzs_assoc() self.datastore['rlzs_by_grp'] = self.rlzs_assoc.by_grp() elif oq.hazard_calculation_id: parent = util.read(oq.hazard_calculation_id) self.check_precalc(parent['oqparam'].calculation_mode) self.datastore.parent = parent # copy missing parameters from the parent if 'concurrent_tasks' not in vars(self.oqparam): self.oqparam.concurrent_tasks = ( self.oqparam.__class__.concurrent_tasks.default) params = { name: value for name, value in vars(parent['oqparam']).items() if name not in vars(self.oqparam) } self.save_params(**params) self.read_inputs() oqp = parent['oqparam'] if oqp.investigation_time != oq.investigation_time: raise ValueError( 'The parent calculation was using investigation_time=%s' ' != %s' % (oqp.investigation_time, oq.investigation_time)) if oqp.minimum_intensity != oq.minimum_intensity: raise ValueError( 'The parent calculation was using minimum_intensity=%s' ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity)) hstats, rstats = list(oqp.hazard_stats()), list(oq.hazard_stats()) if hstats != rstats: raise ValueError('The parent calculation had stats %s != %s' % (hstats, rstats)) missing_imts = set(oq.risk_imtls) - set(oqp.imtls) if missing_imts: raise ValueError( 'The parent calculation is missing the IMT(s) %s' % ', '.join(missing_imts)) elif self.__class__.precalc: calc = calculators[self.__class__.precalc](self.oqparam, self.datastore.calc_id) calc.run() self.param = calc.param self.sitecol = calc.sitecol if hasattr(calc, 'assetcol'): self.assetcol = calc.assetcol if hasattr(calc, 'riskmodel'): self.riskmodel = calc.riskmodel if hasattr(calc, 'rlzs_assoc'): self.rlzs_assoc = calc.rlzs_assoc else: # this happens for instance for a scenario_damage without # rupture, gmfs, multi_peril raise InvalidFile( '%(job_ini)s: missing gmfs_csv, multi_peril_csv' % oq.inputs) if hasattr(calc, 'csm'): # no scenario self.csm = calc.csm else: self.read_inputs() if self.riskmodel: self.save_riskmodel()
def _get_exposure(fname, stop=None): """ :param fname: path of the XML file containing the exposure :param stop: node at which to stop parsing (or None) :returns: a pair (Exposure instance, list of asset nodes) """ [exposure] = nrml.read(fname, stop=stop) if not exposure.tag.endswith('exposureModel'): raise InvalidFile('%s: expected exposureModel, got %s' % (fname, exposure.tag)) description = exposure.description try: conversions = exposure.conversions except AttributeError: conversions = Node('conversions', nodes=[Node('costTypes', [])]) try: inslimit = conversions.insuranceLimit except AttributeError: inslimit = Node('insuranceLimit', text=True) try: deductible = conversions.deductible except AttributeError: deductible = Node('deductible', text=True) try: area = conversions.area except AttributeError: # NB: the area type cannot be an empty string because when sending # around the CostCalculator object we would run into this numpy bug # about pickling dictionaries with empty strings: # https://github.com/numpy/numpy/pull/5475 area = Node('area', dict(type='?')) try: occupancy_periods = exposure.occupancyPeriods.text or '' except AttributeError: occupancy_periods = '' try: tagNames = exposure.tagNames except AttributeError: tagNames = Node('tagNames', text='') tagnames = ~tagNames or [] tagnames.insert(0, 'taxonomy') # read the cost types and make some check cost_types = [] retrofitted = False for ct in conversions.costTypes: with context(fname, ct): ctname = ct['name'] if ctname == 'structural' and 'retrofittedType' in ct.attrib: if ct['retrofittedType'] != ct['type']: raise ValueError( 'The retrofittedType %s is different from the type' '%s' % (ct['retrofittedType'], ct['type'])) if ct['retrofittedUnit'] != ct['unit']: raise ValueError( 'The retrofittedUnit %s is different from the unit' '%s' % (ct['retrofittedUnit'], ct['unit'])) retrofitted = True cost_types.append( (ctname, valid.cost_type_type(ct['type']), ct['unit'])) if 'occupants' in cost_types: cost_types.append(('occupants', 'per_area', 'people')) cost_types.sort(key=operator.itemgetter(0)) cost_types = numpy.array(cost_types, cost_type_dt) insurance_limit_is_absolute = il = inslimit.get('isAbsolute') deductible_is_absolute = de = deductible.get('isAbsolute') cc = CostCalculator( {}, {}, {}, True if de is None else de, True if il is None else il, {name: i for i, name in enumerate(tagnames)}, ) for ct in cost_types: name = ct['name'] # structural, nonstructural, ... cc.cost_types[name] = ct['type'] # aggregated, per_asset, per_area cc.area_types[name] = area['type'] cc.units[name] = ct['unit'] assets = [] asset_refs = [] exp = Exposure(exposure['id'], exposure['category'], description.text, cost_types, occupancy_periods, insurance_limit_is_absolute, deductible_is_absolute, retrofitted, area.attrib, assets, asset_refs, cc, TagCollection(tagnames)) return exp, exposure.assets
def info(calculators, gsims, views, exports, extracts, parameters, report, input_file=''): """ Give information. You can pass the name of an available calculator, a job.ini file, or a zip archive with the input files. """ if calculators: for calc in sorted(base.calculators): print(calc) if gsims: for gs in gsim.get_available_gsims(): print(gs) if views: for name in sorted(view): print(name) if exports: dic = groupby(export, operator.itemgetter(0), lambda group: [r[1] for r in group]) n = 0 for exporter, formats in dic.items(): print(exporter, formats) n += len(formats) print('There are %d exporters defined.' % n) if extracts: for key in extract: func = extract[key] if hasattr(func, '__wrapped__'): fm = FunctionMaker(func.__wrapped__) else: fm = FunctionMaker(func) print('%s(%s)%s' % (fm.name, fm.signature, fm.doc)) if parameters: params = [] for val in vars(OqParam).values(): if hasattr(val, 'name'): params.append(val) params.sort(key=lambda x: x.name) for param in params: print(param.name) if os.path.isdir(input_file) and report: with Monitor('info', measuremem=True) as mon: with mock.patch.object(logging.root, 'info'): # reduce logging do_build_reports(input_file) print(mon) elif input_file.endswith('.xml'): node = nrml.read(input_file) if node[0].tag.endswith('sourceModel'): if node['xmlns'].endswith('nrml/0.4'): raise InvalidFile( '%s is in NRML 0.4 format, please run the following ' 'command:\noq upgrade_nrml %s' % ( input_file, os.path.dirname(input_file) or '.')) print(source_model_info([node[0]])) elif node[0].tag.endswith('logicTree'): nodes = [nrml.read(sm_path)[0] for sm_path in logictree.collect_info(input_file).smpaths] print(source_model_info(nodes)) else: print(node.to_str()) elif input_file.endswith(('.ini', '.zip')): with Monitor('info', measuremem=True) as mon: if report: print('Generated', reportwriter.build_report(input_file)) else: print_csm_info(input_file) if mon.duration > 1: print(mon) elif input_file: print("No info for '%s'" % input_file)
def execute(self): oq = self.oqparam self.set_param() self.offset = 0 srcfilter = self.src_filter(self.datastore.tempname) self.indices = AccumDict(accum=[]) # sid, idx -> indices if oq.hazard_calculation_id: # from ruptures self.datastore.parent = util.read(oq.hazard_calculation_id) self.init_logic_tree(self.datastore.parent['csm_info']) else: # from sources self.build_events_from_sources(srcfilter) if (oq.ground_motion_fields is False and oq.hazard_curves_from_gmfs is False): return {} if not oq.imtls: raise InvalidFile('There are no intensity measure types in %s' % oq.inputs['job_ini']) N = len(self.sitecol.complete) if oq.ground_motion_fields: nrups = len(self.datastore['ruptures']) self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt()) self.datastore.create_dset('gmf_data/sigma_epsilon', sig_eps_dt(oq.imtls)) self.datastore.create_dset('gmf_data/indices', hdf5.vuint32, shape=(N, 2), fillvalue=None) self.datastore.create_dset('gmf_data/events_by_sid', U32, (N, )) self.datastore.create_dset('gmf_data/time_by_rup', time_dt, (nrups, ), fillvalue=None) if oq.hazard_curves_from_gmfs: self.param['rlz_by_event'] = self.datastore['events']['rlz_id'] # compute_gmfs in parallel self.datastore.swmr_on() logging.info('Reading %d ruptures', len(self.datastore['ruptures'])) iterargs = ((rgetter, srcfilter, self.param) for rgetter in gen_rupture_getters(self.datastore, srcfilter=srcfilter)) acc = parallel.Starmap(self.core_task.__func__, iterargs, h5=self.datastore.hdf5, num_cores=oq.num_cores).reduce( self.agg_dicts, self.acc0()) if self.indices: dset = self.datastore['gmf_data/indices'] num_evs = self.datastore['gmf_data/events_by_sid'] logging.info('Saving gmf_data/indices') with self.monitor('saving gmf_data/indices', measuremem=True): self.datastore['gmf_data/imts'] = ' '.join(oq.imtls) for sid in self.sitecol.complete.sids: start = numpy.array(self.indices[sid, 0]) stop = numpy.array(self.indices[sid, 1]) dset[sid, 0] = start dset[sid, 1] = stop num_evs[sid] = (stop - start).sum() avg_events_by_sid = num_evs[()].sum() / N logging.info('Found ~%d GMVs per site', avg_events_by_sid) elif oq.ground_motion_fields: raise RuntimeError('No GMFs were generated, perhaps they were ' 'all below the minimum_intensity threshold') return acc
def get_gmfs(oqparam): """ :param oqparam: an :class:`openquake.commonlib.oqvalidation.OqParam` instance :returns: sitecol, eids, gmf array of shape (R, N, E, M) """ M = len(oqparam.imtls) fname = oqparam.inputs['gmfs'] if fname.endswith('.csv'): array = writers.read_composite_array(fname).array R = len(numpy.unique(array['rlzi'])) if R > 1: raise InvalidFile('%s: found %d realizations, currently only one ' 'realization is supported' % (fname, R)) # the array has the structure rlzi, sid, eid, gmv_PGA, gmv_... dtlist = [(name, array.dtype[name]) for name in array.dtype.names[:3]] required_imts = list(oqparam.imtls) imts = [name[4:] for name in array.dtype.names[3:]] if imts != required_imts: raise ValueError('Required %s, but %s contains %s' % ( required_imts, fname, imts)) dtlist.append(('gmv', (F32, M))) eids = numpy.unique(array['eid']) E = len(eids) found_eids = set(eids) expected_eids = set(range(E)) # expected incremental eids missing_eids = expected_eids - found_eids if missing_eids: raise InvalidFile('Missing eids in the gmfs.csv file: %s' % missing_eids) assert expected_eids == found_eids, (expected_eids, found_eids) eidx = {eid: e for e, eid in enumerate(eids)} sitecol = get_site_collection(oqparam) expected_sids = set(sitecol.sids) found_sids = set(numpy.unique(array['sid'])) missing_sids = found_sids - expected_sids if missing_sids: raise InvalidFile( 'Found site IDs missing in the sites.csv file: %s' % missing_sids) N = len(sitecol) gmfs = numpy.zeros((R, N, E, M), F32) counter = collections.Counter() for row in array.view(dtlist): key = row['rlzi'], row['sid'], eidx[row['eid']] gmfs[key] = row['gmv'] counter[key] += 1 dupl = [key for key in counter if counter[key] > 1] if dupl: raise InvalidFile('Duplicated (rlzi, sid, eid) in the GMFs file: ' '%s' % dupl) elif fname.endswith('.xml'): eids, gmfs_by_imt = get_scenario_from_nrml(oqparam, fname) N, E = gmfs_by_imt.shape gmfs = numpy.zeros((1, N, E, M), F32) for imti, imtstr in enumerate(oqparam.imtls): gmfs[0, :, :, imti] = gmfs_by_imt[imtstr] else: raise NotImplemented('Reading from %s' % fname) return eids, gmfs
def __init__(self, **names_vals): for name in list(names_vals): if name == 'quantile_hazard_curves': names_vals['quantiles'] = names_vals.pop(name) super().__init__(**names_vals) job_ini = self.inputs['job_ini'] if 'calculation_mode' not in names_vals: raise InvalidFile('Missing calculation_mode in %s' % job_ini) if 'region_constraint' in names_vals: if 'region' in names_vals: raise InvalidFile('You cannot have both region and ' 'region_constraint in %s' % job_ini) logging.warning( 'region_constraint is obsolete, use region instead') self.region = valid.wkt_polygon( names_vals.pop('region_constraint')) self.risk_investigation_time = (self.risk_investigation_time or self.investigation_time) if ('intensity_measure_types_and_levels' in names_vals and 'intensity_measure_types' in names_vals): logging.warning('Ignoring intensity_measure_types since ' 'intensity_measure_types_and_levels is set') if 'iml_disagg' in names_vals: self.hazard_imtls = self.iml_disagg if 'intensity_measure_types_and_levels' in names_vals: raise InvalidFile( 'Please remove the intensity_measure_types_and_levels ' 'from %s: they will be inferred from the iml_disagg ' 'dictionary' % job_ini) elif 'intensity_measure_types_and_levels' in names_vals: self.hazard_imtls = self.intensity_measure_types_and_levels delattr(self, 'intensity_measure_types_and_levels') elif 'intensity_measure_types' in names_vals: self.hazard_imtls = dict.fromkeys(self.intensity_measure_types) delattr(self, 'intensity_measure_types') self._file_type, self._risk_files = get_risk_files(self.inputs) self.check_source_model() if (self.hazard_calculation_id and self.calculation_mode == 'ucerf_risk'): raise ValueError('You cannot use the --hc option with ucerf_risk') if self.hazard_precomputed() and self.job_type == 'risk': self.check_missing('site_model', 'debug') self.check_missing('gsim_logic_tree', 'debug') self.check_missing('source_model_logic_tree', 'debug') # check the gsim_logic_tree if self.inputs.get('gsim_logic_tree'): if self.gsim != '[FromFile]': raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there' ' must be no `gsim` key' % job_ini) path = os.path.join(self.base_path, self.inputs['gsim_logic_tree']) gsim_lt = logictree.GsimLogicTree(path, ['*']) # check the number of branchsets branchsets = len(gsim_lt._ltnode) if 'scenario' in self.calculation_mode and branchsets > 1: raise InvalidFile( '%s: %s for a scenario calculation must contain a single ' 'branchset, found %d!' % (job_ini, path, branchsets)) # check the IMTs vs the GSIMs self._gsims_by_trt = gsim_lt.values for gsims in self._gsims_by_trt.values(): self.check_gsims(gsims) elif self.gsim is not None: self.check_gsims([valid.gsim(self.gsim)]) # checks for disaggregation if self.calculation_mode == 'disaggregation': if not self.poes_disagg and not self.iml_disagg: raise InvalidFile('poes_disagg or iml_disagg must be set ' 'in %(job_ini)s' % self.inputs) elif self.poes_disagg and self.iml_disagg: raise InvalidFile( '%s: iml_disagg and poes_disagg cannot be set ' 'at the same time' % job_ini) for k in ('mag_bin_width', 'distance_bin_width', 'coordinate_bin_width', 'num_epsilon_bins'): if k not in vars(self): raise InvalidFile('%s must be set in %s' % (k, job_ini)) # checks for classical_damage if self.calculation_mode == 'classical_damage': if self.conditional_loss_poes: raise InvalidFile('%s: conditional_loss_poes are not defined ' 'for classical_damage calculations' % job_ini) # checks for event_based_risk if (self.calculation_mode == 'event_based_risk' and self.asset_correlation not in (0, 1)): raise ValueError('asset_correlation != {0, 1} is no longer' ' supported') # checks for ebrisk if self.calculation_mode == 'ebrisk': if self.insured_losses: raise ValueError('ebrisk does not support insured losses') elif self.number_of_logic_tree_samples == 0: logging.warning('ebrisk is not meant for full enumeration') # check for GMFs from file if (self.inputs.get('gmfs', '').endswith('.csv') and not self.sites and 'sites' not in self.inputs): raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini) elif (self.inputs.get('gmfs', '').endswith('.xml') and 'sites' in self.inputs): raise InvalidFile('%s: You cannot have both sites_csv and ' 'gmfs_file' % job_ini) # checks for event_based if 'event_based' in self.calculation_mode: if self.ses_per_logic_tree_path >= TWO32: raise ValueError('ses_per_logic_tree_path too big: %d' % self.ses_per_logic_tree_path) if self.number_of_logic_tree_samples >= TWO16: raise ValueError('number_of_logic_tree_samples too big: %d' % self.number_of_logic_tree_samples) # check grid + sites if (self.region_grid_spacing and 'site_model' in self.inputs and 'exposure' in self.inputs): logging.warning( 'You are specifying a grid, a site model and an exposure at ' 'the same time: consider using `oq prepare_site_model`')
def ffconvert(fname, limit_states, ff, min_iml=1E-10): """ Convert a fragility function into a numpy array plus a bunch of attributes. :param fname: path to the fragility model file :param limit_states: expected limit states :param ff: fragility function node :returns: a pair (array, dictionary) """ with context(fname, ff): ffs = ff[1:] imls = ff.imls nodamage = imls.attrib.get('noDamageLimit') if nodamage == 0: # use a cutoff to avoid log(0) in GMPE.to_distribution_values logging.warning('Found a noDamageLimit=0 in %s, line %s, ' 'using %g instead', fname, ff.lineno, min_iml) nodamage = min_iml with context(fname, imls): attrs = dict(format=ff['format'], imt=imls['imt'], id=ff['id'], nodamage=nodamage) LS = len(limit_states) if LS != len(ffs): with context(fname, ff): raise InvalidFile('expected %d limit states, found %d' % (LS, len(ffs))) if ff['format'] == 'continuous': minIML = float(imls['minIML']) if minIML == 0: # use a cutoff to avoid log(0) in GMPE.to_distribution_values logging.warning('Found minIML=0 in %s, line %s, using %g instead', fname, imls.lineno, min_iml) minIML = min_iml attrs['minIML'] = minIML attrs['maxIML'] = float(imls['maxIML']) array = numpy.zeros(LS, [('mean', F64), ('stddev', F64)]) for i, ls, node in zip(range(LS), limit_states, ff[1:]): if ls != node['ls']: with context(fname, node): raise InvalidFile('expected %s, found' % (ls, node['ls'])) array['mean'][i] = node['mean'] array['stddev'][i] = node['stddev'] elif ff['format'] == 'discrete': attrs['imls'] = ~imls valid.check_levels(attrs['imls'], attrs['imt'], min_iml) num_poes = len(attrs['imls']) array = numpy.zeros((LS, num_poes)) for i, ls, node in zip(range(LS), limit_states, ff[1:]): with context(fname, node): if ls != node['ls']: raise InvalidFile('expected %s, found' % (ls, node['ls'])) poes = (~node if isinstance(~node, list) else valid.probabilities(~node)) if len(poes) != num_poes: raise InvalidFile('expected %s, found' % (num_poes, len(poes))) array[i, :] = poes # NB: the format is constrained in nrml.FragilityNode to be either # discrete or continuous, there is no third option return array, attrs
def get_mesh(oqparam): """ Extract the mesh of points to compute from the sites, the sites_csv, the region, the site model, the exposure in this order. :param oqparam: an :class:`openquake.commonlib.oqvalidation.OqParam` instance """ global pmap, exposure, gmfs, eids if 'exposure' in oqparam.inputs and exposure is None: # read it only once exposure = get_exposure(oqparam) if oqparam.sites: return geo.Mesh.from_coords(oqparam.sites) elif 'sites' in oqparam.inputs: fname = oqparam.inputs['sites'] header = get_csv_header(fname) if 'lon' in header: data = [] for i, row in enumerate( csv.DictReader(open(fname, encoding='utf-8-sig'))): if header[0] == 'site_id' and row['site_id'] != str(i): raise InvalidFile('%s: expected site_id=%d, got %s' % (fname, i, row['site_id'])) data.append(' '.join([row['lon'], row['lat']])) elif 'gmfs' in oqparam.inputs: raise InvalidFile('Missing header in %(sites)s' % oqparam.inputs) else: data = [ line.replace(',', ' ') for line in open(fname, encoding='utf-8-sig') ] coords = valid.coordinates(','.join(data)) start, stop = oqparam.sites_slice c = (coords[start:stop] if header[0] == 'site_id' else sorted(coords[start:stop])) # NB: Notice the sort=False below # Calculations starting from ground motion fields input by the user # require at least two input files related to the gmf data: # 1. A sites.csv file, listing {site_id, lon, lat} tuples # 2. A gmfs.csv file, listing {event_id, site_id, gmv[IMT1], # gmv[IMT2], ...} tuples # The site coordinates defined in the sites file do not need to be in # sorted order. # We must only ensure uniqueness of the provided site_ids and # coordinates. # When creating the site mesh from the site coordinates read from # the csv file, the sort=False flag maintains the user-specified # site_ids instead of reassigning them after sorting. return geo.Mesh.from_coords(c, sort=False) elif 'hazard_curves' in oqparam.inputs: fname = oqparam.inputs['hazard_curves'] if isinstance(fname, list): # for csv mesh, pmap = get_pmap_from_csv(oqparam, fname) else: raise NotImplementedError('Reading from %s' % fname) return mesh elif oqparam.region_grid_spacing: if oqparam.region: poly = geo.Polygon.from_wkt(oqparam.region) elif exposure: # in case of implicit grid the exposure takes precedence over # the site model poly = exposure.mesh.get_convex_hull() elif 'site_model' in oqparam.inputs: # this happens in event_based/case_19, where there is an implicit # grid over the site model sm = get_site_model(oqparam) poly = geo.Mesh(sm['lon'], sm['lat']).get_convex_hull() else: raise InvalidFile('There is a grid spacing but not a region, ' 'nor a site model, nor an exposure in %s' % oqparam.inputs['job_ini']) try: logging.info('Inferring the hazard grid from the exposure') mesh = poly.dilate(oqparam.region_grid_spacing).discretize( oqparam.region_grid_spacing) return geo.Mesh.from_coords(zip(mesh.lons, mesh.lats)) except Exception: raise ValueError('Could not discretize region with grid spacing ' '%(region_grid_spacing)s' % vars(oqparam)) # the site model has the precedence over the exposure, see the # discussion in https://github.com/gem/oq-engine/pull/5217 elif 'site_model' in oqparam.inputs: logging.info('Extracting the hazard sites from the site model') sm = get_site_model(oqparam) return geo.Mesh(sm['lon'], sm['lat']) elif 'exposure' in oqparam.inputs: return exposure.mesh
def get_vulnerability_functions_05(node, fname): """ :param node: a vulnerabilityModel node :param fname: path of the vulnerability filter :returns: a dictionary imt, vf_id -> vulnerability function """ # NB: the IMTs can be duplicated and with different levels, each # vulnerability function in a set will get its own levels vf_ids = set() vmodel = scientific.VulnerabilityModel(**node.attrib) # imt, vf_id -> vulnerability function for vfun in node.getnodes('vulnerabilityFunction'): with context(fname, vfun): imt = vfun.imls['imt'] imls = numpy.array(~vfun.imls) vf_id = vfun['id'] if vf_id in vf_ids: raise InvalidFile( 'Duplicated vulnerabilityFunctionID: %s: %s, line %d' % (vf_id, fname, vfun.lineno)) vf_ids.add(vf_id) num_probs = None if vfun['dist'] == 'PM': loss_ratios, probs = [], [] for probabilities in vfun[1:]: loss_ratios.append(probabilities['lr']) probs.append(valid.probabilities(~probabilities)) if num_probs is None: num_probs = len(probs[-1]) elif len(probs[-1]) != num_probs: raise ValueError( 'Wrong number of probabilities (expected %d, ' 'got %d) in %s, line %d' % (num_probs, len(probs[-1]), fname, probabilities.lineno)) all_probs = numpy.array(probs) assert all_probs.shape == (len(loss_ratios), len(imls)), ( len(loss_ratios), len(imls)) vmodel[imt, vf_id] = ( scientific.VulnerabilityFunctionWithPMF( vf_id, imt, imls, numpy.array(loss_ratios), all_probs)) # the seed will be set by readinput.get_crmodel else: with context(fname, vfun): loss_ratios = ~vfun.meanLRs coefficients = ~vfun.covLRs if len(loss_ratios) != len(imls): raise InvalidFile( 'There are %d loss ratios, but %d imls: %s, line %d' % (len(loss_ratios), len(imls), fname, vfun.meanLRs.lineno)) if len(coefficients) != len(imls): raise InvalidFile( 'There are %d coefficients, but %d imls: %s, ' 'line %d' % (len(coefficients), len(imls), fname, vfun.covLRs.lineno)) with context(fname, vfun): vmodel[imt, vf_id] = scientific.VulnerabilityFunction( vf_id, imt, imls, loss_ratios, coefficients, vfun['dist']) return vmodel
def _get_exposure(fname, stop=None): """ :param fname: path of the XML file containing the exposure :param stop: node at which to stop parsing (or None) :returns: a pair (Exposure instance, list of asset nodes) """ [exposure] = nrml.read(fname, stop=stop) if not exposure.tag.endswith('exposureModel'): raise InvalidFile('%s: expected exposureModel, got %s' % (fname, exposure.tag)) description = exposure.description try: conversions = exposure.conversions except AttributeError: conversions = Node('conversions', nodes=[Node('costTypes', [])]) try: area = conversions.area except AttributeError: # NB: the area type cannot be an empty string because when sending # around the CostCalculator object we would run into this numpy bug # about pickling dictionaries with empty strings: # https://github.com/numpy/numpy/pull/5475 area = Node('area', dict(type='?')) try: occupancy_periods = exposure.occupancyPeriods.text or '' except AttributeError: occupancy_periods = '' try: tagNames = exposure.tagNames except AttributeError: tagNames = Node('tagNames', text='') tagnames = ['id'] + (~tagNames or []) if set(tagnames) & {'taxonomy', 'exposure', 'country'}: raise InvalidFile('taxonomy, exposure and country are reserved names ' 'you cannot use it in <tagNames>: %s' % fname) tagnames.insert(0, 'taxonomy') # read the cost types and make some check cost_types = [] retrofitted = False for ct in conversions.costTypes: with context(fname, ct): ctname = ct['name'] if ctname == 'structural' and 'retrofittedType' in ct.attrib: if ct['retrofittedType'] != ct['type']: raise ValueError( 'The retrofittedType %s is different from the type' '%s' % (ct['retrofittedType'], ct['type'])) if ct['retrofittedUnit'] != ct['unit']: raise ValueError( 'The retrofittedUnit %s is different from the unit' '%s' % (ct['retrofittedUnit'], ct['unit'])) retrofitted = True cost_types.append( (ctname, valid.cost_type_type(ct['type']), ct['unit'])) if 'occupants' in cost_types: cost_types.append(('occupants', 'per_area', 'people')) cost_types.sort(key=operator.itemgetter(0)) cost_types = numpy.array(cost_types, cost_type_dt) cc = CostCalculator( {}, {}, {}, {name: i for i, name in enumerate(tagnames)}) for ct in cost_types: name = ct['name'] # structural, nonstructural, ... cc.cost_types[name] = ct['type'] # aggregated, per_asset, per_area cc.area_types[name] = area['type'] cc.units[name] = ct['unit'] exp = Exposure( exposure['id'], exposure['category'], description.text, cost_types, occupancy_periods, retrofitted, area.attrib, [], [], cc, TagCollection(tagnames)) assets_text = exposure.assets.text.strip() if assets_text: # the <assets> tag contains a list of file names dirname = os.path.dirname(fname) exp.datafiles = [os.path.join(dirname, f) for f in assets_text.split()] else: exp.datafiles = [] return exp, exposure.assets
def __init__(self, **names_vals): if '_job_id' in names_vals: # called from engine del names_vals['_job_id'] # support legacy names for name in list(names_vals): if name == 'quantile_hazard_curves': names_vals['quantiles'] = names_vals.pop(name) elif name == 'mean_hazard_curves': names_vals['mean'] = names_vals.pop(name) elif name == 'max': names_vals['max'] = names_vals.pop(name) super().__init__(**names_vals) if 'job_ini' not in self.inputs: self.inputs['job_ini'] = '<in-memory>' job_ini = self.inputs['job_ini'] if 'calculation_mode' not in names_vals: raise InvalidFile('Missing calculation_mode in %s' % job_ini) if 'region_constraint' in names_vals: if 'region' in names_vals: raise InvalidFile('You cannot have both region and ' 'region_constraint in %s' % job_ini) logging.warning( 'region_constraint is obsolete, use region instead') self.region = valid.wkt_polygon( names_vals.pop('region_constraint')) self.risk_investigation_time = (self.risk_investigation_time or self.investigation_time) self.collapse_level = int(self.collapse_level) if ('intensity_measure_types_and_levels' in names_vals and 'intensity_measure_types' in names_vals): logging.warning('Ignoring intensity_measure_types since ' 'intensity_measure_types_and_levels is set') if 'iml_disagg' in names_vals: self.iml_disagg.pop('default') # normalize things like SA(0.10) -> SA(0.1) self.iml_disagg = { str(from_string(imt)): val for imt, val in self.iml_disagg.items() } self.hazard_imtls = self.iml_disagg if 'intensity_measure_types_and_levels' in names_vals: raise InvalidFile( 'Please remove the intensity_measure_types_and_levels ' 'from %s: they will be inferred from the iml_disagg ' 'dictionary' % job_ini) elif 'intensity_measure_types_and_levels' in names_vals: self.hazard_imtls = self.intensity_measure_types_and_levels delattr(self, 'intensity_measure_types_and_levels') lens = set(map(len, self.hazard_imtls.values())) if len(lens) > 1: dic = {imt: len(ls) for imt, ls in self.hazard_imtls.items()} raise ValueError( 'Each IMT must have the same number of levels, instead ' 'you have %s' % dic) elif 'intensity_measure_types' in names_vals: self.hazard_imtls = dict.fromkeys(self.intensity_measure_types) if 'maximum_intensity' in names_vals: minint = self.minimum_intensity or {'default': 1E-2} for imt in self.hazard_imtls: i1 = calc.filters.getdefault(minint, imt) i2 = calc.filters.getdefault(self.maximum_intensity, imt) self.hazard_imtls[imt] = list(valid.logscale(i1, i2, 20)) delattr(self, 'intensity_measure_types') if ('ps_grid_spacing' in names_vals and 'pointsource_distance' not in names_vals): raise InvalidFile('%s: ps_grid_spacing requires setting a ' 'pointsource_distance!' % self.inputs['job_ini']) self._risk_files = get_risk_files(self.inputs) if self.hazard_precomputed() and self.job_type == 'risk': self.check_missing('site_model', 'debug') self.check_missing('gsim_logic_tree', 'debug') self.check_missing('source_model_logic_tree', 'debug') # check investigation_time if (self.investigation_time and self.calculation_mode.startswith('scenario')): raise ValueError('%s: there cannot be investigation_time in %s' % (self.inputs['job_ini'], self.calculation_mode)) # check the gsim_logic_tree if self.inputs.get('gsim_logic_tree'): if self.gsim != '[FromFile]': raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there' ' must be no `gsim` key' % job_ini) path = os.path.join(self.base_path, self.inputs['gsim_logic_tree']) gsim_lt = logictree.GsimLogicTree(path, ['*']) # check the IMTs vs the GSIMs self._trts = set(gsim_lt.values) for gsims in gsim_lt.values.values(): self.check_gsims(gsims) elif self.gsim is not None: self.check_gsims([valid.gsim(self.gsim, self.base_path)]) # check inputs unknown = set(self.inputs) - self.KNOWN_INPUTS if unknown: raise ValueError('Unknown key %s_file in %s' % (unknown.pop(), self.inputs['job_ini'])) # checks for disaggregation if self.calculation_mode == 'disaggregation': if not self.poes_disagg and self.poes: self.poes_disagg = self.poes elif not self.poes and self.poes_disagg: self.poes = self.poes_disagg elif self.poes != self.poes_disagg: raise InvalidFile( 'poes_disagg != poes: %s!=%s in %s' % (self.poes_disagg, self.poes, self.inputs['job_ini'])) if not self.poes_disagg and not self.iml_disagg: raise InvalidFile('poes_disagg or iml_disagg must be set ' 'in %(job_ini)s' % self.inputs) elif self.poes_disagg and self.iml_disagg: raise InvalidFile( '%s: iml_disagg and poes_disagg cannot be set ' 'at the same time' % job_ini) for k in ('mag_bin_width', 'distance_bin_width', 'coordinate_bin_width', 'num_epsilon_bins'): if k not in vars(self): raise InvalidFile('%s must be set in %s' % (k, job_ini)) if self.disagg_outputs and not any('Eps' in out for out in self.disagg_outputs): self.num_epsilon_bins = 1 if (self.rlz_index is not None and self.num_rlzs_disagg is not None): raise InvalidFile('%s: you cannot set rlzs_index and ' 'num_rlzs_disagg at the same time' % job_ini) # checks for classical_damage if self.calculation_mode == 'classical_damage': if self.conditional_loss_poes: raise InvalidFile('%s: conditional_loss_poes are not defined ' 'for classical_damage calculations' % job_ini) # checks for event_based_risk if (self.calculation_mode == 'event_based_risk' and self.asset_correlation not in (0, 1)): raise ValueError('asset_correlation != {0, 1} is no longer' ' supported') # checks for ebrisk if self.calculation_mode == 'ebrisk': if self.risk_investigation_time is None: raise InvalidFile('Please set the risk_investigation_time in' ' %s' % job_ini) elif self.aggregate_by: raise InvalidFile('aggregate_by cannot be set in %s [not ebrisk]' % job_ini) # check for GMFs from file if (self.inputs.get('gmfs', '').endswith('.csv') and 'sites' not in self.inputs and self.sites is None): raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini) elif self.inputs.get('gmfs', '').endswith('.xml'): raise InvalidFile('%s: GMFs in XML are not supported anymore' % job_ini) # checks for event_based if 'event_based' in self.calculation_mode: if self.ps_grid_spacing: logging.warning('ps_grid_spacing is ignored in event_based ' 'calculations"') if self.ses_per_logic_tree_path >= TWO32: raise ValueError('ses_per_logic_tree_path too big: %d' % self.ses_per_logic_tree_path) if self.number_of_logic_tree_samples >= TWO16: raise ValueError('number_of_logic_tree_samples too big: %d' % self.number_of_logic_tree_samples) # check grid + sites if self.region_grid_spacing and ('sites' in self.inputs or self.sites): raise ValueError('You are specifying grid and sites at the same ' 'time: which one do you want?') # check for amplification if ('amplification' in self.inputs and self.imtls and self.calculation_mode in ['classical', 'classical_risk', 'disaggregation']): check_same_levels(self.imtls)