def _taxonomy_mapping(filename, taxonomies):
    try:
        tmap_df = pandas.read_csv(filename, converters=dict(weight=float))
    except Exception as e:
        raise e.__class__('%s while reading %s' % (e, filename))
    if 'weight' not in tmap_df:
        tmap_df['weight'] = 1.
    assert set(tmap_df) in ({'taxonomy', 'conversion', 'weight'},
                            {'taxonomy', 'risk_id', 'weight'})
    # NB: 'conversion' was the old name in the header for engine <= 3.12
    risk_id = 'risk_id' if 'risk_id' in tmap_df.columns else 'conversion'
    dic = dict(list(tmap_df.groupby('taxonomy')))
    taxonomies = taxonomies[1:]  # strip '?'
    missing = set(taxonomies) - set(dic)
    if missing:
        raise InvalidFile('The taxonomies %s are in the exposure but not in '
                          'the taxonomy mapping %s' % (missing, filename))
    lst = [[("?", 1)]]
    for taxo in taxonomies:
        recs = dic[taxo]
        if abs(recs['weight'].sum() - 1.) > pmf.PRECISION:
            raise InvalidFile('%s: the weights do not sum up to 1 for %s' %
                              (filename, taxo))
        lst.append([(rec[risk_id], rec['weight'])
                    for _, rec in recs.iterrows()])
    return lst

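# Usage sketch (file contents hypothetical): given a taxonomy_mapping.csv
# such as
#
#   taxonomy,risk_id,weight
#   Wood,W_LFM-DUM_H3,0.6
#   Wood,W_LFM-DUM_H2,0.4
#
# and the exposure taxonomies ['?', 'Wood'], the result is one list of
# (risk_id, weight) pairs per taxonomy, with the '?' placeholder first:
#
# >>> _taxonomy_mapping('taxonomy_mapping.csv', ['?', 'Wood'])
# [[('?', 1)], [('W_LFM-DUM_H3', 0.6), ('W_LFM-DUM_H2', 0.4)]]
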
def get_crmodel(oqparam):
    """
    Return a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    risklist = get_risk_functions(oqparam)
    if not oqparam.limit_states and risklist.limit_states:
        oqparam.limit_states = risklist.limit_states
    elif 'damage' in oqparam.calculation_mode and risklist.limit_states:
        assert oqparam.limit_states == risklist.limit_states
    loss_types = oqparam.loss_dt().names
    consdict = {}
    if 'consequence' in oqparam.inputs:
        # build consdict of the form consequence_by_tagname -> tag -> array
        for by, fnames in oqparam.inputs['consequence'].items():
            if isinstance(fnames, str):  # single file
                fnames = [fnames]
            dtypedict = {
                by: str, 'consequence': str, 'loss_type': str, None: float}
            # i.e. files collapsed.csv, fatalities.csv, ... with headers
            # taxonomy,consequence,loss_type,slight,moderate,extensive
            arrays = []
            for fname in fnames:
                arr = hdf5.read_csv(fname, dtypedict).array
                arrays.append(arr)
                for no, row in enumerate(arr, 2):
                    if row['loss_type'] not in loss_types:
                        msg = '%s: %s is not a recognized loss type, line=%d'
                        raise InvalidFile(
                            msg % (fname, row['loss_type'], no))
            array = numpy.concatenate(arrays)
            dic = group_array(array, 'consequence')
            for consequence, group in dic.items():
                if consequence not in scientific.KNOWN_CONSEQUENCES:
                    raise InvalidFile('Unknown consequence %s in %s' %
                                      (consequence, fnames))
                bytag = {
                    tag: _cons_coeffs(grp, loss_types, risklist.limit_states)
                    for tag, grp in group_array(group, by).items()}
                consdict['%s_by_%s' % (consequence, by)] = bytag
    # for instance consdict['collapsed_by_taxonomy']['W_LFM-DUM_H3']
    # is [(0.05,), (0.2 ,), (0.6 ,), (1. ,)] for damage state and structural
    crm = riskmodels.CompositeRiskModel(oqparam, risklist, consdict)
    return crm

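# A consequence file accepted above looks like this (contents hypothetical;
# the numeric columns, one per limit state, must match the limit states of
# the risk functions):
#
#   taxonomy,consequence,loss_type,slight,moderate,extensive,complete
#   W_LFM-DUM_H3,collapsed,structural,0.05,0.2,0.6,1.0
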
def get_sitecol_assetcol(oqparam, haz_sitecol=None, cost_types=()):
    """
    :param oqparam: calculation parameters
    :param haz_sitecol: the hazard site collection
    :param cost_types: the expected cost types
    :returns: (site collection, asset collection, discarded)
    """
    global exposure
    asset_hazard_distance = max(oqparam.asset_hazard_distance.values())
    if exposure is None:
        # haz_sitecol was not extracted from the exposure
        exposure = get_exposure(oqparam)
    if haz_sitecol is None:
        haz_sitecol = get_site_collection(oqparam)
    if oqparam.region_grid_spacing:
        # 1.414 ~ sqrt(2): the diagonal of a grid cell of side
        # region_grid_spacing
        haz_distance = oqparam.region_grid_spacing * 1.414
        if haz_distance != asset_hazard_distance:
            logging.debug('Using asset_hazard_distance=%d km instead of '
                          '%d km', haz_distance, asset_hazard_distance)
    else:
        haz_distance = asset_hazard_distance
    if haz_sitecol.mesh != exposure.mesh:
        # associate the assets to the hazard sites
        sitecol, assets_by, discarded = geo.utils.assoc(
            exposure.assets_by_site, haz_sitecol, haz_distance, 'filter')
        assets_by_site = [[] for _ in sitecol.complete.sids]
        num_assets = 0
        for sid, assets in zip(sitecol.sids, assets_by):
            assets_by_site[sid] = assets
            num_assets += len(assets)
        logging.info('Associated {:_d} assets to {:_d} sites'.format(
            num_assets, len(sitecol)))
    else:  # asset sites and hazard sites are the same
        sitecol = haz_sitecol
        assets_by_site = exposure.assets_by_site
        discarded = []
        logging.info('Read %d sites and %d assets from the exposure',
                     len(sitecol), sum(len(a) for a in assets_by_site))
    assetcol = asset.AssetCollection(
        exposure, assets_by_site, oqparam.time_event, oqparam.aggregate_by)
    if assetcol.occupancy_periods:
        missing = set(cost_types) - set(exposure.cost_types['name']) - set(
            ['occupants'])
    else:
        missing = set(cost_types) - set(exposure.cost_types['name'])
    if missing and not oqparam.calculation_mode.endswith('damage'):
        raise InvalidFile('The exposure %s is missing %s' %
                          (oqparam.inputs['exposure'], missing))
    if (not oqparam.hazard_calculation_id and 'gmfs' not in oqparam.inputs
            and 'hazard_curves' not in oqparam.inputs
            and sitecol is not sitecol.complete):
        # for predefined hazard you cannot reduce the site collection; in
        # the other cases you can, typically with a grid which is mostly
        # empty (i.e. there are many hazard sites with no assets)
        assetcol.reduce_also(sitecol)
    return sitecol, assetcol, discarded

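# Usage sketch (oq is an OqParam instance):
#
# >>> sitecol, assetcol, discarded = get_sitecol_assetcol(oq)
# >>> len(discarded)  # assets farther than the asset_hazard_distance, if any
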
def _get_site_model(fname, req_site_params):
    sm = hdf5.read_csv(fname, site.site_param_dt).array
    sm['lon'] = numpy.round(sm['lon'], 5)
    sm['lat'] = numpy.round(sm['lat'], 5)
    dupl = general.get_duplicates(sm, 'lon', 'lat')
    if dupl:
        raise InvalidFile('Found duplicate sites %s in %s' % (dupl, fname))
    z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
    for name in z.dtype.names:
        z[name] = sm[name]
    return z

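# The site model CSV parsed above has one row per site, with coordinates
# plus site parameters (values hypothetical):
#
#   lon,lat,vs30,z1pt0,z2pt5
#   10.10000,45.20000,760.0,100.0,5.0
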
def __init__(self, ctrl_port=config.zworkers.ctrl_port, host_cores=None,
             remote_python=None, receiver_ports=None):
    # NB: receiver_ports is not used but is needed for compliance
    self.ctrl_port = int(ctrl_port)
    self.host_cores = ([hc.split() for hc in host_cores.split(',')]
                       if host_cores else [])
    for host, cores in self.host_cores:
        if int(cores) < -1:
            raise InvalidFile('openquake.cfg: found %s %s' % (host, cores))
    self.remote_python = remote_python or sys.executable
    self.popens = []

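# host_cores is a comma-separated list of "hostname cores" pairs, e.g.
# (hypothetical hosts, assuming the usual [zworkers] section of
# openquake.cfg):
#
#   host_cores = 192.168.1.1 -1, 192.168.1.2 8
#
# which is parsed into [['192.168.1.1', '-1'], ['192.168.1.2', '8']];
# cores == -1 passes the check above and means "all available cores".
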
def get_site_collection(oqparam, h5=None):
    """
    Returns a SiteCollection instance by looking at the points and the
    site model defined by the configuration parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    if h5 and 'sitecol' in h5:
        return h5['sitecol']
    mesh = get_mesh(oqparam, h5)
    if mesh is None and oqparam.ground_motion_fields:
        raise InvalidFile('You are missing sites.csv or site_model.csv in %s'
                          % oqparam.inputs['job_ini'])
    elif mesh is None:
        # a None sitecol is okay when computing the ruptures only
        return
    else:  # use the default site params
        req_site_params = get_gsim_lt(oqparam).req_site_params
        if 'amplification' in oqparam.inputs:
            req_site_params.add('ampcode')
        if h5 and 'site_model' in h5:  # comes from a site_model.csv
            sm = h5['site_model'][:]
        else:
            sm = oqparam
        sitecol = site.SiteCollection.from_points(
            mesh.lons, mesh.lats, mesh.depths, sm, req_site_params)
    ss = oqparam.sites_slice  # can be None or (start, stop)
    if ss:
        if 'custom_site_id' not in sitecol.array.dtype.names:
            gh = sitecol.geohash(6)
            assert len(numpy.unique(gh)) == len(gh), \
                'geohashes are not unique'
            sitecol.add_col('custom_site_id', 'S6', gh)
        mask = (sitecol.sids >= ss[0]) & (sitecol.sids < ss[1])
        sitecol = sitecol.filter(mask)
        sitecol.make_complete()
    ss = os.environ.get('OQ_SAMPLE_SITES')
    if ss:
        # debugging tip to reduce the size of a calculation:
        # OQ_SAMPLE_SITES=.1 oq engine --run job.ini
        # will run a computation with 10 times fewer sites
        sitecol.array = numpy.array(random_filter(sitecol.array, float(ss)))
        sitecol.make_complete()
    if h5:
        h5['sitecol'] = sitecol
    return sitecol

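# Sketch of the two reduction mechanisms above: with sites_slice=(0, 100)
# only the sites with 0 <= sid < 100 are kept (a 6-character geohash is
# stored as custom_site_id, so the surviving sites remain identifiable);
# independently, OQ_SAMPLE_SITES=.1 keeps a random ~10% of the sites.
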
def read_csv(fname, dtypedict={None: float}, renamedict={}, sep=',',
             index=None, errors=None):
    """
    :param fname: a CSV file with a header and float fields
    :param dtypedict: a dictionary fieldname -> dtype, None -> default
    :param renamedict: aliases for the fields to rename
    :param sep: separator (default comma)
    :param index: if not None, returns a pandas DataFrame
    :param errors: passed to the underlying open function (default None)
    :returns: an ArrayWrapper, unless there is an index
    """
    attrs = {}
    with open(fname, encoding='utf-8-sig', errors=errors) as f:
        while True:
            first = next(f)
            if first.startswith('#'):
                attrs = dict(parse_comment(first.strip('#,\n ')))
                continue
            break
        header = first.strip().split(sep)
        if isinstance(dtypedict, dict):
            dt = build_dt(dtypedict, header)
        else:
            # in test_recompute dt is already a composite dtype
            dt = dtypedict
        try:
            arr = _read_csv(f, dt)
        except KeyError:
            raise KeyError('Missing None -> default in dtypedict')
        except Exception as exc:
            raise InvalidFile('%s: %s' % (fname, exc))
    if renamedict:
        newnames = []
        for name in arr.dtype.names:
            new = renamedict.get(name, name)
            newnames.append(new)
        arr.dtype.names = newnames
    if index:
        df = pandas.DataFrame.from_records(arr, index)
        vars(df).update(attrs)
        return df
    return ArrayWrapper(arr, attrs)

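# Usage sketch (file and field names hypothetical):
#
# >>> aw = read_csv('damage.csv', {'taxonomy': str, None: float})
# >>> aw.array['taxonomy']  # structured-array column access
#
# Passing an index returns a pandas DataFrame instead:
#
# >>> df = read_csv('damage.csv', {'taxonomy': str, None: float},
# ...               index='taxonomy')
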
def get_site_model(oqparam):
    """
    Convert the NRML file into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    if 'amplification' in oqparam.inputs:
        req_site_params.add('ampcode')
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(fname, site.site_param_dt).array
            sm['lon'] = numpy.round(sm['lon'], 5)
            sm['lat'] = numpy.round(sm['lat'], 5)
            dupl = get_duplicates(sm, 'lon', 'lat')
            if dupl:
                raise InvalidFile(
                    'Found duplicate sites %s in %s' % (dupl, fname))
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead '
                                  'of a site_model.csv file!' % fname)
            params = sorted(set(sm.dtype.names) | req_site_params)
            z = numpy.zeros(
                len(sm), [(p, site.site_param_dt[p]) for p in params])
            for name in z.dtype.names:
                try:
                    z[name] = sm[name]
                except ValueError:
                    # missing parameter: use the global one from the job.ini
                    # (exercised in the test classical/case_28)
                    value = getattr(oqparam, site.param[name])
                    if name == 'vs30measured':  # special case
                        value = value == 'measured'
                    z[name] = value
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if 'ampcode' in missing:  # use a default of b''
            missing -= {'ampcode'}
            for param in params:
                param['ampcode'] = b''
        if missing:
            raise InvalidFile('%s: missing parameter %s' %
                              (oqparam.inputs['site_model'],
                               ', '.join(missing)))
        # NB: the sorted() call below is essential, otherwise there is
        # a heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([tuple(param[name] for name in site_model_dt.names)
                          for param in params], site_model_dt)
        dupl = "\n".join('%s %s' % loc
                         for loc, n in countby(sm, 'lon', 'lat').items()
                         if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)

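# NB: in the CSV branch a parameter missing from the file (say z1pt0) falls
# back on the global value from the job.ini via
# getattr(oqparam, site.param[name]), so partial site models are accepted
# when the global parameters are set; the NRML branch instead raises
# InvalidFile for missing parameters without a hardcoded default.
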
def get_mesh(oqparam, h5=None):
    """
    Extract the mesh of points to compute from the sites, the sites_csv,
    the region, the site model, the exposure, in this order.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    global pmap, exposure, gmfs, eids
    if 'exposure' in oqparam.inputs and exposure is None:
        # read it only once
        exposure = get_exposure(oqparam)
    if oqparam.sites:
        return geo.Mesh.from_coords(oqparam.sites)
    elif 'sites' in oqparam.inputs:
        fname = oqparam.inputs['sites']
        header = get_csv_header(fname)
        if 'lon' in header:
            data = []
            for i, row in enumerate(
                    csv.DictReader(open(fname, encoding='utf-8-sig'))):
                if header[0] == 'site_id' and row['site_id'] != str(i):
                    raise InvalidFile('%s: expected site_id=%d, got %s' % (
                        fname, i, row['site_id']))
                data.append(' '.join([row['lon'], row['lat']]))
        elif 'gmfs' in oqparam.inputs:
            raise InvalidFile('Missing header in %(sites)s' % oqparam.inputs)
        else:
            data = [line.replace(',', ' ')
                    for line in open(fname, encoding='utf-8-sig')]
        coords = valid.coordinates(','.join(data))
        # sort the coordinates so that event_based results do not
        # depend on the order in the sites.csv file
        c = coords if header[0] == 'site_id' else sorted(coords)
        # NB: notice the sort=False below.
        # Calculations starting from predefined ground motion fields
        # require at least two input files related to the gmf data:
        # 1. a sites.csv file, listing {site_id, lon, lat} tuples
        # 2. a gmfs.csv file, listing {event_id, site_id, gmv[IMT1],
        #    gmv[IMT2], ...} tuples
        # The site coordinates in the sites file do not need to be sorted;
        # we must only ensure uniqueness of the provided site_ids and
        # coordinates. When building the mesh from the coordinates read
        # from the csv file, sort=False keeps the user-specified site_ids
        # instead of reassigning them after sorting.
        return geo.Mesh.from_coords(c, sort=False)
    elif 'hazard_curves' in oqparam.inputs:
        fname = oqparam.inputs['hazard_curves']
        if isinstance(fname, list):  # for csv
            mesh, pmap = get_pmap_from_csv(oqparam, fname)
        else:
            raise NotImplementedError('Reading from %s' % fname)
        return mesh
    elif oqparam.region_grid_spacing:
        if oqparam.region:
            poly = geo.Polygon.from_wkt(oqparam.region)
        elif exposure:
            # in case of an implicit grid the exposure takes precedence
            # over the site model
            poly = exposure.mesh.get_convex_hull()
        elif 'site_model' in oqparam.inputs:
            # this happens in event_based/case_19, where there is an
            # implicit grid over the site model
            sm = get_site_model(oqparam)  # do not store in h5!
            poly = geo.Mesh(sm['lon'], sm['lat']).get_convex_hull()
        else:
            raise InvalidFile('There is a grid spacing but not a region, '
                              'nor a site model, nor an exposure in %s' %
                              oqparam.inputs['job_ini'])
        try:
            logging.info('Inferring the hazard grid')
            mesh = poly.dilate(oqparam.region_grid_spacing).discretize(
                oqparam.region_grid_spacing)
            return geo.Mesh.from_coords(zip(mesh.lons, mesh.lats))
        except Exception:
            raise ValueError(
                'Could not discretize region with grid spacing '
                '%(region_grid_spacing)s' % vars(oqparam))
    # the site model takes precedence over the exposure, see the
    # discussion in https://github.com/gem/oq-engine/pull/5217
    elif 'site_model' in oqparam.inputs:
        logging.info('Extracting the hazard sites from the site model')
        sm = get_site_model(oqparam)
        if h5:
            h5['site_model'] = sm
        mesh = geo.Mesh(sm['lon'], sm['lat'])
        return mesh
    elif 'exposure' in oqparam.inputs:
        return exposure.mesh

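# A sites.csv accepted above is either a bare list of "lon,lat" lines or
# carries a header (contents hypothetical):
#
#   site_id,lon,lat
#   0,10.0,45.0
#   1,10.1,45.0
#
# With a site_id column the user ordering is preserved (hence sort=False);
# without it, the coordinates are sorted for reproducibility.
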
def get_input_files(oqparam, hazard=False):
    """
    :param oqparam: an OqParam instance
    :param hazard: if True, consider only the hazard files
    :returns: input path names in a specific order
    """
    fnames = set()  # files entering in the checksum
    uri = oqparam.shakemap_uri
    if isinstance(uri, dict) and uri:
        # local files
        for key, val in uri.items():
            if key == 'fname' or key.endswith('_url'):
                val = val.replace('file://', '')
                fname = os.path.join(oqparam.base_path, val)
                if os.path.exists(fname):
                    uri[key] = fname
                    fnames.add(fname)
        # additional separate shapefiles
        if uri['kind'] == 'shapefile' and not uri['fname'].endswith('.zip'):
            fnames.update(get_shapefiles(os.path.dirname(fname)))
    for key in oqparam.inputs:
        fname = oqparam.inputs[key]
        if hazard and key not in ('source_model_logic_tree',
                                  'gsim_logic_tree', 'source'):
            continue
        # collect .hdf5 tables for the GSIMs, if any
        elif key == 'gsim_logic_tree':
            gsim_lt = get_gsim_lt(oqparam)
            for gsims in gsim_lt.values.values():
                for gsim in gsims:
                    for k, v in gsim.kwargs.items():
                        if k.endswith(('_file', '_table')):
                            fnames.add(v)
            fnames.add(fname)
        elif key == 'source_model':  # UCERF
            f = oqparam.inputs['source_model']
            fnames.add(f)
            fname = nrml.read(f).sourceModel.UCERFSource['filename']
            fnames.add(os.path.join(os.path.dirname(f), fname))
        elif key == 'exposure':  # fname is a list
            for exp in asset.Exposure.read_headers(fname):
                fnames.update(exp.datafiles)
            fnames.update(fname)
        elif isinstance(fname, dict):
            for key, val in fname.items():
                if isinstance(val, list):  # list of files
                    fnames.update(val)
                else:
                    fnames.add(val)
        elif isinstance(fname, list):
            for f in fname:
                if f == oqparam.input_dir:
                    raise InvalidFile('%s there is an empty path in %s' %
                                      (oqparam.inputs['job_ini'], key))
            fnames.update(fname)
        elif key == 'source_model_logic_tree':
            args = (fname, oqparam.random_seed,
                    oqparam.number_of_logic_tree_samples,
                    oqparam.sampling_method)
            try:
                smlt = smlt_cache[args]
            except KeyError:
                smlt = smlt_cache[args] = logictree.SourceModelLogicTree(
                    *args)
            fnames.update(smlt.hdf5_files)
            fnames.update(smlt.info.smpaths)
            fnames.add(fname)
        else:
            fnames.add(fname)
    return sorted(fnames)

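# Usage sketch (oq is an OqParam instance): collect every input file of a
# calculation, e.g. to compute its checksum:
#
# >>> fnames = get_input_files(oq)
# >>> lt_fnames = get_input_files(oq, hazard=True)  # logic-tree files only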