def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        # a distinct loop name is used so the `calc_id` argument is not
        # overwritten while listing
        for cid in datastore.get_calc_ids(datastore.DATADIR):
            try:
                oq = OqParam.from_(datastore.DataStore(cid).attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:  # invalid datastore directory
                # `except Exception` (not a bare except) so that Ctrl-C or
                # SystemExit never triggers the removal of a calculation
                logging.warning('Removed invalid calculation %d', cid)
                shutil.rmtree(
                    os.path.join(datastore.DATADIR, 'calc_%s' % cid))
            else:
                rows.append((cid, cmode, descr))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    # print all keys
    oq = OqParam.from_(ds.attrs)
    print(
        oq.calculation_mode,
        'calculation (%r) saved in %s contains:' %
        (oq.description, ds.calc_dir))
    for key in ds:
        print(key, humansize(ds.getsize(key)))

    # this part is experimental and not tested on purpose
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(
                rmsep(mean_curves[imt], curves[imt], min_value)
                for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        # a distinct loop name is used so the `calc_id` argument is not
        # overwritten while listing
        for cid in datastore.get_calc_ids(datastore.DATADIR):
            try:
                oq = OqParam.from_(datastore.DataStore(cid).attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:  # invalid datastore directory
                # `except Exception` (not a bare except) so that Ctrl-C or
                # SystemExit never triggers the removal of a calculation
                logging.warning('Removed invalid calculation %d', cid)
                shutil.rmtree(os.path.join(
                    datastore.DATADIR, 'calc_%s' % cid))
            else:
                rows.append((cid, cmode, descr))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    # print all keys
    oq = OqParam.from_(ds.attrs)
    print(oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
          (oq.description, ds.hdf5path))
    for key in ds:
        print(key, humansize(ds.getsize(key)))

    # this part is experimental and not tested on purpose
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(rmsep(mean_curves[imt], curves[imt], min_value)
                       for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))
def export_hcurves_csv(ekey, dstore):
    """
    Write one .csv file per item found under the exported datastore key.

    For the 'uhs' key the data is read from 'hmaps' and converted with
    calc.make_uhs before being written.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    rlzs_assoc = dstore['rlzs_assoc']
    sitecol = dstore['sitecol']
    sitemesh = dstore['sitemesh']
    key, fmt = ekey
    is_uhs = key == 'uhs'
    source_key = 'hmaps' if is_uhs else key  # the UHS are derived from hmaps
    exported = []
    for kind, hcurves in dstore[source_key].items():
        fname = hazard_curve_name(
            dstore, ekey, kind, rlzs_assoc, oq.number_of_logic_tree_samples)
        if is_uhs:
            uhs_curves = calc.make_uhs(hcurves, oq.imtls, oq.poes)
            write_csv(fname, util.compose_arrays(sitemesh, uhs_curves))
        elif key == 'hmaps':
            write_csv(fname, util.compose_arrays(sitemesh, hcurves))
        else:
            export_hazard_curves_csv(ekey, fname, sitecol, hcurves, oq.imtls)
        exported.append(fname)
    exported.sort()
    return exported
def export_ses_xml(ekey, dstore):
    """
    Export one SES file per (source model, trt model) pair.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the full paths of the exported files (an empty list if the
        'rlzs_assoc' object has no `csm_info` attribute)
    """
    fmt = ekey[-1]
    oq = OqParam.from_(dstore.attrs)
    try:
        csm_info = dstore['rlzs_assoc'].csm_info
    except AttributeError:  # for scenario calculators don't export
        return []
    sescollection = dstore['sescollection']
    # the collection index advances by one for each trt model, in order
    pairs = ((sm, trt_model)
             for sm in csm_info.source_models
             for trt_model in sm.trt_models)
    by_ses_idx = operator.attrgetter('ses_idx')
    exported = []
    for col_id, (sm, trt_model) in enumerate(pairs):
        ruptures = list(sescollection[col_id].values())
        ses_coll = SESCollection(
            groupby(ruptures, by_ses_idx), sm.path, oq.investigation_time)
        fname = 'ses-%d-smltp_%s.%s' % (
            trt_model.id, '_'.join(sm.path), fmt)
        # the exporter is looked up by name among the module globals
        globals()['_export_ses_' + fmt](
            os.path.join(dstore.export_dir, fname), ses_coll)
        exported.append(os.path.join(dstore.export_dir, fname))
    return exported
def export_uhs_xml(ekey, dstore):
    """
    Export the uniform hazard spectra in XML format, one file per
    (kind, poe) pair.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    rlzs_assoc = dstore['rlzs_assoc']
    sitemesh = dstore['sitemesh'].value
    key, fmt = ekey
    fnames = []
    # NB: the periods come from calc.get_imts_periods below; a previous
    # hand-made list of SA/PGA imts was computed here and never used
    for kind, hmaps in dstore['hmaps'].items():
        metadata = get_metadata(rlzs_assoc.realizations, kind)
        _, periods = calc.get_imts_periods(oq.imtls)
        uhs = calc.make_uhs(hmaps, oq.imtls, oq.poes)
        for poe in oq.poes:
            poe_str = 'poe~%s' % poe
            fname = hazard_curve_name(
                dstore, ekey, kind + '-%s' % poe, rlzs_assoc,
                oq.number_of_logic_tree_samples)
            writer = hazard_writers.UHSXMLWriter(
                fname, periods=periods, poe=poe,
                investigation_time=oq.investigation_time, **metadata)
            data = [UHS(curve, Location(site))
                    for site, curve in zip(sitemesh, uhs[poe_str])]
            writer.serialize(data)
            fnames.append(fname)
    return sorted(fnames)
def export_gmf(ekey, dstore):
    """
    Export the ground motion fields, one file per realization having GMFs.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the list of the generated file names
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    rupture_by_tag = sum(dstore['sescollection'], AccumDict())
    all_tags = dstore['tags'].value
    oq = OqParam.from_(dstore.attrs)
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    samples = oq.number_of_logic_tree_samples
    fmt = ekey[-1]
    gmfs = dstore[ekey[0]]
    nbytes = gmfs.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        # logging.warn is a deprecated alias, use logging.warning
        logging.warning(GMF_WARNING, dstore.hdf5path)
    fnames = []
    for rlz, gmf_by_idx in zip(
            rlzs_assoc.realizations, rlzs_assoc.combine_gmfs(gmfs)):
        tags = all_tags[list(gmf_by_idx)]
        # distinct name: do not rebind the `gmfs` dataset read above
        rlz_gmfs = list(gmf_by_idx.values())
        if not rlz_gmfs:
            continue
        ruptures = [rupture_by_tag[tag] for tag in tags]
        fname = build_name(dstore, rlz, 'gmf', fmt, samples)
        fnames.append(fname)
        # the exporter is looked up by name among the module globals
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), fname, sitecol,
            ruptures, rlz_gmfs, rlz, investigation_time)
    return fnames
def export_hcurves_xml_json(ekey, dstore):
    """
    Export the hazard curves in XML or GeoJSON format, one file per
    (kind, imt) pair.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    export_type = ekey[1]
    len_ext = len(export_type) + 1  # length of the extension plus the dot
    oq = OqParam.from_(dstore.attrs)
    sitemesh = dstore['sitemesh'].value
    rlzs_assoc = dstore['rlzs_assoc']
    exported = []
    if export_type == 'geojson':
        writercls = hazard_writers.HazardCurveGeoJSONWriter
    else:
        writercls = hazard_writers.HazardCurveXMLWriter
    # one realization is consumed for each item of the exported group
    rlzs = iter(rlzs_assoc.realizations)
    for kind, curves in dstore[ekey[0]].items():
        rlz = next(rlzs)
        name = hazard_curve_name(
            dstore, ekey, kind, rlzs_assoc, oq.number_of_logic_tree_samples)
        for imt in oq.imtls:
            fname = name[:-len_ext] + '-' + imt + '.' + export_type
            data = []
            for site, poes in zip(sitemesh, curves):
                data.append(HazardCurve(Location(site), poes[imt]))
            writer = writercls(
                fname,
                investigation_time=oq.investigation_time,
                imls=oq.imtls[imt],
                smlt_path='_'.join(rlz.sm_lt_path),
                gsimlt_path=rlz.gsim_rlz.uid)
            writer.serialize(data)
            exported.append(fname)
    exported.sort()
    return exported
def export_gmf(ekey, dstore):
    """
    Export the ground motion fields, one file per realization having GMFs.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the paths of the generated files inside dstore.export_dir
    """
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    rupture_by_tag = sum(dstore['sescollection'], AccumDict())
    all_tags = dstore['tags'].value
    oq = OqParam.from_(dstore.attrs)
    investigation_time = (None if oq.calculation_mode == 'scenario'
                          else oq.investigation_time)
    samples = oq.number_of_logic_tree_samples
    fmt = ekey[-1]
    gmfs = dstore[ekey[0]]
    nbytes = gmfs.attrs['nbytes']
    logging.info('Internal size of the GMFs: %s', humansize(nbytes))
    if nbytes > GMF_MAX_SIZE:
        # logging.warn is a deprecated alias, use logging.warning
        logging.warning(GMF_WARNING, dstore.hdf5path)
    fnames = []
    for rlz, gmf_by_idx in zip(rlzs_assoc.realizations,
                               rlzs_assoc.combine_gmfs(gmfs)):
        tags = all_tags[list(gmf_by_idx)]
        # distinct name: do not rebind the `gmfs` dataset read above
        rlz_gmfs = list(gmf_by_idx.values())
        if not rlz_gmfs:
            continue
        ruptures = [rupture_by_tag[tag] for tag in tags]
        fname = build_name(rlz, 'gmf', fmt, samples)
        fnames.append(os.path.join(dstore.export_dir, fname))
        # the exporter is looked up by name among the module globals
        globals()['export_gmf_%s' % fmt](
            ('gmf', fmt), dstore.export_dir, fname, sitecol,
            ruptures, rlz_gmfs, rlz, investigation_time)
    return fnames
def get_data_transfer(dstore):
    """
    Estimate how much data goes from the controller node to the workers
    and back in a classical calculation.

    :param dstore: a :class:`openquake.commonlib.datastore.DataStore` instance
    :returns: (block_info, to_send_forward, to_send_back)
    """
    oqparam = OqParam.from_(dstore.attrs)
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    info = dstore['job_info']
    sources = dstore['composite_source_model'].get_sources()

    def count(group):
        # number of rows in the group
        return sum(1 for row in group)

    num_gsims_by_trt = groupby(rlzs_assoc, operator.itemgetter(0), count)
    gsims_assoc = rlzs_assoc.gsims_by_trt_id
    forward = 0
    backward = 0
    rows = []
    blocks = split_in_blocks(
        sources, oqparam.concurrent_tasks or 1,
        operator.attrgetter('weight'),
        operator.attrgetter('trt_model_id'))
    for block in blocks:
        num_gsims = num_gsims_by_trt.get(block[0].trt_model_id, 0)
        back = info['n_sites'] * info['n_levels'] * info['n_imts'] * num_gsims
        backward += back * 8  # 8 bytes per float
        args = (block, sitecol, gsims_assoc, PerformanceMonitor(''))
        pickled = parallel.pickle_sequence(args)
        forward += sum(len(p) for p in pickled)
        rows.append((len(block), block.weight))
    return numpy.array(rows, block_dt), forward, backward
def pre_execute(self):
    """
    Check if there is a pre_calculator or a previous calculation ID.
    If yes, read the inputs by invoking the precalculator or by retrieving
    the previous calculation; if not, read the inputs directly.

    In every case the HDF5 file of the datastore is flushed at the end.
    """
    if self.pre_calculator is not None:
        # the parameter hazard_calculation_id is only meaningful if
        # there is a precalculator
        precalc_id = self.oqparam.hazard_calculation_id
        if precalc_id is None:  # recompute everything
            # the precalculator is instantiated with the calc_id of the
            # current datastore
            precalc = calculators[self.pre_calculator](
                self.oqparam, self.monitor('precalculator'),
                self.datastore.calc_id)
            precalc.run()
            if 'scenario' not in self.oqparam.calculation_mode:
                self.csm = precalc.csm
        else:  # read previously computed data
            parent = datastore.DataStore(precalc_id)
            self.datastore.set_parent(parent)
            # update oqparam with the attributes saved in the datastore
            self.oqparam = OqParam.from_(self.datastore.attrs)
            # NOTE(review): assumed to belong to this branch only; confirm
            # against the original indentation
            self.read_risk_data()
    else:  # we are in a basic calculator
        self.read_risk_data()
        self.read_sources()
    self.datastore.hdf5.flush()
def export_agg_curve(ekey, dstore):
    """
    Export the aggregate loss curves in XML format, one file per
    (cost type, insured flag, realization) triple.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    cost_types = dstore['cost_types']
    rlzs = dstore['rlzs_assoc'].realizations
    agg_curve = dstore[ekey[0]]
    exported = []
    L, R = len(cost_types), len(rlzs)
    # no suffix is needed when there is a single file per insured flag
    single = L == 1 and R == 1
    for ct in cost_types:
        loss_type = ct['name']
        array = agg_curve[loss_type].value
        for ins in range(oq.insured_losses + 1):
            ins_tag = '_ins' if ins else ''
            for rlz in rlzs:
                if single:
                    suffix = ''
                else:
                    suffix = '-gsimltp_%s_%s' % (rlz.uid, loss_type)
                dest = dstore.export_path(
                    'agg_curve%s%s.%s' % (suffix, ins_tag, ekey[1]))
                rec = array[rlz.ordinal, ins]
                curve = AggCurve(rec['losses'], rec['poes'], rec['avg'], None)
                writer = risk_writers.AggregateLossCurveXMLWriter(
                    dest, oq.investigation_time, loss_type,
                    source_model_tree_path='_'.join(rlz.sm_lt_path),
                    gsim_tree_path='_'.join(rlz.gsim_lt_path),
                    unit=ct['unit'])
                writer.serialize(curve)
                exported.append(dest)
    exported.sort()
    return exported
def export_ses_xml(ekey, dstore):
    """
    Export one SES file per (source model, trt model) pair.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the names (without directory) of the exported files; an
        empty list if the 'rlzs_assoc' object has no `csm_info` attribute
    """
    fmt = ekey[-1]
    oq = OqParam.from_(dstore.attrs)
    try:
        csm_info = dstore['rlzs_assoc'].csm_info
    except AttributeError:  # for scenario calculators don't export
        return []
    sescollection = dstore['sescollection']
    # the collection index advances by one for each trt model, in order
    pairs = ((sm, trt_model)
             for sm in csm_info.source_models
             for trt_model in sm.trt_models)
    by_ses_idx = operator.attrgetter('ses_idx')
    exported = []
    for col_id, (sm, trt_model) in enumerate(pairs):
        ruptures = list(sescollection[col_id].values())
        ses_coll = SESCollection(
            groupby(ruptures, by_ses_idx), sm.path, oq.investigation_time)
        fname = 'ses-%d-smltp_%s.%s' % (
            trt_model.id, '_'.join(sm.path), fmt)
        # the exporter is looked up by name among the module globals
        globals()['_export_ses_' + fmt](
            os.path.join(dstore.export_dir, fname), ses_coll)
        exported.append(fname)
    return exported
def pre_execute(self):
    """
    Check if there is a pre_calculator or a previous calculation ID.
    If yes, read the inputs by invoking the precalculator or by retrieving
    the previous calculation; if not, read the inputs directly.

    In every case the HDF5 file of the datastore is flushed at the end.
    """
    if self.pre_calculator is not None:
        # the parameter hazard_calculation_id is only meaningful if
        # there is a precalculator
        precalc_id = self.oqparam.hazard_calculation_id
        if precalc_id is None:  # recompute everything
            # the precalculator is instantiated with the calc_id of the
            # current datastore
            precalc = calculators[self.pre_calculator](
                self.oqparam, self.monitor('precalculator'),
                self.datastore.calc_id)
            precalc.run(clean_up=False)
            if 'scenario' not in self.oqparam.calculation_mode:
                self.csm = precalc.csm
        else:  # read previously computed data
            self.datastore.set_parent(datastore.DataStore(precalc_id))
            # update oqparam with the attributes saved in the datastore
            self.oqparam = OqParam.from_(self.datastore.attrs)
            # NOTE(review): assumed to belong to this branch only; confirm
            # against the original indentation
            self.read_exposure_sitecol()
    else:  # we are in a basic calculator
        self.read_exposure_sitecol()
        self.read_sources()
    self.datastore.hdf5.flush()
def get_data_transfer(dstore):
    """
    Estimate how much data goes from the controller node to the workers
    and back in a classical calculation.

    :param dstore: a :class:`openquake.commonlib.datastore.DataStore` instance
    :returns: (block_info, to_send_forward, to_send_back)
    """
    oqparam = OqParam.from_(dstore.attrs)
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    info = dstore['job_info']
    sources = dstore['composite_source_model'].get_sources()

    def count(group):
        # number of rows in the group
        return sum(1 for row in group)

    num_gsims_by_trt = groupby(rlzs_assoc, operator.itemgetter(0), count)
    gsims_assoc = rlzs_assoc.gsims_by_trt_id
    forward = 0
    backward = 0
    rows = []
    blocks = split_in_blocks(
        sources, oqparam.concurrent_tasks or 1,
        operator.attrgetter('weight'),
        operator.attrgetter('trt_model_id'))
    for block in blocks:
        num_gsims = num_gsims_by_trt.get(block[0].trt_model_id, 0)
        back = info['n_sites'] * info['n_levels'] * info['n_imts'] * num_gsims
        backward += back * 8  # 8 bytes per float
        args = (block, sitecol, gsims_assoc, PerformanceMonitor(''))
        pickled = parallel.pickle_sequence(args)
        forward += sum(len(p) for p in pickled)
        rows.append((len(block), block.weight))
    return numpy.array(rows, block_dt), forward, backward
def export_bcr_map_rlzs(ekey, dstore):
    """
    Export the 'bcr-rlzs' data in XML format, one file per
    (realization, loss type) pair.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    assetcol = dstore['assetcol']
    sitemesh = dstore['sitemesh']
    bcr_data = dstore['bcr-rlzs']
    N, R = bcr_data.shape
    oq = OqParam.from_(dstore.attrs)
    realizations = dstore['rlzs_assoc'].realizations
    loss_types = dstore['riskmodel'].loss_types
    writercls = risk_writers.BCRMapXMLWriter
    exported = []
    for rlz in realizations:
        # the realization uid appears in the name only if there are many
        if R == 1:
            suffix = '.xml'
        else:
            suffix = '-gsimltp_%s.xml' % rlz.uid
        for loss_type in loss_types:
            rlz_data = bcr_data[loss_type][:, rlz.ordinal]
            path = dstore.export_path('bcr-%s%s' % (loss_type, suffix))
            writer = writercls(
                path, oq.interest_rate, oq.asset_life_expectancy,
                loss_type, **get_paths(rlz))
            data = [
                BcrData(Location(sitemesh[ass['site_id']]),
                        ass['asset_ref'],
                        value['annual_loss_orig'],
                        value['annual_loss_retro'],
                        value['bcr'])
                for ass, value in zip(assetcol, rlz_data)]
            writer.serialize(data)
            exported.append(path)
    exported.sort()
    return exported
def export_hcurves_xml_json(ekey, dstore):
    """
    Export the hazard curves in XML or GeoJSON format, one file per
    (kind, imt) pair.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    export_type = ekey[1]
    len_ext = len(export_type) + 1  # length of the extension plus the dot
    oq = OqParam.from_(dstore.attrs)
    sitemesh = dstore['sitemesh'].value
    rlzs_assoc = dstore['rlzs_assoc']
    exported = []
    if export_type == 'geojson':
        writercls = hazard_writers.HazardCurveGeoJSONWriter
    else:
        writercls = hazard_writers.HazardCurveXMLWriter
    # one realization is consumed for each item of the exported group
    rlzs = iter(rlzs_assoc.realizations)
    for kind, curves in dstore[ekey[0]].items():
        rlz = next(rlzs)
        name = hazard_curve_name(
            dstore, ekey, kind, rlzs_assoc, oq.number_of_logic_tree_samples)
        for imt in oq.imtls:
            fname = name[:-len_ext] + '-' + imt + '.' + export_type
            data = []
            for site, poes in zip(sitemesh, curves):
                data.append(HazardCurve(Location(site), poes[imt]))
            writer = writercls(
                fname,
                investigation_time=oq.investigation_time,
                imls=oq.imtls[imt],
                smlt_path='_'.join(rlz.sm_lt_path),
                gsimlt_path=rlz.gsim_rlz.uid)
            writer.serialize(data)
            exported.append(fname)
    exported.sort()
    return exported
def get_oqparam(self):
    """
    Return the OqParam object built from the attributes of the datastore
    of this calculation; the datastore is read from the parent directory
    of `ds_calc_dir`
    """
    parent_dir = os.path.dirname(self.ds_calc_dir)
    return OqParam.from_(datastore.read(self.id, datadir=parent_dir).attrs)
def get_oqparam(self):
    """
    Return the OqParam object built from the attributes of the datastore
    of this calculation; the datastore is read from the parent directory
    of `ds_calc_dir`
    """
    parent_dir = os.path.dirname(self.ds_calc_dir)
    return OqParam.from_(datastore.read(self.id, datadir=parent_dir).attrs)
def view_inputs(token, dstore):
    """
    Return a table with header Name/File listing the inputs stored in the
    job parameters; the files under the 'source' key are listed last, one
    row each.

    :param token: not used here
    :param dstore: datastore object
    """
    inputs = OqParam.from_(dstore.attrs).inputs.copy()
    # there is no 'source' in scenario calculations
    source_rows = [('source', fname) for fname in inputs.pop('source', ())]
    rows = list(inputs.items()) + source_rows
    return rst_table(build_links(rows), header=['Name', 'File'])
def view_params(token, dstore):
    """
    Return a table with the values of a fixed list of job parameters.

    :param token: not used here
    :param dstore: datastore object
    """
    oq = OqParam.from_(dstore.attrs)
    names = (
        'calculation_mode',
        'number_of_logic_tree_samples',
        'maximum_distance',
        'investigation_time',
        'ses_per_logic_tree_path',
        'truncation_level',
        'rupture_mesh_spacing',
        'complex_fault_mesh_spacing',
        'width_of_mfd_bin',
        'area_source_discretization',
        'random_seed',
        'master_seed',
        'concurrent_tasks',
    )
    rows = []
    for name in names:
        rows.append((name, getattr(oq, name)))
    return rst_table(rows)
def view_params(token, dstore):
    """
    Return a table with the values of a fixed list of job parameters.

    :param token: not used here
    :param dstore: datastore object
    """
    oq = OqParam.from_(dstore.attrs)
    names = (
        'calculation_mode',
        'number_of_logic_tree_samples',
        'maximum_distance',
        'investigation_time',
        'ses_per_logic_tree_path',
        'truncation_level',
        'rupture_mesh_spacing',
        'complex_fault_mesh_spacing',
        'width_of_mfd_bin',
        'area_source_discretization',
        'random_seed',
        'master_seed',
        'concurrent_tasks',
    )
    rows = []
    for name in names:
        rows.append((name, getattr(oq, name)))
    return rst_table(rows)
def view_inputs(token, dstore):
    """
    Return a table with header Name/File listing the inputs stored in the
    job parameters; the files under the 'source' key are listed last, one
    row each.

    :param token: not used here
    :param dstore: datastore object
    """
    inputs = OqParam.from_(dstore.attrs).inputs.copy()
    # there is no 'source' in scenario calculations
    source_rows = [('source', fname) for fname in inputs.pop('source', ())]
    rows = list(inputs.items()) + source_rows
    return rst_table(build_links(rows), header=['Name', 'File'])
def view_gmfs_total_size(name, dstore):
    """
    :returns:
        the total size of the GMFs as human readable string; it assumes
        4 bytes for the rupture index, 4 bytes for the realization index
        and 8 bytes for each float (there are num_imts floats per gmf)
    """
    num_imts = len(OqParam.from_(dstore.attrs).imtls)
    # 8 bytes for the two indices plus 8 bytes for each of the floats
    total = sum(8 * counts['gmf'] * (num_imts + 1)
                for counts in dstore['counts_per_rlz'])
    return humansize(total)
def view_gmfs_total_size(name, dstore):
    """
    :returns:
        the total size of the GMFs as human readable string; it assumes
        4 bytes for the rupture index, 4 bytes for the realization index
        and 8 bytes for each float (there are num_imts floats per gmf)
    """
    num_imts = len(OqParam.from_(dstore.attrs).imtls)
    # 8 bytes for the two indices plus 8 bytes for each of the floats
    total = sum(8 * counts['gmf'] * (num_imts + 1)
                for counts in dstore['counts_per_rlz'])
    return humansize(total)
def avglosses_data_transfer(token, dstore):
    """
    Describe the amount of average losses transferred from the workers
    to the controller node in a risk calculation.

    :param token: not used here
    :param dstore: datastore object
    :returns: a string showing the factors and the resulting size
    """
    oq = OqParam.from_(dstore.attrs)
    num_assets = len(dstore['assetcol'])
    num_rlzs = len(dstore['rlzs_assoc'].realizations)
    num_loss_types = len(dstore['riskmodel'].loss_types)
    tasks = oq.concurrent_tasks
    # two 8 byte floats, loss and ins_loss
    size_bytes = num_assets * num_rlzs * num_loss_types * 2 * 8 * tasks
    template = ('%d asset(s) x %d realization(s) x %d loss type(s) x 2 '
                'losses x 8 bytes x %d tasks = %s')
    return template % (num_assets, num_rlzs, num_loss_types, tasks,
                       humansize(size_bytes))
def avglosses_data_transfer(token, dstore):
    """
    Describe the amount of average losses transferred from the workers
    to the controller node in a risk calculation.

    :param token: not used here
    :param dstore: datastore object
    :returns: a string showing the factors and the resulting size
    """
    oq = OqParam.from_(dstore.attrs)
    num_assets = len(dstore['assetcol'])
    num_rlzs = len(dstore['rlzs_assoc'].realizations)
    num_loss_types = len(dstore['riskmodel'].loss_types)
    tasks = oq.concurrent_tasks
    # two 8 byte floats, loss and ins_loss
    size_bytes = num_assets * num_rlzs * num_loss_types * 2 * 8 * tasks
    template = ('%d asset(s) x %d realization(s) x %d loss type(s) x 2 '
                'losses x 8 bytes x %d tasks = %s')
    return template % (num_assets, num_rlzs, num_loss_types, tasks,
                       humansize(size_bytes))
def export_loss_maps_xml_geojson(ekey, dstore):
    """
    Export the loss maps in XML or GeoJSON format, one file per
    (loss type, realization, poe, insured flag) combination.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    unit_by_lt = {}
    for ct in dstore['cost_types']:
        unit_by_lt[riskmodels.cost_type_to_loss_type(ct['name'])] = ct['unit']
    unit_by_lt['fatalities'] = 'people'
    rlzs = dstore['rlzs_assoc'].realizations
    loss_maps = dstore[ekey[0]]
    riskmodel = dstore['riskmodel']
    assetcol = dstore['assetcol']
    R = len(rlzs)
    sitemesh = dstore['sitemesh']
    L = len(riskmodel.loss_types)
    exported = []
    export_type = ekey[1]
    if export_type == 'geojson':
        writercls = risk_writers.LossMapGeoJSONWriter
    else:
        writercls = risk_writers.LossMapXMLWriter
    # only the loss types with user provided curve builders are exported
    loss_types = [cb.loss_type for cb in riskmodel.curve_builders
                  if cb.user_provided]
    single = L == 1 and R == 1
    for lt in loss_types:
        loss_maps_lt = loss_maps[lt]
        for r in range(R):
            lmaps = loss_maps_lt[:, r]
            for poe in oq.conditional_loss_poes:
                for insflag in range(oq.insured_losses + 1):
                    ins = '_ins' if insflag else ''
                    rlz = rlzs[r]
                    unit = unit_by_lt[lt]
                    if single:
                        suffix = ''
                    else:
                        suffix = '-gsimltp_%s_%s' % (rlz.uid, lt)
                    root = ekey[0][:-5]  # strip -rlzs
                    name = '%s%s-poe-%s%s.%s' % (
                        root, suffix, poe, ins, ekey[1])
                    fname = dstore.export_path(name)
                    poe_str = 'poe~%s' % poe + ins
                    data = [
                        LossMap(Location(sitemesh[ass['site_id']]),
                                ass['asset_ref'], stat, None)
                        for ass, stat in zip(assetcol, lmaps[poe_str])]
                    writer = writercls(
                        fname, oq.investigation_time, poe=poe,
                        loss_type=lt, unit=unit, **get_paths(rlz))
                    writer.serialize(data)
                    exported.append(fname)
    exported.sort()
    return exported
def read_exposure_sitecol(self):
    """
    Read the exposure (if any) and then the site collection, possibly
    extracted from the exposure.

    When the exposure is read and a hazard site collection different from
    the exposure one is available, the assets are re-associated to the
    hazard sites. At the end the mesh is saved and, if `assets_by_site`
    has been set, the asset collection is built.
    """
    inputs = self.oqparam.inputs
    # default, so that the name is always bound: before it was undefined
    # when 'gmfs' was in the inputs without sites in a calculation mode
    # not containing 'scenario_', causing an UnboundLocalError below
    haz_sitecol = None
    if 'gmfs' in inputs and self.oqparam.sites:
        haz_sitecol = self.sitecol = readinput.get_site_collection(
            self.oqparam)
    if 'scenario_' in self.oqparam.calculation_mode:
        self.gmfs = get_gmfs(self)
        haz_sitecol = self.sitecol
    if 'exposure' in inputs:
        logging.info('Reading the exposure')
        with self.monitor('reading exposure', autoflush=True):
            self.exposure = readinput.get_exposure(self.oqparam)
            self.sitecol, self.assets_by_site = (
                readinput.get_sitecol_assets(self.oqparam, self.exposure))
            self.cost_types = self.exposure.cost_types
            self.taxonomies = numpy.array(
                sorted(self.exposure.taxonomies), '|S100')
        num_assets = self.count_assets()
        if self.datastore.parent:
            haz_sitecol = self.datastore.parent['sitecol']
        elif 'gmfs' in inputs:
            pass  # haz_sitecol is already defined
        # TODO: think about the case hazard_curves in inputs
        else:
            haz_sitecol = None
        if haz_sitecol is not None and haz_sitecol != self.sitecol:
            with self.monitor('assoc_assets_sites'):
                self.sitecol, self.assets_by_site = \
                    self.assoc_assets_sites(haz_sitecol.complete)
            ok_assets = self.count_assets()
            num_sites = len(self.sitecol)
            # logging.warn is a deprecated alias, use logging.warning
            logging.warning(
                'Associated %d assets to %d sites, %d discarded',
                ok_assets, num_sites, num_assets - ok_assets)
    elif (self.datastore.parent and 'exposure' in
          OqParam.from_(self.datastore.parent.attrs).inputs):
        logging.info('Re-using the already imported exposure')
    else:  # no exposure
        logging.info('Reading the site collection')
        with self.monitor('reading site collection', autoflush=True):
            self.sitecol = readinput.get_site_collection(self.oqparam)

    # save mesh and asset collection
    self.save_mesh()
    if hasattr(self, 'assets_by_site'):
        self.assetcol = riskinput.build_asset_collection(
            self.assets_by_site, self.oqparam.time_event)
def export_loss_map_xml_geojson(ekey, dstore):
    """
    Export the mean and stddev losses as loss maps in XML or GeoJSON
    format, one file per (loss type, insured flag, realization) triple.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    unit_by_lt = {}
    for ct in dstore['cost_types']:
        unit_by_lt[riskmodels.cost_type_to_loss_type(ct['name'])] = ct['unit']
    unit_by_lt['fatalities'] = 'people'
    rlzs = dstore['rlzs_assoc'].realizations
    loss_map = dstore[ekey[0]]
    riskmodel = dstore['riskmodel']
    assetcol = dstore['assetcol']
    R = len(rlzs)
    sitemesh = dstore['sitemesh']
    L = len(riskmodel.loss_types)
    exported = []
    export_type = ekey[1]
    if export_type == 'geojson':
        writercls = risk_writers.LossMapGeoJSONWriter
    else:
        writercls = risk_writers.LossMapXMLWriter
    single = L == 1 and R == 1
    for lt in riskmodel.loss_types:
        alosses = loss_map[lt]
        for ins in range(oq.insured_losses + 1):
            ins_tag = '_ins' if ins else ''
            means = alosses['mean' + ins_tag]
            stddevs = alosses['stddev' + ins_tag]
            for r in range(R):
                rlz = rlzs[r]
                unit = unit_by_lt[lt]
                if single:
                    suffix = ''
                else:
                    suffix = '-gsimltp_%s_%s' % (rlz.uid, lt)
                root = ekey[0][:-5]  # strip -rlzs
                name = '%s%s%s.%s' % (root, suffix, ins_tag, ekey[1])
                fname = dstore.export_path(name)
                data = [
                    LossMap(Location(sitemesh[ass['site_id']]),
                            ass['asset_ref'], mean, stddev)
                    for ass, mean, stddev in zip(
                        assetcol, means[:, r], stddevs[:, r])]
                writer = writercls(
                    fname, oq.investigation_time, poe=None, loss_type=lt,
                    gsim_tree_path=rlz.uid, unit=unit)
                writer.serialize(data)
                exported.append(fname)
    exported.sort()
    return exported
def export_avg_losses_stats(ekey, dstore):
    """
    Save one 'avg_losses-<statistic>.csv' file for the mean and for each
    quantile in `quantile_loss_curves`.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: what the CSV writer reports as saved
    """
    oq = OqParam.from_(dstore.attrs)
    avg_losses = dstore[ekey[0]].value
    # the mean comes first, followed by the quantiles
    stat_names = ['mean']
    stat_names.extend('quantile-%s' % q for q in oq.quantile_loss_curves)
    assets = get_assets(dstore)
    writer = writers.CsvWriter(fmt='%10.6E')
    for idx, stat_name in enumerate(stat_names):
        dest = dstore.export_path('avg_losses-%s.csv' % stat_name)
        writer.save(compose_arrays(assets, avg_losses[:, idx]), dest)
    return writer.getsaved()
def export_avg_losses_stats(ekey, dstore):
    """
    Write one 'avg_losses-<statistic>.csv' file for the mean and for each
    quantile in `quantile_loss_curves`.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    avg_losses = dstore[ekey[0]]
    # the mean comes first, followed by the quantiles
    stat_names = ['mean']
    stat_names.extend('quantile-%s' % q for q in oq.quantile_loss_curves)
    assets = get_assets(dstore)
    exported = []
    for idx, stat_name in enumerate(stat_names):
        column = avg_losses[:, idx]
        dest = dstore.export_path('avg_losses-%s.csv' % stat_name)
        writers.write_csv(
            dest, compose_arrays(assets, column), fmt='%10.6E')
        exported.append(dest)
    return exported
def read_exposure_sitecol(self):
    """
    Read the exposure (if any) and then the site collection, possibly
    extracted from the exposure.

    When the exposure is read and a hazard site collection different from
    the exposure one is available, the assets are re-associated to the
    hazard sites. At the end the mesh is saved and, if `assets_by_site`
    has been set, the asset collection is built.

    :raises ValueError: if some of the `specific_assets` are not in the
        asset collection
    """
    logging.info('Reading the site collection')
    with self.monitor('reading site collection', autoflush=True):
        haz_sitecol = readinput.get_site_collection(self.oqparam)
    inputs = self.oqparam.inputs
    if 'exposure' in inputs:
        logging.info('Reading the exposure')
        with self.monitor('reading exposure', autoflush=True):
            self.exposure = readinput.get_exposure(self.oqparam)
            self.sitecol, self.assets_by_site = (
                readinput.get_sitecol_assets(self.oqparam, self.exposure))
            # the cost types are set only if the exposure has some
            if len(self.exposure.cost_types):
                self.cost_types = self.exposure.cost_types
            self.taxonomies = numpy.array(
                sorted(self.exposure.taxonomies), '|S100')
        num_assets = self.count_assets()
        if self.datastore.parent:
            haz_sitecol = self.datastore.parent['sitecol']
        if haz_sitecol is not None and haz_sitecol != self.sitecol:
            with self.monitor('assoc_assets_sites'):
                self.sitecol, self.assets_by_site = \
                    self.assoc_assets_sites(haz_sitecol.complete)
            ok_assets = self.count_assets()
            num_sites = len(self.sitecol)
            # logging.warn is a deprecated alias, use logging.warning
            logging.warning(
                'Associated %d assets to %d sites, %d discarded',
                ok_assets, num_sites, num_assets - ok_assets)
    elif (self.datastore.parent and 'exposure' in
          OqParam.from_(self.datastore.parent.attrs).inputs):
        logging.info('Re-using the already imported exposure')
    else:  # no exposure
        self.sitecol = haz_sitecol

    # save mesh and asset collection
    self.save_mesh()
    if hasattr(self, 'assets_by_site'):
        self.assetcol = riskinput.build_asset_collection(
            self.assets_by_site, self.oqparam.time_event)
        spec = set(self.oqparam.specific_assets)
        unknown = spec - set(self.assetcol['asset_ref'])
        if unknown:
            raise ValueError('The specific asset(s) %s are not in the '
                             'exposure' % ', '.join(unknown))
def export_lossmaps_xml_geojson(ekey, dstore):
    """
    Export the mean and stddev losses as loss maps in XML or GeoJSON
    format, one file per (loss type, realization) pair.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    unit_by_lt = {}
    for ct in dstore['cost_types']:
        unit_by_lt[riskmodels.cost_type_to_loss_type(ct['name'])] = ct['unit']
    unit_by_lt['fatalities'] = 'people'
    rlzs = dstore['rlzs_assoc'].realizations
    avglosses = dstore[ekey[0]]
    riskmodel = dstore['riskmodel']
    assetcol = dstore['assetcol']
    sitemesh = dstore['sitemesh']
    L = len(riskmodel.loss_types)
    N, R = avglosses.shape
    exported = []
    export_type = ekey[1]
    if export_type == 'geojson':
        writercls = risk_writers.LossMapGeoJSONWriter
    else:
        writercls = risk_writers.LossMapXMLWriter
    single = L == 1 and R == 1
    for lt in riskmodel.loss_types:
        alosses = avglosses[lt]
        for r in range(R):
            rlz = rlzs[r]
            unit = unit_by_lt[lt]
            if single:
                suffix = ''
            else:
                suffix = '-gsimltp_%s_%s' % (rlz.uid, lt)
            fname = dstore.export_path(
                '%s%s.%s' % (ekey[0], suffix, ekey[1]))
            data = [
                LossMap(Location(sitemesh[ass['site_id']]),
                        ass['asset_ref'], stat['mean'], stat['stddev'])
                for ass, stat in zip(assetcol, alosses[:, r])]
            # TODO: replace the category with the exposure category
            writer = writercls(
                fname, oq.investigation_time, poe=None, loss_type=lt,
                gsim_tree_path=None, unit=unit, loss_category=None)
            writer.serialize(data)
            exported.append(fname)
    exported.sort()
    return exported
def _print_info(dstore, filtersources=True, weightsources=True):
    """
    Print the composite source model info, the realizations association
    and, if requested, the job info with the size of the curve matrix.

    :param dstore: datastore object
    :param filtersources: flag; the job info is printed if this flag or
        `weightsources` is set
    :param weightsources: flag; if not set, the "input_weight" field is
        not printed
    """
    assoc = dstore["rlzs_assoc"]
    oqparam = OqParam.from_(dstore.attrs)
    csm = dstore["composite_source_model"]
    sitecol = dstore["sitecol"]
    print(csm.get_info())
    print("See https://github.com/gem/oq-risklib/blob/master/doc/"
          "effective-realizations.rst for an explanation")
    print(assoc)
    if filtersources or weightsources:
        [info] = readinput.get_job_info(oqparam, csm, sitecol)
        info["n_sources"] = csm.get_num_sources()
        # the trailing factor 8 is part of the size estimate
        curve_matrix_size = (
            info["n_sites"] * info["n_levels"] * info["n_imts"] *
            len(assoc) * 8)
        for field in info.dtype.fields:
            if field != "input_weight" or weightsources:
                print(field, info[field])
        print("curve_matrix_size", humansize(curve_matrix_size))
    if "num_ruptures" in dstore:
        print(datastore.view("rupture_collections", dstore))
def export_hcurves_csv(ekey, dstore):
    """
    Write one .csv file per item found under the exported datastore key.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    rlzs_assoc = dstore['rlzs_assoc']
    sitecol = dstore['sitecol']
    key, fmt = ekey
    is_uhs = key == 'uhs'
    exported = []
    for kind, hcurves in dstore[key].items():
        fname = hazard_curve_name(
            dstore, ekey, kind, rlzs_assoc, oq.number_of_logic_tree_samples)
        if is_uhs:
            export_uhs_csv(ekey, fname, sitecol, hcurves)
        else:
            export_hazard_curves_csv(ekey, fname, sitecol, hcurves, oq.imtls)
        exported.append(fname)
    exported.sort()
    return exported
def export_agg_curve_stats(ekey, dstore):
    """
    Export the statistical aggregate loss curves in XML format, one file
    per (cost type, insured flag, statistic) triple.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the generated file names
    """
    oq = OqParam.from_(dstore.attrs)
    quantiles = oq.quantile_loss_curves
    cost_types = dstore['cost_types']
    agg_curve = dstore[ekey[0]]
    exported = []
    for ct in cost_types:
        loss_type = ct['name']
        array = agg_curve[loss_type].value
        for ins in range(oq.insured_losses + 1):
            ins_tag = '_ins' if ins else ''
            for i, sname, qvalue in _gen_idx_sname_qvalue(quantiles):
                fname = 'agg_curve-%s-%s%s.%s' % (
                    sname, loss_type, ins_tag, ekey[1])
                dest = dstore.export_path(fname)
                rec = array[i, ins]
                curve = AggCurve(rec['losses'], rec['poes'], rec['avg'], None)
                writer = risk_writers.AggregateLossCurveXMLWriter(
                    dest, oq.investigation_time, loss_type,
                    statistics=sname, quantile_value=qvalue,
                    unit=ct['unit'])
                writer.serialize(curve)
                exported.append(dest)
    exported.sort()
    return exported
def get_hcurves_and_means(dstore):
    """
    Extract hcurves from the datastore and compute their means.

    :param dstore: datastore object containing the keys 'hcurves',
        'rlzs_assoc' and 'sitemesh'
    :returns: curves_by_rlz, mean_curves
    """
    oq = OqParam.from_(dstore.attrs)
    hcurves = dstore['hcurves']
    realizations = dstore['rlzs_assoc'].realizations
    curves_by_rlz = {
        rlz: hcurves['rlz-%03d' % rlz.ordinal] for rlz in realizations}
    # sort once and take both the curves and the weights from the same
    # ordering, so that each curve is always paired with the weight of its
    # own realization, even if `realizations` is not already sorted
    ordered_rlzs = sorted(curves_by_rlz)
    weights = [rlz.weight for rlz in ordered_rlzs]
    N = len(dstore['sitemesh'])
    mean_curves = zero_curves(N, oq.imtls)
    for imt in oq.imtls:
        mean_curves[imt] = scientific.mean_curve(
            [curves_by_rlz[rlz][imt] for rlz in ordered_rlzs], weights)
    return curves_by_rlz, mean_curves
def export_disagg_xml(ekey, dstore):
    """
    Export the disaggregation matrices in XML format, one file for each
    key of the 'disagg' group of the datastore.

    :param ekey: export key (not used here)
    :param dstore: datastore object
    :returns: the sorted list of the exported file names
    """
    params = OqParam.from_(dstore.attrs)
    realizations = dstore['rlzs_assoc'].realizations
    disagg_group = dstore['disagg']
    exported = []
    make_writer = hazard_writers.DisaggXMLWriter
    for name in disagg_group:
        dset = disagg_group[name]
        # NOTE(review): the matrices are stored pickled; unpickling is safe
        # only as long as the datastore comes from a trusted source
        matrices = pickle.loads(dset.value)
        meta = dset.attrs
        rlz = realizations[meta['rlzi']]
        poe = meta['poe']
        iml = meta['iml']
        imt, sa_period, sa_damping = from_string(meta['imt'])
        path = dstore.export_path(name + '.xml')
        lon, lat = meta['location']
        # TODO: add poe=poe below
        writer = make_writer(
            path,
            investigation_time=params.investigation_time,
            imt=imt,
            smlt_path='_'.join(rlz.sm_lt_path),
            gsimlt_path=rlz.gsim_rlz.uid,
            lon=lon,
            lat=lat,
            sa_period=sa_period,
            sa_damping=sa_damping,
            mag_bin_edges=meta['mag_bin_edges'],
            dist_bin_edges=meta['dist_bin_edges'],
            lon_bin_edges=meta['lon_bin_edges'],
            lat_bin_edges=meta['lat_bin_edges'],
            eps_bin_edges=meta['eps_bin_edges'],
            tectonic_region_types=meta['trts'],
        )
        # one DisaggMatrix for each entry of disagg.pmf_map, in order
        data = []
        for i, dim_labels in enumerate(disagg.pmf_map):
            data.append(DisaggMatrix(poe, iml, dim_labels, matrices[i]))
        writer.serialize(data)
        exported.append(path)
    return sorted(exported)
def _print_info(dstore, filtersources=True, weightsources=True):
    """
    Print information about the composite source model in the datastore.

    :param dstore: datastore containing the keys 'rlzs_assoc',
        'composite_source_model' and 'sitecol'
    :param filtersources: flag; if true, the job information is printed
    :param weightsources: flag; if true, the job information is printed,
        including the 'input_weight' field
    """
    rlzs_assoc = dstore['rlzs_assoc']
    params = OqParam.from_(dstore.attrs)
    source_model = dstore['composite_source_model']
    sites = dstore['sitecol']
    print(source_model.get_info())
    print('See https://github.com/gem/oq-risklib/blob/master/doc/'
          'effective-realizations.rst for an explanation')
    print(rlzs_assoc)
    if filtersources or weightsources:
        [job_info] = readinput.get_job_info(params, source_model, sites)
        job_info['n_sources'] = source_model.get_num_sources()
        # the factor 8 is presumably the size in bytes of a float64
        matrix_size = (job_info['n_sites'] * job_info['n_levels'] *
                       job_info['n_imts'] * len(rlzs_assoc) * 8)
        for field in job_info.dtype.fields:
            hidden = field == 'input_weight' and not weightsources
            if not hidden:
                print(field, job_info[field])
        print('curve_matrix_size', humansize(matrix_size))
    if 'num_ruptures' in dstore:
        print(datastore.view('rupture_collections', dstore))
def export_lossmaps_xml_geojson(ekey, dstore):
    """
    Export the loss maps, one file per loss type and realization, in XML
    or GeoJSON format depending on the export key.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the exported file names
    """
    params = OqParam.from_(dstore.attrs)
    unit_by_lt = {
        riskmodels.cost_type_to_loss_type(ct['name']): ct['unit']
        for ct in dstore['cost_types']}
    unit_by_lt['fatalities'] = 'people'
    realizations = dstore['rlzs_assoc'].realizations
    losses = dstore[ekey[0]]
    riskmodel = dstore['riskmodel']
    assets = dstore['assetcol']
    mesh = dstore['sitemesh']
    num_loss_types = len(riskmodel.loss_types)
    _num_assets, num_rlzs = losses.shape
    exported = []
    fmt = ekey[1]
    if fmt == 'geojson':
        make_writer = risk_writers.LossMapGeoJSONWriter
    else:
        make_writer = risk_writers.LossMapXMLWriter
    for loss_type in riskmodel.loss_types:
        lt_losses = losses[loss_type]
        for r in range(num_rlzs):
            rlz = realizations[r]
            unit = unit_by_lt[loss_type]
            # no suffix is needed when there is a single output file
            if num_loss_types == 1 and num_rlzs == 1:
                suffix = ''
            else:
                suffix = '-gsimltp_%s_%s' % (rlz.uid, loss_type)
            path = dstore.export_path(
                '%s%s.%s' % (ekey[0], suffix, ekey[1]))
            data = [
                LossMap(Location(mesh[asset['site_id']]),
                        asset['asset_ref'], stat['mean'], stat['stddev'])
                for asset, stat in zip(assets, lt_losses[:, r])]
            # TODO: replace the category with the exposure category
            writer = make_writer(
                path, params.investigation_time, poe=None,
                loss_type=loss_type, gsim_tree_path=None, unit=unit,
                loss_category=None)
            writer.serialize(data)
            exported.append(path)
    return sorted(exported)
def export_hcurves_xml_json(ekey, dstore):
    """
    Export the hazard curves, one file per kind of curve and intensity
    measure type, in XML or GeoJSON format depending on the export key.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the exported file names
    """
    fmt = ekey[1]
    ext_len = len(fmt) + 1  # length of the extension, dot included
    params = OqParam.from_(dstore.attrs)
    mesh = dstore['sitemesh'].value
    assoc = dstore['rlzs_assoc']
    curves_by_kind = dstore[ekey[0]]
    exported = []
    if fmt == 'geojson':
        make_writer = hazard_writers.HazardCurveGeoJSONWriter
    else:
        make_writer = hazard_writers.HazardCurveXMLWriter
    for kind in curves_by_kind:
        # only the curves of a single realization have logic tree paths
        smlt_path = gsimlt_path = ''
        if kind.startswith('rlz-'):
            rlz = assoc.realizations[int(kind[4:])]
            smlt_path = '_'.join(rlz.sm_lt_path)
            gsimlt_path = rlz.gsim_rlz.uid
        curves = curves_by_kind[kind]
        name = hazard_curve_name(
            dstore, ekey, kind, assoc, params.number_of_logic_tree_samples)
        for imt in params.imtls:
            imtype, sa_period, sa_damping = from_string(imt)
            # insert the IMT between the base name and the extension
            path = ''.join([name[:-ext_len], '-', imt, '.', fmt])
            data = []
            for site, poes in zip(mesh, curves):
                data.append(HazardCurve(Location(site), poes[imt]))
            writer = make_writer(
                path,
                investigation_time=params.investigation_time,
                imls=params.imtls[imt],
                imt=imtype,
                sa_period=sa_period,
                sa_damping=sa_damping,
                smlt_path=smlt_path,
                gsimlt_path=gsimlt_path)
            writer.serialize(data)
            exported.append(path)
    return sorted(exported)
def export_hcurves_csv(ekey, dstore):
    """
    Exports the hazard curves into several .csv files

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the exported file names
    """
    params = OqParam.from_(dstore.attrs)
    assoc = dstore['rlzs_assoc']
    sites = dstore['sitecol']
    dskey, _fmt = ekey
    exported = []
    for kind, curves in dstore[dskey].items():
        path = hazard_curve_name(
            dstore, ekey, kind, assoc, params.number_of_logic_tree_samples)
        # uniform hazard spectra have their own CSV layout
        if dskey != 'uhs':
            export_hazard_curves_csv(ekey, path, sites, curves, params.imtls)
        else:
            export_uhs_csv(ekey, path, sites, curves)
        exported.append(path)
    exported.sort()
    return exported
def export_lossmaps_xml(ekey, dstore):
    """
    Export the loss maps built from the 'avglosses' array, one XML file
    per loss type and realization.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the exported file names
    """
    params = OqParam.from_(dstore.attrs)
    unit_by_lt = {
        riskmodels.cost_type_to_loss_type(ct['name']): ct['unit']
        for ct in dstore['cost_types']}
    unit_by_lt['fatalities'] = 'people'
    realizations = dstore['rlzs_assoc'].realizations
    avglosses = dstore['avglosses']
    riskmodel = dstore['riskmodel']
    assets = dstore['assetcol']
    mesh = dstore['sitemesh']
    _num_assets, num_loss_types, num_rlzs = avglosses.shape
    exported = []
    for l in range(num_loss_types):
        for r in range(num_rlzs):
            rlz = realizations[r]
            loss_type = riskmodel.loss_types[l]
            unit = unit_by_lt[loss_type]
            # no suffix is needed when there is a single output file
            if num_loss_types == 1 and num_rlzs == 1:
                suffix = ''
            else:
                suffix = '-gsimltp_%s_%s' % (rlz.uid, loss_type)
            path = os.path.join(
                dstore.export_dir, '%s%s.%s' % (ekey[0], suffix, ekey[1]))
            data = [
                LossMap(Location(mesh[asset['site_id']]),
                        asset['asset_ref'], stat['mean'], stat['stddev'])
                for asset, stat in zip(assets, avglosses[:, l, r])]
            # TODO: replace the category with the exposure category
            writer = risk_writers.LossMapXMLWriter(
                path, params.investigation_time, poe=None,
                loss_type=loss_type, gsim_tree_path=None, unit=unit,
                loss_category=None)
            writer.serialize(data)
            exported.append(path)
    return sorted(exported)
def export_hmaps_xml_json(ekey, dstore):
    """
    Export the hazard maps, one file per kind of map, intensity measure
    type and probability of exceedance, in XML or GeoJSON format depending
    on the export key.

    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    :returns: the sorted list of the exported file names
    """
    fmt = ekey[1]
    params = OqParam.from_(dstore.attrs)
    mesh = dstore['sitemesh'].value
    assoc = dstore['rlzs_assoc']
    maps_by_kind = dstore[ekey[0]]
    exported = []
    if fmt == 'geojson':
        make_writer = hazard_writers.HazardMapGeoJSONWriter
    else:
        make_writer = hazard_writers.HazardMapXMLWriter
    for kind in maps_by_kind:
        # only the maps of a single realization have logic tree paths
        smlt_path = gsimlt_path = ''
        if kind.startswith('rlz-'):
            rlz = assoc.realizations[int(kind[4:])]
            smlt_path = '_'.join(rlz.sm_lt_path)
            gsimlt_path = rlz.gsim_rlz.uid
        maps = maps_by_kind[kind]
        for imt in params.imtls:
            for poe in params.poes:
                path = hazard_curve_name(
                    dstore, ekey, kind + '-%s-%s' % (poe, imt), assoc,
                    params.number_of_logic_tree_samples)
                # name of the field holding the map for this (imt, poe)
                field = '%s~%s' % (imt, poe)
                data = []
                for site, hmap in zip(mesh, maps):
                    data.append(HazardMap(site[0], site[1], hmap[field]))
                writer = make_writer(
                    path,
                    investigation_time=params.investigation_time,
                    imt=imt,
                    poe=poe,
                    smlt_path=smlt_path,
                    gsimlt_path=gsimlt_path)
                writer.serialize(data)
                exported.append(path)
    return sorted(exported)
def _gen_writers(dstore, writercls, root):
    """
    Build the XML writer instances for the given root name.

    If `root` ends with '-rlzs' there is a writer for each cost type,
    insured flag and realization; if it ends with '-stats' there is a
    writer for each cost type, insured flag and statistic (the mean plus
    one quantile for each value in `quantile_loss_curves`). Any other
    root yields nothing.

    :param dstore: datastore object
    :param writercls: the writer class to instantiate
    :param root: the root of the file names to generate
    :returns: a generator of pairs (writer, (loss_type, ordinal, ins))
    """
    params = OqParam.from_(dstore.attrs)
    realizations = dstore['rlzs_assoc'].realizations
    cost_types = dstore['cost_types']
    # with a single output per file name no suffix is needed
    single_output = len(cost_types) == 1 and len(realizations) == 1
    for cost_type in cost_types:
        loss_type = riskmodels.cost_type_to_loss_type(cost_type['name'])
        for ins in range(params.insured_losses + 1):
            if root.endswith('-rlzs'):
                for rlz in realizations:
                    if single_output:
                        suffix = ''
                    else:
                        suffix = '-gsimltp_%s_%s' % (rlz.uid, loss_type)
                    dest = dstore.export_path('%s%s%s.xml' % (
                        root[:-5], suffix, '_ins' if ins else ''))
                    writer = writercls(
                        dest, params.investigation_time, loss_type,
                        unit=cost_type['unit'], **get_paths(rlz))
                    yield writer, (loss_type, rlz.ordinal, ins)
            elif root.endswith('-stats'):
                # the mean always comes first, i.e. it has ordinal 0
                stats = [('mean', None)]
                stats.extend(('quantile-%s' % q, q)
                             for q in params.quantile_loss_curves)
                for ordinal, (statname, statvalue) in enumerate(stats):
                    dest = dstore.export_path('%s-%s-%s%s.xml' % (
                        root[:-6], statname, loss_type,
                        '_ins' if ins else ''))
                    kind = 'mean' if ordinal == 0 else 'quantile'
                    writer = writercls(
                        dest, params.investigation_time, loss_type,
                        statistics=kind, quantile_value=statvalue,
                        unit=cost_type['unit'])
                    yield writer, (loss_type, ordinal, ins)
def __init__(self, dstore):
    """
    Store the datastore and its parameters and initialize the text with
    the underlined description followed by the number of sites.

    :param dstore: datastore object containing the key 'sitemesh'
    """
    self.dstore = dstore
    params = OqParam.from_(dstore.attrs)
    self.oq = params
    title = params.description
    underline = '=' * len(title)
    self.text = title + '\n' + underline
    self.text += '\n\nnum_sites = %d' % len(dstore['sitemesh'])
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if calc_id == 0:  # show all
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        # a dedicated loop variable avoids shadowing the calc_id argument
        for cid in datastore.get_calc_ids(datastore.DATADIR):
            ds = None
            try:
                ds = datastore.DataStore(cid, mode='r')
                oq = OqParam.from_(ds.attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill.
                # KeyboardInterrupt and SystemExit are deliberately not
                # caught: interrupting the listing must not delete a file
                if ds is not None:
                    ds.close()  # release the file before removing it
                logging.warning('Removed invalid calculation %d', cid)
                os.remove(
                    os.path.join(datastore.DATADIR, 'calc_%s.hdf5' % cid))
            else:
                rows.append((cid, cmode, descr))
                ds.close()
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id, mode='r')
    try:
        if key:
            if key in datastore.view:
                print(datastore.view(key, ds))
                return
            obj = ds[key]
            if hasattr(obj, 'value'):  # an array
                print(write_csv(io.StringIO(), obj.value))
            else:
                print(obj)
            return
        oq = OqParam.from_(ds.attrs)
        # this part is experimental
        if rlzs and 'hcurves' in ds:
            min_value = 0.01  # used in rmsep
            curves_by_rlz, mean_curves = get_hcurves_and_means(ds)
            dists = []
            for rlz, curves in curves_by_rlz.items():
                dist = sum(
                    rmsep(mean_curves[imt], curves[imt], min_value)
                    for imt in mean_curves.dtype.fields)
                dists.append((dist, rlz))
            print('Realizations in order of distance from the mean curves')
            for dist, rlz in sorted(dists):
                print('%s: rmsep=%s' % (rlz, dist))
        else:
            # print all keys
            print(
                oq.calculation_mode,
                'calculation (%r) saved in %s contains:' %
                (oq.description, ds.hdf5path))
            for key in ds:
                print(key, humansize(ds.getsize(key)))
    finally:
        # the datastore is opened read-only here and must not leak
        ds.close()
def __init__(self, dstore):
    """
    Store the datastore and its parameters and initialize the text with
    the underlined description.

    :param dstore: datastore object
    """
    self.dstore = dstore
    params = OqParam.from_(dstore.attrs)
    self.oq = params
    title = params.description
    underline = '=' * len(title)
    self.text = title + '\n' + underline