def checksum(thing):
    """
    Print the checksum of a calculation.

    The checksum is read from the datastore if `thing` is a calculation ID
    (i.e. the calculation was already done) or computed from the
    job.ini/job.zip file otherwise. If `thing` is a file ending in `.xml`
    it is assumed to be a source model logic tree file and the checksum is
    computed by passing only the `source_model_logic_tree` input, i.e.
    ignoring the job.ini, the gmpe logic tree file and possibly other files.

    :param thing:
        a calculation ID (anything convertible with `int`) or a file name
    :raises SystemExit:
        if `thing` is not an integer and does not correspond to an
        existing file
    """
    try:
        job_id = int(thing)
        job_file = None
    except ValueError:
        job_id = None
        job_file = thing
        if not os.path.exists(job_file):
            sys.exit('%s does not correspond to an existing file' % job_file)
    # NB: test against None and not for truthiness, otherwise the ID 0
    # would fall through to the file branches with job_file=None
    if job_id is not None:
        # close the datastore as soon as the attribute has been read
        with util.read(job_id) as dstore:
            value = dstore['/'].attrs['checksum32']
    elif job_file.endswith('.xml'):  # assume it is a smlt file
        inputs = {'source_model_logic_tree': job_file}
        value = readinput.get_checksum32(mock.Mock(inputs=inputs))
    else:
        oq = readinput.get_oqparam(job_file)
        value = readinput.get_checksum32(oq)
    print(value)
def export(datastore_key, calc_id=-1, exports='csv', export_dir='.'):
    """
    Export an output from the datastore.

    :param datastore_key: key of the output to export
    :param calc_id: calculation ID passed to `util.read` (default -1)
    :param exports: comma-separated string with the export formats
    :param export_dir: directory assigned to `dstore.export_dir`

    For each format the total size of the exported files and their names
    are printed; the monitor is printed too if the export took more than
    one second.
    """
    dstore = util.read(calc_id)
    try:
        parent_id = dstore['oqparam'].hazard_calculation_id
        if parent_id:
            dstore.parent = util.read(parent_id)
        dstore.export_dir = export_dir
        with performance.Monitor('export', measuremem=True) as mon:
            for fmt in exports.split(','):
                fnames = export_((datastore_key, fmt), dstore)
                nbytes = sum(os.path.getsize(f) for f in fnames)
                print('Exported %s in %s' % (general.humansize(nbytes),
                                             fnames))
        if mon.duration > 1:
            print(mon)
    finally:
        # close the datastore even if the export fails
        dstore.close()
def plot_ac(calc_id):
    """
    Plot the aggregate loss curves of the given calculation.

    :param calc_id: calculation ID passed to `util.read`
    """
    # the curves are stored under the key 'agg_curve-rlzs'
    curves = util.read(calc_id)['agg_curve-rlzs']
    figure = make_figure(curves)
    figure.show()
def hazard_precomputed(self):
    """
    :returns: True if the hazard is precomputed, False otherwise

    The hazard is considered precomputed if there is a `gmfs` or
    `hazard_curves` input, or if the parent calculation (when there is a
    `hazard_calculation_id`) contains `gmf_data` or `poes`.
    """
    if 'gmfs' in self.inputs or 'hazard_curves' in self.inputs:
        return True
    if self.hazard_calculation_id:
        # read only the top level keys, then close the parent datastore
        with util.read(self.hazard_calculation_id) as dstore:
            parent = list(dstore)
        return 'gmf_data' in parent or 'poes' in parent
    # explicit False instead of an implicit None
    return False
def plot_losses(calc_id, bins=7):
    """
    Plot the `losses_by_event` output of the given calculation.

    :param calc_id: calculation ID passed to `util.read`
    :param bins: value forwarded to `make_figure` (default 7)
    """
    store = util.read(calc_id)
    # extract the losses first, then read the parameters
    losses = dict(extract(store, 'losses_by_event'))
    loss_names = store['oqparam'].loss_dt().names
    figure = make_figure(losses, loss_names, bins)
    figure.show()
def plot_memory(calc_id=-1):
    """
    Plot the memory occupation, one plot per task name.

    :param calc_id: calculation ID passed to `util.read` (default -1)
    """
    store = util.read(calc_id)
    # pairs (task name, content of the 'mem_gb' field of its task_info)
    name_mem_pairs = [
        (name, store['task_info/' + name]['mem_gb'])
        for name in store['task_info']]
    figure = make_figure(name_mem_pairs)
    figure.show()
def all_cost_types(self):
    """
    Return the sorted cost types of the computation (including
    `occupants` if it is there).

    If there are no risk files and there is a parent calculation, the
    risk files are taken from the inputs of the parent and stored in
    `self._risk_files` before extracting the cost types again.
    """
    def sorted_cost_types():
        # each key has the form 'vulnerability/structural',
        # 'fragility/...', ...: the cost type is the second component
        return sorted(key.rsplit('/')[1] for key in self.risk_files)

    found = sorted_cost_types()
    if found or not self.hazard_calculation_id:
        return found
    with util.read(self.hazard_calculation_id) as parent_ds:
        parent_oq = parent_ds['oqparam']
    self._risk_files = get_risk_files(parent_oq.inputs)
    return sorted_cost_types()
def show_attrs(key, calc_id=-1):
    """
    Print the attributes of a HDF5 dataset in the datastore.

    :param key: name of the dataset inside the HDF5 file
    :param calc_id: calculation ID passed to `util.read` (default -1)

    A message is printed if the key is missing or if the dataset has no
    attributes. The datastore is closed in any case.
    """
    store = util.read(calc_id)
    try:
        try:
            # go through h5py directly, bypassing the datastore lookup
            attributes = h5py.File.__getitem__(store.hdf5, key).attrs
        except KeyError:
            print('%r is not in %s' % (key, store))
            return
        if len(attributes) == 0:
            print('%s has no attributes' % key)
        for attr_name, attr_value in attributes.items():
            print(attr_name, attr_value)
    finally:
        store.close()
def plot_pyro(calc_id=-1):
    """
    Plot the pyroclastic cloud (red circles) and the assets flagged as
    `building-PYRO` (green dots).

    :param calc_id: calculation ID passed to `util.read` (default -1)
    """
    # NB: matplotlib is imported inside since it is a costly import
    import matplotlib.pyplot as p
    store = util.read(calc_id)
    sites = store['sitecol']
    risk = store['asset_risk'].value

    def scatter_where(mask, marker, color):
        # plot the sites at the indices where the mask is true
        indices, = numpy.where(mask)
        xs = sites.lons[indices]
        ys = sites.lats[indices]
        p.scatter(xs, ys, marker=marker, color=color)

    scatter_where(store['multi_peril']['PYRO'] == 1, 'o', 'red')
    scatter_where(risk['building-PYRO'] == 1, '.', 'green')
    p.show()
def plot_sites(calc_id=-1):
    """
    Plot the sites of the site collection and, if the datastore contains
    a `site_model`, also the points of the site model (in orange).

    :param calc_id: calculation ID passed to `util.read` (default -1)
    """
    # NB: matplotlib is imported inside since it is a costly import
    import matplotlib.pyplot as p

    def wrapped(longitudes):
        # take the longitudes modulo 360 when `cross_idl` says so
        if len(longitudes) > 1 and cross_idl(*longitudes):
            longitudes %= 360
        return longitudes

    store = util.read(calc_id)
    sites = store['sitecol']
    site_lats = sites.lats
    site_lons = wrapped(sites.lons)
    _fig, axes = p.subplots()
    axes.grid(True)
    if 'site_model' in store:
        model = store['site_model']
        model_lats = model['lat']
        model_lons = wrapped(model['lon'])
        p.scatter(model_lons, model_lats, marker='.', color='orange')
    p.scatter(site_lons, site_lats, marker='+')
    p.show()
def plot_assets(calc_id=-1, site_model=False):
    """
    Plot the sites and the assets of a calculation.

    :param calc_id: calculation ID passed to `util.read` (default -1)
    :param site_model:
        if true and the datastore contains a `site_model`, plot also
        the points of the site model (in orange)

    The region (if any) is drawn as a transparent polygon; the complete
    site collection is plotted in gray, the assets in green, the sites
    in black and the discarded assets (if any) as red crosses.
    """
    # NB: matplotlib is imported inside since it is a costly import
    import matplotlib.pyplot as p
    from openquake.hmtk.plotting.patch import PolygonPatch
    store = util.read(calc_id)
    try:
        region_wkt = store['oqparam'].region
    except KeyError:
        region_wkt = None
    sites = store['sitecol']
    try:
        assets = store['assetcol'].value
    except AttributeError:
        # the asset collection has no .value, use its .array instead
        assets = store['assetcol'].array
    figure = p.figure()
    axes = figure.add_subplot(111)
    if region_wkt:
        patch = PolygonPatch(shapely.wkt.loads(region_wkt), alpha=0.1)
        axes.add_patch(patch)
    axes.grid(True)
    if site_model and 'site_model' in store:
        model = store['site_model']
        model_lons = model['lon']
        model_lats = model['lat']
        if len(model_lons) > 1 and cross_idl(*model_lons):
            model_lons %= 360
        p.scatter(model_lons, model_lats, marker='.', color='orange')
    p.scatter(sites.complete.lons, sites.complete.lats,
              marker='.', color='gray')
    p.scatter(assets['lon'], assets['lat'], marker='.', color='green')
    p.scatter(sites.lons, sites.lats, marker='+', color='black')
    if 'discarded' in store:
        discarded = numpy.unique(store['discarded'].value[['lon', 'lat']])
        p.scatter(discarded['lon'], discarded['lat'],
                  marker='x', color='red')
    p.show()
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        an empty dictionary if only the post-processing of a parent
        calculation is performed; None if the calculation mode is
        'preclassical' and there are non-atomic sources; otherwise the
        accumulator obtained by reducing the `classical` tasks with
        `self.agg_dicts`
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        # there is a parent calculation: read its logic tree, close the
        # parent and compute only the statistics
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}
    assert oq.max_sites_per_tile > oq.max_sites_disagg, (
        oq.max_sites_per_tile, oq.max_sites_disagg)
    psd = self.set_psd()
    srcfilter = self.src_filter()
    performance.Monitor.save(self.datastore, 'srcfilter', srcfilter)
    srcs = self.csm.get_sources(atomic=False)
    if srcs:
        # run the preclassical tasks on the non-atomic sources
        res = parallel.Starmap.apply(
            preclassical, (srcs, self.params),
            concurrent_tasks=oq.concurrent_tasks or 1,
            h5=self.datastore.hdf5).reduce()
        if oq.calculation_mode == 'preclassical':
            # exit early, after storing the source info and the logic tree
            self.store_source_info(res['calc_times'], nsites=True)
            self.datastore['full_lt'] = self.csm.full_lt
            self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
            return
        self.update_source_info(res['calc_times'], nsites=True)
        sources_by_grp = groupby(res['sources'],
                                 operator.attrgetter('grp_id'))
    else:
        # no non-atomic sources: set num_ruptures and nsites directly
        # on the atomic sources
        for src in self.csm.get_sources(atomic=True):
            src.num_ruptures = src.count_ruptures()
            src.nsites = self.N
        sources_by_grp = {}
    # keep only the atomic groups; the other groups are rebuilt below
    # from sources_by_grp
    self.csm.src_groups = [sg for sg in self.csm.src_groups if sg.atomic]
    if oq.ps_grid_spacing:
        # one grid_point_sources task per group; the tasks are not
        # distributed (distribute='no') when there is at most one group
        smap = parallel.Starmap(
            grid_point_sources, h5=self.datastore.hdf5,
            distribute=None if len(sources_by_grp) > 1 else 'no')
        for grp_id, sources in sources_by_grp.items():
            smap.submit((sources, oq.ps_grid_spacing))
        dic = smap.reduce()
        # count the sources before and after the reduction, for logging
        before, after = 0, 0
        for grp_id, sources in sources_by_grp.items():
            before += len(sources)
            after += len(dic[grp_id])
            sg = SourceGroup(sources[0].tectonic_region_type)
            sg.sources = dic[grp_id]
            self.csm.src_groups.append(sg)
        logging.info('Reduced point sources %d->%d', before, after)
    else:
        # no grid spacing: one new group per grp_id with the sources as
        # they are
        for grp_id, sources in sources_by_grp.items():
            sg = SourceGroup(sources[0].tectonic_region_type)
            sg.sources = sources
            self.csm.src_groups.append(sg)
    smap = parallel.Starmap(classical, h5=self.datastore.hdf5)
    self.submit_tasks(smap)
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    # reassign the hdf5 file to the Starmap after the call to swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction fails
        source_ids = self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # NB: the trailing comma makes num_tasks a 1-tuple
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                # the source IDs are stored as a space-separated string
                si[task_no] = ' '.join(source_ids[s] for s in srcids)
            self.by_task.clear()
    if self.calc_times:  # can be empty in case of errors
        # the first two elements of each calc_times array are summed
        # and logged as number of ruptures and number of sites
        self.numrups = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
            int(self.numrups), self.totrups))
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
    if psd:
        # warn if the largest pointsource_distance is at most twice
        # the maxradius
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning(
                'The pointsource_distance of %d km is too '
                'small compared to a maxradius of %d km',
                psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def execute(self):
    """
    Compute the ground motion fields in parallel, starting from the
    ruptures of a parent calculation (if there is a
    `hazard_calculation_id`) or from the sources, and save the indices
    of the GMFs for each site.

    :returns:
        an empty dictionary if, starting from the sources, both
        `ground_motion_fields` and `hazard_curves_from_gmfs` are False;
        otherwise the accumulator obtained by reducing the tasks with
        `self.agg_dicts`
    :raises InvalidFile: if there are no intensity measure types
    :raises RuntimeError:
        if `ground_motion_fields` is set but no indices were collected
    """
    oq = self.oqparam
    self.set_param()
    self.offset = 0
    srcfilter = self.src_filter(self.datastore.tempname)
    self.indices = AccumDict(accum=[])  # sid, idx -> indices
    if oq.hazard_calculation_id:  # from ruptures
        self.datastore.parent = util.read(oq.hazard_calculation_id)
        self.init_logic_tree(self.datastore.parent['full_lt'])
    else:  # from sources
        self.build_events_from_sources(srcfilter)
        if (oq.ground_motion_fields is False and
                oq.hazard_curves_from_gmfs is False):
            # nothing else was requested
            return {}
    if not oq.imtls:
        raise InvalidFile('There are no intensity measure types in %s' %
                          oq.inputs['job_ini'])
    N = len(self.sitecol.complete)
    if oq.ground_motion_fields:
        # create the gmf_data datasets before the call to swmr_on()
        nrups = len(self.datastore['ruptures'])
        self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
        self.datastore.create_dset('gmf_data/sigma_epsilon',
                                   sig_eps_dt(oq.imtls))
        self.datastore.create_dset('gmf_data/indices', hdf5.vuint32,
                                   shape=(N, 2), fillvalue=None)
        self.datastore.create_dset('gmf_data/events_by_sid', U32, (N, ))
        self.datastore.create_dset('gmf_data/time_by_rup', time_dt,
                                   (nrups, ), fillvalue=None)
    if oq.hazard_curves_from_gmfs:
        self.param['rlz_by_event'] = self.datastore['events']['rlz_id']
    # compute_gmfs in parallel
    self.datastore.swmr_on()
    logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
    # lazy generator of task arguments, one triple per rupture getter
    iterargs = ((rgetter, srcfilter, self.param)
                for rgetter in gen_rupture_getters(
                    self.datastore, srcfilter, oq.concurrent_tasks))
    # parallel compute_gmfs
    acc = parallel.Starmap(self.core_task.__func__, iterargs,
                           h5=self.datastore.hdf5,
                           num_cores=oq.num_cores).reduce(
                               self.agg_dicts, self.acc0())
    if self.indices:
        dset = self.datastore['gmf_data/indices']
        num_evs = self.datastore['gmf_data/events_by_sid']
        logging.info('Saving gmf_data/indices')
        with self.monitor('saving gmf_data/indices', measuremem=True):
            self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
            for sid in self.sitecol.complete.sids:
                # for each site save the start and stop indices and
                # the sum of the differences stop - start
                start = numpy.array(self.indices[sid, 0])
                stop = numpy.array(self.indices[sid, 1])
                dset[sid, 0] = start
                dset[sid, 1] = stop
                num_evs[sid] = (stop - start).sum()
        avg_events_by_sid = num_evs[()].sum() / N
        logging.info('Found ~%d GMVs per site', avg_events_by_sid)
    elif oq.ground_motion_fields:
        raise RuntimeError('No GMFs were generated, perhaps they were '
                           'all below the minimum_intensity threshold')
    return acc
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        an empty dictionary if only the post-processing of a parent
        calculation is performed; None if the calculation mode is
        'preclassical'; otherwise the accumulator obtained by reducing
        the `classical` tasks with `self.agg_dicts`
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        # there is a parent calculation: read its logic tree, close the
        # parent and compute only the statistics
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}
    assert oq.max_sites_per_tile > oq.max_sites_disagg, (
        oq.max_sites_per_tile, oq.max_sites_disagg)
    psd = self.set_psd()  # must go before to set the pointsource_distance
    run_preclassical(self.csm, oq, self.datastore)
    # exit early if we want to perform only a preclassical
    if oq.calculation_mode == 'preclassical':
        # store the rows of csm.source_info as an array of records
        recs = [tuple(row) for row in self.csm.source_info.values()]
        self.datastore['source_info'] = numpy.array(
            recs, readinput.source_info_dt)
        self.datastore['full_lt'] = self.csm.full_lt
        self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
        return
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    smap = parallel.Starmap(classical, self.get_args(acc0),
                            h5=self.datastore.hdf5)
    smap.monitor.save('srcfilter', self.src_filter())
    self.datastore.swmr_on()
    # reassign the hdf5 file to the Starmap after the call to swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction fails
        source_ids = self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # NB: the trailing comma makes num_tasks a 1-tuple
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                # the source IDs are stored as a space-separated string
                si[task_no] = ' '.join(source_ids[s] for s in srcids)
            self.by_task.clear()
    if self.calc_times:  # can be empty in case of errors
        # the first two elements of each calc_times array are summed
        # and logged as number of contexts and number of sites
        self.numctxs = sum(arr[0]
                           for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Total number of contexts: {:_d}'.format(
            int(self.numctxs)))
        logging.info('Average number of sites per context: %d',
                     numsites / self.numctxs)
    if psd:
        # warn if the largest pointsource_distance is at most twice
        # the maxradius
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning(
                'The pointsource_distance of %d km is too '
                'small compared to a maxradius of %d km',
                psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def _read_risk_data(self):
    """
    Read the exposure (if any), the risk model (if any) and then the
    site collection, possibly extracted from the exposure.

    Sets `self.param` and, depending on the inputs, `self.sitecol`,
    `self.assetcol` and `self.crmodel`; saves in the datastore the
    corresponding datasets together with some exposure statistics.

    :raises ValueError:
        if the exposure is missing both in hazard and risk, if the
        time_event is inconsistent with the one of the hazard or if
        there are too many sites for the spatial correlation
    :raises RuntimeError:
        if some taxonomies are not covered by the risk model
    """
    # read the exposure (if any), the risk model (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_crmodel()  # must be called first
    if oq.hazard_calculation_id:
        # take the complete site collection from the parent calculation
        with util.read(oq.hazard_calculation_id) as dstore:
            haz_sitecol = dstore['sitecol'].complete
    else:
        haz_sitecol = readinput.get_site_collection(oq)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
            haz_sitecol.make_complete()
        if 'site_model' in oq.inputs:
            self.datastore['site_model'] = readinput.get_site_model(oq)
    # parameters of the parent calculation, or None without a parent
    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        exposure = self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['cost_calculator'] = exposure.cost_calculator
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = (numpy.array(
                exposure.exposures, hdf5.vstr))
    elif 'assetcol' in self.datastore.parent:
        # no exposure in the inputs: use the asset collection of the
        # parent calculation
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(oq.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
            nsites = len(self.sitecol)
            if (oq.spatial_correlation != 'no' and
                    nsites > MAXSITES):  # hard-coded, heuristic
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            # the site IDs differ from the hazard ones: reduce the
            # asset collection to the current sites
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
        if self.sitecol:
            logging.info('Read N=%d hazard sites and L=%d hazard levels',
                         len(self.sitecol), len(oq.imtls.array))
    if oq_hazard:
        # consistency checks with the parent calculation
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' %
                (oq.time_event, oq_hazard.time_event))
    if oq.job_type == 'risk':
        tmap_arr, tmap_lst = logictree.taxonomy_mapping(
            self.oqparam.inputs.get('taxonomy_mapping'),
            self.assetcol.tagcol.taxonomy)
        self.crmodel.tmap = tmap_lst
        if len(tmap_arr):
            self.datastore['taxonomy_mapping'] = tmap_arr
        # all the taxonomies appearing in the mapping, except '?'
        taxonomies = set(taxo for items in self.crmodel.tmap
                         for taxo, weight in items if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.crmodel.taxonomies)
        if self.crmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        if len(self.crmodel.taxonomies) > len(taxonomies):
            logging.info('Reducing risk model from %d to %d taxonomies',
                         len(self.crmodel.taxonomies), len(taxonomies))
            self.crmodel = self.crmodel.reduce(taxonomies)
            # set the mapping again on the reduced model
            self.crmodel.tmap = tmap_lst
    if hasattr(self, 'sitecol') and self.sitecol:
        self.datastore['sitecol'] = self.sitecol.complete
    # used in the risk calculators
    self.param = dict(individual_curves=oq.individual_curves,
                      avg_losses=oq.avg_losses)
    # compute exposure stats
    if hasattr(self, 'assetcol'):
        arr = self.assetcol.array
        # counts of the assets grouped by the field 'site_id'
        num_assets = list(general.countby(arr, 'site_id').values())
        self.datastore['assets_by_site'] = get_stats(num_assets)
        num_taxos = self.assetcol.num_taxonomies_by_site()
        self.datastore['taxonomies_by_site'] = get_stats(num_taxos)
        save_exposed_values(self.datastore, self.assetcol,
                            oq.loss_names, oq.aggregate_by)
def __init__(self, calc_id):
    """
    Open the datastore of the given calculation and read its parameters.

    :param calc_id: calculation ID passed to `util.read`
    """
    self.calc_id = calc_id
    store = util.read(calc_id)
    self.dstore = store
    self.oqparam = store['oqparam']
def pre_execute(self):
    """
    Check if there is a previous calculation ID.
    If yes, read the inputs by retrieving the previous calculation;
    if not, read the inputs directly.

    There are five cases, checked in order: hazard read from
    gmfs/multi_peril files, hazard read from a hazard_curves file,
    hazard read from a parent calculation, hazard computed by running
    the precalculator of the class, and plain reading of the inputs.
    In all cases the risk model, if any, is saved at the end.

    :raises ValueError:
        if the parent calculation has a different investigation_time or
        minimum_intensity, or if it is missing some IMTs
    :raises InvalidFile:
        if the precalculator has no `rlzs_assoc` attribute
    """
    oq = self.oqparam
    if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs:
        # read hazard from files
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with gmfs_file')
        self.read_inputs()
        if 'gmfs' in oq.inputs:
            save_gmfs(self)
        else:
            self.save_multi_peril()
    elif 'hazard_curves' in oq.inputs:  # read hazard from file
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with hazard_curves')
        haz_sitecol = readinput.get_site_collection(oq)
        # NB: horrible: get_site_collection calls get_pmap_from_nrml
        # that sets oq.investigation_time, so it must be called first
        self.load_riskmodel()  # must be after get_site_collection
        self.read_exposure(haz_sitecol)  # define .assets_by_site
        self.datastore['poes/grp-00'] = fix_ones(readinput.pmap)
        self.datastore['sitecol'] = self.sitecol
        self.datastore['assetcol'] = self.assetcol
        # a fake composition info is stored and used to build the
        # realizations association
        self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
        self.rlzs_assoc = fake.get_rlzs_assoc()
    elif oq.hazard_calculation_id:
        parent = util.read(oq.hazard_calculation_id)
        self.check_precalc(parent['oqparam'].calculation_mode)
        self.datastore.parent = parent
        # copy missing parameters from the parent
        params = {name: value
                  for name, value in vars(parent['oqparam']).items()
                  if name not in vars(self.oqparam)}
        self.save_params(**params)
        self.read_inputs()
        # check the consistency between the parent and the current
        # parameters
        oqp = parent['oqparam']
        if oqp.investigation_time != oq.investigation_time:
            raise ValueError(
                'The parent calculation was using investigation_time=%s'
                ' != %s' % (oqp.investigation_time, oq.investigation_time))
        if oqp.minimum_intensity != oq.minimum_intensity:
            raise ValueError(
                'The parent calculation was using minimum_intensity=%s'
                ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity))
        missing_imts = set(oq.risk_imtls) - set(oqp.imtls)
        if missing_imts:
            raise ValueError(
                'The parent calculation is missing the IMT(s) %s' %
                ', '.join(missing_imts))
    elif self.__class__.precalc:
        # run the precalculator with the same parameters and
        # calculation ID, then copy its attributes
        calc = calculators[self.__class__.precalc](
            self.oqparam, self.datastore.calc_id)
        calc.run()
        self.param = calc.param
        self.sitecol = calc.sitecol
        self.assetcol = calc.assetcol
        self.riskmodel = calc.riskmodel
        if hasattr(calc, 'rlzs_assoc'):
            self.rlzs_assoc = calc.rlzs_assoc
        else:
            # this happens for instance for a scenario_damage without
            # rupture, gmfs, multi_peril
            raise InvalidFile(
                '%(job_ini)s: missing gmfs_csv, multi_peril_csv' %
                oq.inputs)
        if hasattr(calc, 'csm'):  # no scenario
            self.csm = calc.csm
    else:
        self.read_inputs()
    if self.riskmodel:
        self.save_riskmodel()
def pre_execute(self):
    """
    Check if there is a previous calculation ID.
    If yes, read the inputs by retrieving the previous calculation;
    if not, read the inputs directly.

    There are five cases, checked in order: hazard read from
    gmfs/multi_peril files, hazard read from a hazard_curves file,
    hazard read from a parent calculation, hazard computed by running
    the precalculator of the class, and plain reading of the inputs.

    :raises NotImplementedError: if the gmfs file is not a .csv file
    :raises RuntimeError:
        if the number of imported GMFs is different from
        `number_of_ground_motion_fields`
    :raises ValueError:
        if the parent calculation has a different investigation_time,
        a non-equivalent minimum_intensity, different hazard statistics
        or if it is missing some IMTs
    """
    oq = self.oqparam
    if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs:
        # read hazard from files
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with gmfs_file')
        self.read_inputs()
        if 'gmfs' in oq.inputs:
            if not oq.inputs['gmfs'].endswith('.csv'):
                raise NotImplementedError('Importer for %s' %
                                          oq.inputs['gmfs'])
            # E is the length of what import_gmfs returns and is
            # compared below with number_of_ground_motion_fields
            E = len(
                import_gmfs(self.datastore, oq.inputs['gmfs'],
                            self.sitecol.complete.sids))
            if hasattr(oq, 'number_of_ground_motion_fields'):
                if oq.number_of_ground_motion_fields != E:
                    raise RuntimeError(
                        'Expected %d ground motion fields, found %d' %
                        (oq.number_of_ground_motion_fields, E))
            else:  # set the number of GMFs from the file
                oq.number_of_ground_motion_fields = E
        else:
            self.save_multi_peril()
        self.save_crmodel()
    elif 'hazard_curves' in oq.inputs:  # read hazard from file
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with hazard_curves')
        haz_sitecol = readinput.get_site_collection(oq)
        self.load_crmodel()  # must be after get_site_collection
        self.read_exposure(haz_sitecol)  # define .assets_by_site
        self.datastore['poes/grp-00'] = fix_ones(readinput.pmap)
        self.datastore['sitecol'] = self.sitecol
        self.datastore['assetcol'] = self.assetcol
        # a fake composition info is stored and used to build the
        # realizations association
        self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
        self.rlzs_assoc = fake.get_rlzs_assoc()
        self.datastore['rlzs_by_grp'] = self.rlzs_assoc.by_grp()
        self.save_crmodel()
    elif oq.hazard_calculation_id:
        parent = util.read(oq.hazard_calculation_id)
        self.check_precalc(parent['oqparam'].calculation_mode)
        self.datastore.parent = parent
        # copy missing parameters from the parent
        if 'concurrent_tasks' not in vars(self.oqparam):
            # set concurrent_tasks to the class default first, so that
            # it is not among the parameters copied from the parent
            self.oqparam.concurrent_tasks = (
                self.oqparam.__class__.concurrent_tasks.default)
        params = {
            name: value
            for name, value in vars(parent['oqparam']).items()
            if name not in vars(self.oqparam)
        }
        self.save_params(**params)
        self.read_inputs()
        # check the consistency between the parent and the current
        # parameters
        oqp = parent['oqparam']
        if oqp.investigation_time != oq.investigation_time:
            raise ValueError(
                'The parent calculation was using investigation_time=%s'
                ' != %s' % (oqp.investigation_time, oq.investigation_time))
        if not equivalent(oqp.minimum_intensity, oq.minimum_intensity):
            raise ValueError(
                'The parent calculation was using minimum_intensity=%s'
                ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity))
        hstats, rstats = list(oqp.hazard_stats()), list(oq.hazard_stats())
        if hstats != rstats:
            raise ValueError('The parent calculation had stats %s != %s' %
                             (hstats, rstats))
        missing_imts = set(oq.risk_imtls) - set(oqp.imtls)
        if missing_imts:
            raise ValueError(
                'The parent calculation is missing the IMT(s) %s' %
                ', '.join(missing_imts))
        self.save_crmodel()
    elif self.__class__.precalc:
        # run the precalculator with the same parameters and
        # calculation ID, then copy the attributes it defines
        calc = calculators[self.__class__.precalc](self.oqparam,
                                                   self.datastore.calc_id)
        calc.run(remove=False)
        for name in ('csm param sitecol assetcol crmodel rlzs_assoc '
                     'policy_name policy_dict csm_info').split():
            if hasattr(calc, name):
                setattr(self, name, getattr(calc, name))
    else:
        self.read_inputs()
        self.save_crmodel()
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        an empty dictionary if only the post-processing of a parent
        calculation is performed or if the calculation mode is
        'preclassical' with a single site; otherwise the accumulator
        obtained by reducing the tasks with `self.agg_dicts`
    :raises RuntimeError: if there are no source magnitudes
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        # there is a parent calculation: read its composition info,
        # close the parent and compute only the statistics
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.csm_info = parent['csm_info']
        self.calc_stats()  # post-processing
        return {}
    mags = self.datastore['source_mags'][()]
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.csm_info.get_gsims_by_trt()
    dist_bins = {
        trt: oq.maximum_distance.get_dist_bins(trt)
        for trt in gsims_by_trt
    }
    if oq.pointsource_distance:
        logging.info('Computing effect of the ruptures')
        mon = self.monitor('rupture effect')
        effect = parallel.Starmap.apply(
            get_effect_by_mag,
            (mags, self.sitecol.one(), gsims_by_trt,
             oq.maximum_distance, oq.imtls, mon)).reduce()
        self.datastore['effect'] = effect
        self.datastore.set_attrs('effect', **dist_bins)
        # one Effect object per tectonic region type, built from the
        # t-th column of the effect of each magnitude
        self.effect = {
            trt: Effect({mag: effect[mag][:, t] for mag in effect},
                        dist_bins[trt],
                        getdefault(oq.pointsource_distance, trt))
            for t, trt in enumerate(gsims_by_trt)
        }
        # NB: the distances in the parameters are overridden here
        for trt, eff in self.effect.items():
            oq.maximum_distance.magdist[trt] = eff.dist_by_mag()
            oq.pointsource_distance[trt] = eff.dist_by_mag(
                eff.collapse_value)
    else:
        self.effect = {}
    if oq.calculation_mode == 'preclassical' and self.N == 1:
        # single site preclassical: only count the ruptures by
        # magnitude and distance and exit early
        smap = parallel.Starmap(ruptures_by_mag_dist)
        for func, args in self.gen_task_queue():
            smap.submit(args)
        counts = smap.reduce()
        # the number of distances is the first default of get_dist_bins
        ndists = oq.maximum_distance.get_dist_bins.__defaults__[0]
        for mag in mags:
            arr = numpy.zeros((ndists, len(gsims_by_trt)), U32)
            for trti, trt in enumerate(gsims_by_trt):
                try:
                    arr[:, trti] = counts[trt][mag]
                except KeyError:
                    # no counts for this TRT/magnitude: leave the zeros
                    pass
            self.datastore['rups_by_mag_dist/' + mag] = arr
        self.datastore.set_attrs('rups_by_mag_dist', **dist_bins)
        self.datastore['csm_info'] = self.csm_info
        return {}
    smap = parallel.Starmap(self.core_task.__func__,
                            h5=self.datastore.hdf5)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    # reassign the hdf5 file to the Starmap after the call to swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction fails
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # NB: the trailing comma makes num_tasks a 1-tuple
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vuint32, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = srcids
            self.by_task.clear()
    # the first element of each calc_times array is summed and
    # compared with the total number of ruptures
    numrups = sum(arr[0] for arr in self.calc_times.values())
    if self.totrups != numrups:
        logging.info('Considered %d/%d ruptures', numrups, self.totrups)
    self.calc_times.clear()  # save a bit of memory
    return acc
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        an empty dictionary if only the post-processing of a parent
        calculation is performed; otherwise the accumulator obtained
        by reducing the tasks with `self.agg_dicts`
    :raises RuntimeError: if there are no source magnitudes
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        # there is a parent calculation: read its logic tree, close the
        # parent and compute only the statistics
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}
    mags = self.datastore['source_mags'][()]
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    if 'source_mags' in self.datastore and oq.imtls:
        mags = self.datastore['source_mags'][()]
        aw = calc.get_effect(mags, self.sitecol, gsims_by_trt, oq)
        # the effect is stored only if the result has an .array
        if hasattr(aw, 'array'):
            self.datastore['effect_by_mag_dst_trt'] = aw
    smap = parallel.Starmap(self.core_task.__func__,
                            h5=self.datastore.hdf5,
                            num_cores=oq.num_cores)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    # reassign the hdf5 file to the Starmap after the call to swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction fails
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            # NB: the trailing comma makes num_tasks a 1-tuple
            num_tasks = max(self.by_task) + 1,
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                # the source IDs are stored as a space-separated string
                si[task_no] = ' '.join(srcids)
            self.by_task.clear()
    # the first two elements of each calc_times array are summed and
    # logged as number of ruptures and number of sites
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
        int(self.numrups), self.totrups))
    if self.numrups:
        # the average is undefined (division by zero) when no ruptures
        # were counted, so it is logged only for a nonzero count
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
    self.calc_times.clear()  # save a bit of memory
    return acc
def _read_risk_data(self):
    """
    Read the exposure (if any), the risk model (if any) and then the
    site collection, possibly extracted from the exposure.

    Sets `self.param` and, depending on the inputs, `self.sitecol` and
    `self.assetcol`; saves in the datastore the corresponding datasets
    and the `exposed_value`, if missing.

    :raises ValueError:
        if the exposure is missing both in hazard and risk, if the
        time_event is inconsistent with the one of the hazard, if there
        are too many sites for the spatial correlation or if some
        taxonomies are missing in the consequence models
    :raises RuntimeError:
        if some taxonomies are not covered by the risk model
    """
    # read the exposure (if any), the risk model (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_riskmodel()  # must be called first
    if oq.hazard_calculation_id:
        # take the complete site collection from the parent calculation
        with util.read(oq.hazard_calculation_id) as dstore:
            haz_sitecol = dstore['sitecol'].complete
    else:
        haz_sitecol = readinput.get_site_collection(oq)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
            haz_sitecol.make_complete()
        if 'site_model' in oq.inputs:
            self.datastore['site_model'] = readinput.get_site_model(oq)
    # parameters of the parent calculation, or None without a parent
    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        exposure = self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['assetcol/num_taxonomies'] = (
            self.assetcol.num_taxonomies_by_site())
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = (numpy.array(
                exposure.exposures, hdf5.vstr))
    elif 'assetcol' in self.datastore.parent:
        # no exposure in the inputs: use the asset collection of the
        # parent calculation
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(oq.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
            nsites = len(self.sitecol)
            if (oq.spatial_correlation != 'no' and
                    nsites > MAXSITES):  # hard-coded, heuristic
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            # the site IDs differ from the hazard ones: reduce the
            # asset collection to the current sites
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            self.datastore['assetcol/num_taxonomies'] = (
                self.assetcol.num_taxonomies_by_site())
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
        if self.sitecol:
            logging.info('Read %d hazard sites', len(self.sitecol))
    if oq_hazard:
        # consistency checks with the parent calculation
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' %
                (oq.time_event, oq_hazard.time_event))
    if oq.job_type == 'risk':
        # all the taxonomies of the asset collection, except '?'
        taxonomies = set(taxo for taxo in self.assetcol.tagcol.taxonomy
                         if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.riskmodel.taxonomies)
        if self.riskmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        # same check for the consequence models, if any
        consequence_models = riskmodels.get_risk_models(oq, 'consequence')
        for lt, cm in consequence_models.items():
            missing = taxonomies - set(cm)
            if missing:
                raise ValueError('Missing consequenceFunctions for %s' %
                                 ' '.join(missing))
    if hasattr(self, 'sitecol') and self.sitecol:
        self.datastore['sitecol'] = self.sitecol.complete
    # used in the risk calculators
    self.param = dict(individual_curves=oq.individual_curves,
                      avg_losses=oq.avg_losses)
    # store the `exposed_value` if there is an exposure
    if 'exposed_value' not in set(self.datastore) and hasattr(
            self, 'assetcol'):
        self.datastore['exposed_value'] = self.assetcol.agg_value(
            *oq.aggregate_by)
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        `{}` when only post-processing a parent calculation, `None` when
        the calculation mode is `preclassical`, otherwise the accumulator
        obtained by reducing the task results with `self.agg_dicts`
    :raises RuntimeError:
        if the `source_mags` group of the datastore is empty
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}

    # preclassical step, always performed
    srcfilter = self.src_filter()
    srcs = self.csm.get_sources()
    calc_times = parallel.Starmap.apply(
        preclassical, (srcs, srcfilter),
        concurrent_tasks=oq.concurrent_tasks or 1,
        num_cores=oq.num_cores, h5=self.datastore.hdf5).reduce()
    if oq.calculation_mode == 'preclassical':
        self.store_source_info(calc_times, nsites=True)
        self.datastore['full_lt'] = self.csm.full_lt
        self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
        return
    self.update_source_info(calc_times, nsites=True)

    # if OQ_SAMPLE_SOURCES is set extract one source for group
    ss = os.environ.get('OQ_SAMPLE_SOURCES')
    if ss:
        for sg in self.csm.src_groups:
            if not sg.atomic:
                srcs = [src for src in sg if src.nsites]
                sg.sources = [srcs[0]]

    mags = self.datastore['source_mags']  # by TRT
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    mags_by_trt = {trt: mags[trt][()] for trt in mags}
    psd = oq.pointsource_distance
    if psd is not None:
        psd.interp(mags_by_trt)
        for trt, dic in psd.ddic.items():
            # the sum is zero for {'default': [(1, 0), (10, 0)]}
            if sum(dic.values()):
                it = list(dic.items())
                # log only the first and the last (mag, dist) pairs
                md = '%s->%d ... %s->%d' % (it[0] + it[-1])
                logging.info('ps_dist %s: %s', trt, md)

    # the effect is computed only if all IMTs are PGA or SA
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if (imts_ok and psd and psd.suggested()) or (
            imts_ok and oq.minimum_intensity):
        aw = get_effect(mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
        if psd:
            dic = {trt: [(float(mag), int(dst))
                         for mag, dst in psd.ddic[trt].items()]
                   for trt in psd.ddic if trt != 'default'}
            logging.info('pointsource_distance=\n%s', pprint.pformat(dic))
        if len(vars(aw)) > 1:  # more than _extra
            self.datastore['effect_by_mag_dst'] = aw

    smap = parallel.Starmap(classical, h5=self.datastore.hdf5,
                            num_cores=oq.num_cores)
    smap.monitor.save('srcfilter', self.src_filter())
    rlzs_by_gsim_list = self.submit_tasks(smap)
    rlzs_by_g = [rlzs for rlzs_by_gsim in rlzs_by_gsim_list
                 for rlzs in rlzs_by_gsim.values()]
    self.datastore['rlzs_by_g'] = [U32(rlzs) for rlzs in rlzs_by_g]
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    poes_shape = (self.N, len(oq.imtls.array), len(rlzs_by_g))  # NLG
    size = numpy.prod(poes_shape) * 8  # 8 bytes per F64 value
    logging.info('Requiring %s for ProbabilityMap of shape %s',
                 humansize(size), poes_shape)
    self.datastore.create_dset('_poes', F64, poes_shape)
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction failed
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            num_tasks = max(self.by_task) + 1,  # NB: 1-element shape tuple
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(srcids)
            self.by_task.clear()
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
        int(self.numrups), self.totrups))
    # guard against 0/0 when no rupture was effective
    sites_per_rup = numsites / self.numrups if self.numrups else 0
    logging.info('Effective number of sites per rupture: %d', sites_per_rup)
    if psd:
        psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
        if psdist and self.maxradius >= psdist / 2:
            logging.warning('The pointsource_distance of %d km is too '
                            'small compared to a maxradius of %d km',
                            psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        `{}` when only post-processing a parent calculation or when
        running in `preclassical` mode on a single site, otherwise the
        accumulator obtained by reducing the task results
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.csm_info = parent['csm_info']
        self.calc_stats()  # post-processing
        return {}

    mags = self.datastore['source_mags'][()]
    gsims_by_trt = self.csm_info.get_gsims_by_trt()
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    if oq.minimum_intensity and len(self.sitecol) == 1 and len(mags):
        logging.info('Computing effect of the ruptures')
        mon = self.monitor('rupture effect')
        effect = parallel.Starmap.apply(
            get_effect,
            (mags, self.sitecol, gsims_by_trt, oq.maximum_distance,
             oq.imtls, mon)).reduce()
        self.datastore['effect'] = effect
        self.datastore.set_attrs('effect', **dist_bins)
        # the threshold is taken from the last IMT in oq.imtls
        threshold = getdefault(oq.minimum_intensity, list(oq.imtls)[-1])
        self.effect = {
            trt: Effect({mag: effect[mag][:, t] for mag in effect},
                        dists=dist_bins[trt], threshold=threshold)
            for t, trt in enumerate(gsims_by_trt)}
    else:
        self.effect = {}

    if oq.calculation_mode == 'preclassical' and self.N == 1:
        # from here on the magnitudes are unique strings with 3 digits
        mags = sorted(set('%.3f' % mag for mag in mags))
        smap = parallel.Starmap(ruptures_by_mag_dist)
        for _func, args in self.gen_task_queue():
            smap.submit(args)
        counts = smap.reduce()
        ndists = oq.maximum_distance.get_dist_bins.__defaults__[0]
        for mag in mags:
            arr = numpy.zeros((ndists, len(gsims_by_trt)), U32)
            for trti, trt in enumerate(gsims_by_trt):
                try:
                    arr[:, trti] = counts[trt][mag]
                except KeyError:  # nothing counted: keep the zeros
                    pass
            self.datastore['rups_by_mag_dist/' + mag] = arr
        self.datastore.set_attrs('rups_by_mag_dist', **dist_bins)
        self.datastore['csm_info'] = self.csm_info
        return {}

    smap = parallel.Starmap(self.core_task.__func__)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    self.maxdists = []
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction failed
        if self.maxdists:
            maxdist = numpy.mean(self.maxdists)
            logging.info(
                'Using effective maximum distance for '
                'point sources %d km', maxdist)
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.sources_by_task:
            num_tasks = max(self.sources_by_task) + 1
            sbt = numpy.zeros(num_tasks, [('eff_ruptures', U32),
                                          ('eff_sites', U32),
                                          ('srcids', hdf5.vuint32)])
            for task_no in range(num_tasks):
                # tasks without a record get an empty one
                sbt[task_no] = self.sources_by_task.get(
                    task_no, (0, 0, U32([])))
            self.datastore['sources_by_task'] = sbt
            self.sources_by_task.clear()
    numrups = sum(arr[0] for arr in self.calc_times.values())
    if self.totrups != numrups:
        logging.info('Considered %d/%d ruptures', numrups, self.totrups)
    self.calc_times.clear()  # save a bit of memory
    return acc
def _read_risk_data(self):
    """
    Load the risk model, the hazard site collection and the assets (from
    the exposure or from the parent datastore), cross-check them and
    store the results in the datastore.

    Sets `self.param` and, depending on the inputs, `self.sitecol` and
    `self.assetcol`.

    :raises ValueError:
        if the correlation matrix would be too large, if the exposure is
        missing both in hazard and risk, if the `time_event` differs from
        the one of the hazard, or if a taxonomy has no consequence
        functions while a consequence input is given
    :raises RuntimeError:
        if the exposure contains taxonomies missing in the risk model
    """
    params = self.oqparam
    self.load_riskmodel()  # must be called first

    # hazard sites: from the parent calculation or from the job inputs
    if params.hazard_calculation_id:
        with util.read(params.hazard_calculation_id) as parent_ds:
            hazard_sites = parent_ds['sitecol'].complete
    else:
        hazard_sites = readinput.get_site_collection(params)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            hazard_sites, _dctx = self.cmaker.filter(hazard_sites, self.rup)
            hazard_sites.make_complete()
        if 'site_model' in params.inputs:
            self.datastore['site_model'] = readinput.get_site_model(params)

    if self.datastore.parent:
        hazard_params = self.datastore.parent['oqparam']
    else:
        hazard_params = None

    # assets: from the exposure file or from the parent datastore
    if 'exposure' in params.inputs:
        exposure = self.read_exposure(hazard_sites)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['assetcol/num_taxonomies'] = (
            self.assetcol.num_taxonomies_by_site())
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = numpy.array(
                exposure.exposures, hdf5.vstr)
    elif 'assetcol' in self.datastore.parent:
        parent_assets = self.datastore.parent['assetcol']
        if params.region:
            self.sitecol = hazard_sites.within(wkt.loads(params.region))
        if params.shakemap_id or 'shakemap' in params.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                hazard_sites, parent_assets)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(parent_assets))
            num_sites = len(self.sitecol)
            # hard-coded, heuristic
            if params.spatial_correlation != 'no' and num_sites > MAXSITES:
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % num_sites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, hazard_sites.sids):
            # the region filter removed some sites: reduce the assets too
            self.assetcol = parent_assets.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            self.datastore['assetcol/num_taxonomies'] = (
                self.assetcol.num_taxonomies_by_site())
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(parent_assets))
        else:
            self.assetcol = parent_assets
    else:  # no exposure
        self.sitecol = hazard_sites
        if self.sitecol:
            logging.info('Read %d hazard sites', len(self.sitecol))

    # consistency checks against the parent (hazard) calculation
    if hazard_params:
        parent_ds = self.datastore.parent
        if 'assetcol' in parent_ds:
            check_time_event(params, parent_ds['assetcol'].occupancy_periods)
        elif params.job_type == 'risk' and 'exposure' not in params.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if (hazard_params.time_event
                and hazard_params.time_event != params.time_event):
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' %
                (params.time_event, hazard_params.time_event))

    if params.job_type == 'risk':
        taxonomies = {taxo for taxo in self.assetcol.tagcol.taxonomy
                      if taxo != '?'}

        # check that we are covering all the taxonomies in the exposure
        uncovered = taxonomies - set(self.riskmodel.taxonomies)
        if self.riskmodel and uncovered:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % uncovered)

        # same check for the consequence models, if any
        has_consequences = any(
            key.endswith('_consequence') for key in params.inputs)
        if has_consequences:
            for taxonomy in taxonomies:
                if not self.riskmodel[taxonomy].consequence_functions:
                    raise ValueError(
                        'Missing consequenceFunctions for %s' % taxonomy)

    if hasattr(self, 'sitecol') and self.sitecol:
        self.datastore['sitecol'] = self.sitecol.complete

    # used in the risk calculators
    self.param = dict(individual_curves=params.individual_curves,
                      avg_losses=params.avg_losses)

    # store the `exposed_value` if there is an exposure
    if 'exposed_value' not in set(self.datastore) and hasattr(
            self, 'assetcol'):
        self.datastore['exposed_value'] = self.assetcol.agg_value(
            *params.aggregate_by)
def execute(self):
    """
    Get the ruptures (from the parent calculation, from the sources or
    from a scenario), compute the ground motion fields in parallel and
    store the average GMFs and the relevant event IDs.

    :returns:
        `{}` when there is nothing to compute, otherwise the accumulator
        obtained by reducing the task results with `self.agg_dicts`
    :raises RuntimeError: if no GMFs were generated
    """
    params = self.oqparam
    self.set_param()
    self.offset = 0

    def nothing_to_compute():
        # both the GMFs and the hazard curves are explicitly disabled
        return (params.ground_motion_fields is False and
                params.hazard_curves_from_gmfs is False)

    if params.hazard_calculation_id:  # from ruptures
        self.datastore.parent = util.read(params.hazard_calculation_id)
    elif hasattr(self, 'csm'):  # from sources
        self.build_events_from_sources()
        if nothing_to_compute():
            return {}
    elif 'rupture_model' not in params.inputs:  # download ShakeMap
        logging.warning(
            'There is no rupture_model, the calculator will just '
            'import data without performing any calculation')
        fake = logictree.FullLogicTree.fake()
        self.datastore['full_lt'] = fake  # needed to expose the outputs
        return {}
    else:  # scenario
        self._read_scenario_ruptures()
        if nothing_to_compute():
            return {}

    num_sites = len(self.sitecol.complete)
    if params.ground_motion_fields:
        # create the GMF datasets before switching to SWMR mode
        num_imts = len(params.get_primary_imtls())
        num_rups = len(self.datastore['ruptures'])
        base.create_gmf_data(self.datastore, num_imts,
                             params.get_sec_imts())
        self.datastore.create_dset('gmf_data/sigma_epsilon',
                                   sig_eps_dt(params.imtls))
        self.datastore.create_dset('gmf_data/events_by_sid', U32,
                                   (num_sites,))
        self.datastore.create_dset('gmf_data/time_by_rup', time_dt,
                                   (num_rups,), fillvalue=None)

    # compute_gmfs in parallel
    rup_count = len(self.datastore['ruptures'])
    self.datastore.swmr_on()
    logging.info('Reading {:_d} ruptures'.format(rup_count))
    getters = gen_rupture_getters(self.datastore, params.concurrent_tasks)
    task_args = ((getter, self.param) for getter in getters)
    smap = parallel.Starmap(self.core_task.__func__, task_args,
                            h5=self.datastore.hdf5)
    smap.monitor.save('srcfilter', self.srcfilter)
    result = smap.reduce(self.agg_dicts, self.acc0())
    if 'gmf_data' not in self.datastore:
        return result

    if params.ground_motion_fields:
        with self.monitor('saving avg_gmf', measuremem=True):
            self.weights = self.datastore['weights'][:]
            self.rlzs = self.datastore['events']['rlz_id']
            self.num_events = numpy.bincount(self.rlzs)  # events by rlz
            avg_gmf = {}
            for imt in params.all_imts():
                avg_gmf[imt] = numpy.zeros(self.N, F32)
            relevant = self.save_avg_gmf(avg_gmf)
            self.datastore.create_dframe('avg_gmf', avg_gmf.items())
        num_relevant = len(relevant)
        if num_relevant == 0:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        elif num_relevant < len(self.datastore['events']):
            self.datastore['relevant_events'] = relevant
            logging.info('Stored %d relevant event IDs', num_relevant)
    return result
def _read_risk_data(self):
    """
    Read the risk model, the hazard site collection and the assets,
    cross-check them, then set up amplification, secondary perils and
    the `self.param` dictionary.

    Sets `self.af`, `self.amplifier`, `self.param` and, depending on
    the inputs, `self.sitecol`, `self.assetcol` and `self.crmodel`.

    :raises InvalidFile:
        if there are no intensity measure types, no shakemap input and
        `ground_motion_fields` is set
    :raises ValueError:
        if the correlation matrix would be too large, if the exposure is
        missing both in hazard and risk or if the `time_event` differs
        from the one of the hazard
    :raises RuntimeError:
        if the exposure contains taxonomies missing in the risk model
    """
    # read the risk model (if any), the exposure (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_crmodel()  # must be called first
    if (not oq.imtls and 'shakemap' not in oq.inputs
            and oq.ground_motion_fields):
        raise InvalidFile('There are no intensity measure types in %s' %
                          oq.inputs['job_ini'])
    if oq.hazard_calculation_id:
        # take the complete site collection from the parent calculation
        with util.read(oq.hazard_calculation_id) as dstore:
            haz_sitecol = dstore['sitecol'].complete
            if ('amplification' in oq.inputs and
                    'ampcode' not in haz_sitecol.array.dtype.names):
                haz_sitecol.add_col('ampcode', site.ampcode_dt)
    else:
        haz_sitecol = readinput.get_site_collection(oq, self.datastore)
        if hasattr(self, 'rup'):
            # for scenario we reduce the site collection to the sites
            # within the maximum distance from the rupture
            haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
            haz_sitecol.make_complete()
        if 'site_model' in oq.inputs:
            self.datastore['site_model'] = readinput.get_site_model(oq)
    # parameters of the parent calculation, if there is one
    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        exposure = self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
        self.datastore['cost_calculator'] = exposure.cost_calculator
        if hasattr(readinput.exposure, 'exposures'):
            self.datastore['assetcol/exposures'] = (numpy.array(
                exposure.exposures, hdf5.vstr))
    elif 'assetcol' in self.datastore.parent:
        # no exposure in the job: reuse the assets of the parent
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(oq.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['sitecol'] = self.sitecol
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
            nsites = len(self.sitecol)
            if (oq.spatial_correlation != 'no' and
                    nsites > MAXSITES):  # hard-coded, heuristic
                raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            # the site IDs differ from the hazard ones: reduce the assets
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
        if self.sitecol and oq.imtls:
            logging.info('Read N=%d hazard sites and L=%d hazard levels',
                         len(self.sitecol), oq.imtls.size)
    if oq_hazard:
        # consistency checks against the parent calculation
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
            raise ValueError('Missing exposure both in hazard and risk!')
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' %
                (oq.time_event, oq_hazard.time_event))
    if oq.job_type == 'risk':
        tmap_arr, tmap_lst = logictree.taxonomy_mapping(
            self.oqparam.inputs.get('taxonomy_mapping'),
            self.assetcol.tagcol.taxonomy)
        self.crmodel.tmap = tmap_lst
        if len(tmap_arr):
            self.datastore['taxonomy_mapping'] = tmap_arr
        # the taxonomies appearing in the mapping, except the '?' one
        taxonomies = set(taxo for items in self.crmodel.tmap
                         for taxo, weight in items if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.crmodel.taxonomies)
        if self.crmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        if len(self.crmodel.taxonomies) > len(taxonomies):
            logging.info('Reducing risk model from %d to %d taxonomies',
                         len(self.crmodel.taxonomies), len(taxonomies))
            self.crmodel = self.crmodel.reduce(taxonomies)
            # the reduced model is a new object: set the mapping again
            self.crmodel.tmap = tmap_lst
        self.crmodel.reduce_cons_model(self.assetcol.tagcol)
    if hasattr(self, 'sitecol') and self.sitecol:
        if 'site_model' in oq.inputs:
            # association distance: 1.414 times the grid spacing if
            # there is one, otherwise 5
            assoc_dist = (oq.region_grid_spacing * 1.414
                          if oq.region_grid_spacing else 5
                          )  # Graeme's 5km
            sm = readinput.get_site_model(oq)
            self.sitecol.complete.assoc(sm, assoc_dist)
        self.datastore['sitecol'] = self.sitecol
    # store amplification functions if any
    self.af = None
    if 'amplification' in oq.inputs:
        logging.info('Reading %s', oq.inputs['amplification'])
        df = readinput.get_amplification(oq)
        check_amplification(df, self.sitecol)
        self.amplifier = Amplifier(oq.imtls, df, oq.soil_intensities)
        if oq.amplification_method == 'kernel':
            # TODO: need to add additional checks on the main calculation
            # methodology since the kernel method is currently tested only
            # for classical PSHA
            self.af = AmplFunction.from_dframe(df)
            # with the kernel method only self.af is kept
            self.amplifier = None
    else:
        self.amplifier = None
    # manage secondary perils
    sec_perils = oq.get_sec_perils()
    for sp in sec_perils:
        sp.prepare(self.sitecol)  # add columns as needed
    # minimum asset loss for each loss name
    mal = {
        lt: getdefault(oq.minimum_asset_loss, lt)
        for lt in oq.loss_names
    }
    if mal:
        logging.info('minimum_asset_loss=%s', mal)
    self.param = dict(individual_curves=oq.individual_curves,
                      ps_grid_spacing=oq.ps_grid_spacing,
                      collapse_level=oq.collapse_level,
                      split_sources=oq.split_sources,
                      avg_losses=oq.avg_losses,
                      amplifier=self.amplifier,
                      sec_perils=sec_perils,
                      ses_seed=oq.ses_seed,
                      minimum_asset_loss=mal)
    # compute exposure stats
    if hasattr(self, 'assetcol'):
        save_agg_values(self.datastore, self.assetcol, oq.loss_names,
                        oq.aggregate_by)
def execute(self):
    """
    Get the ruptures (from the parent calculation, from the sources or
    from a scenario), compute the ground motion fields in parallel and
    store the IDs of the events that produced a GMF.

    :returns:
        `{}` when there is nothing to compute, otherwise the accumulator
        obtained by reducing the task results with `self.agg_dicts`
    :raises InvalidFile: if there are no intensity measure types
    :raises RuntimeError: if no GMFs were generated
    """
    params = self.oqparam
    self.set_param()
    self.offset = 0

    def nothing_to_compute():
        # both the GMFs and the hazard curves are explicitly disabled
        return (params.ground_motion_fields is False and
                params.hazard_curves_from_gmfs is False)

    if params.hazard_calculation_id:  # from ruptures
        self.datastore.parent = util.read(params.hazard_calculation_id)
    elif hasattr(self, 'csm'):  # from sources
        self.build_events_from_sources()
        if nothing_to_compute():
            return {}
    elif 'rupture_model' not in params.inputs:  # download ShakeMap
        logging.warning(
            'There is no rupture_model, the calculator will just '
            'import data without performing any calculation')
        fake = logictree.FullLogicTree.fake()
        self.datastore['full_lt'] = fake  # needed to expose the outputs
        return {}
    else:  # scenario
        self._read_scenario_ruptures()
        if nothing_to_compute():
            return {}

    if not params.imtls:
        raise InvalidFile('There are no intensity measure types in %s' %
                          params.inputs['job_ini'])

    num_sites = len(self.sitecol.complete)
    if params.ground_motion_fields:
        # create the GMF datasets before switching to SWMR mode
        num_imts = len(params.imtls)
        num_rups = len(self.datastore['ruptures'])
        base.create_gmf_data(self.datastore, num_imts,
                             self.param['sec_perils'])
        self.datastore.create_dset('gmf_data/sigma_epsilon',
                                   sig_eps_dt(params.imtls))
        self.datastore.create_dset('gmf_data/events_by_sid', U32,
                                   (num_sites,))
        self.datastore.create_dset('gmf_data/time_by_rup', time_dt,
                                   (num_rups,), fillvalue=None)

    # compute_gmfs in parallel
    rup_count = len(self.datastore['ruptures'])
    self.datastore.swmr_on()
    logging.info('Reading {:_d} ruptures'.format(rup_count))
    getters = gen_rupture_getters(self.datastore, params.concurrent_tasks)
    task_args = ((getter, self.param) for getter in getters)
    smap = parallel.Starmap(self.core_task.__func__, task_args,
                            h5=self.datastore.hdf5,
                            num_cores=params.num_cores)
    smap.monitor.save('srcfilter', self.srcfilter)
    result = smap.reduce(self.agg_dicts, self.acc0())
    if 'gmf_data' not in self.datastore:
        return result

    if params.ground_motion_fields:
        # the relevant events are the ones appearing in the GMF data
        relevant = numpy.unique(self.datastore['gmf_data/eid'][:])
        num_relevant = len(relevant)
        if num_relevant == 0:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        elif num_relevant < len(self.datastore['events']):
            self.datastore['relevant_events'] = relevant
            logging.info('Stored %d relevant event IDs', num_relevant)
    return result
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        `{}` when only post-processing a parent calculation, otherwise
        the accumulator obtained by reducing the task results with
        `self.agg_dicts`
    :raises RuntimeError:
        if the `source_mags` group of the datastore is empty
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.full_lt = parent['full_lt']
        self.calc_stats()  # post-processing
        return {}

    mags = self.datastore['source_mags']  # by TRT
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    if oq.pointsource_distance is not None:
        # make sure there is an explicit entry for each TRT
        for trt in gsims_by_trt:
            oq.pointsource_distance[trt] = getdefault(
                oq.pointsource_distance, trt)
    mags_by_trt = {trt: mags[trt][()] for trt in mags}

    # the effect is computed only if all IMTs are PGA or SA
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if (imts_ok and oq.pointsource_distance and
            oq.pointsource_distance.suggested()) or (
                imts_ok and oq.minimum_intensity):
        aw, self.psd = get_effect(
            mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
        if len(vars(aw)) > 1:  # more than _extra
            self.datastore['effect_by_mag_dst'] = aw
    elif oq.pointsource_distance:
        self.psd = oq.pointsource_distance.interp(mags_by_trt)
    else:
        self.psd = {}

    smap = parallel.Starmap(classical, h5=self.datastore.hdf5,
                            num_cores=oq.num_cores)
    self.submit_tasks(smap)
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction failed
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            num_tasks = max(self.by_task) + 1,  # NB: 1-element shape tuple
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vstr, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = ' '.join(srcids)
            self.by_task.clear()
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
        int(self.numrups), self.totrups))
    # guard against 0/0 when no rupture was effective
    sites_per_rup = numsites / self.numrups if self.numrups else 0
    logging.info('Effective number of sites per rupture: %d', sites_per_rup)
    if self.psd:
        psdist = max(max(self.psd[trt].values()) for trt in self.psd)
        if psdist != -1 and self.maxradius >= psdist / 2:
            logging.warning(
                'The pointsource_distance of %d km is too '
                'small compared to a maxradius of %d km',
                psdist, self.maxradius)
    self.calc_times.clear()  # save a bit of memory
    return acc
def pre_execute(self):
    """
    Read the inputs of the calculation, choosing the source of the
    hazard: a GMFs file, a hazard curves file, a previous calculation,
    a precalculator run on the fly or the job inputs directly. At the
    end the risk model is saved, if there is one.

    :raises ValueError:
        if the investigation_time or the minimum_intensity differ from
        the ones of the parent calculation, or if the parent is missing
        some of the required IMTs
    """
    oq = self.oqparam
    if 'gmfs' in oq.inputs:  # read hazard from file
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with gmfs_file')
        self.read_inputs()
        save_gmfs(self)
    elif 'hazard_curves' in oq.inputs:  # read hazard from file
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with hazard_curves')
        # NB: horrible: get_site_collection calls get_pmap_from_nrml
        # that sets oq.investigation_time, so it must be called first
        hazard_sites = readinput.get_site_collection(oq)
        self.load_riskmodel()  # must be after get_site_collection
        self.read_exposure(hazard_sites)  # define .assets_by_site
        self.datastore['poes/grp-00'] = fix_ones(readinput.pmap)
        self.datastore['sitecol'] = self.sitecol
        self.datastore['assetcol'] = self.assetcol
        fake_info = source.CompositionInfo.fake()
        self.datastore['csm_info'] = fake_info
        self.rlzs_assoc = fake_info.get_rlzs_assoc()
    elif oq.hazard_calculation_id:
        parent_ds = util.read(oq.hazard_calculation_id)
        self.check_precalc(parent_ds['oqparam'].calculation_mode)
        self.datastore.parent = parent_ds
        # copy missing parameters from the parent
        inherited = {}
        for name, value in vars(parent_ds['oqparam']).items():
            if name not in vars(self.oqparam):
                inherited[name] = value
        self.save_params(**inherited)
        self.read_inputs()
        # the parent must be compatible with the current job
        parent_oq = parent_ds['oqparam']
        if parent_oq.investigation_time != oq.investigation_time:
            raise ValueError(
                'The parent calculation was using investigation_time=%s'
                ' != %s' % (parent_oq.investigation_time,
                            oq.investigation_time))
        if parent_oq.minimum_intensity != oq.minimum_intensity:
            raise ValueError(
                'The parent calculation was using minimum_intensity=%s'
                ' != %s' % (parent_oq.minimum_intensity,
                            oq.minimum_intensity))
        absent_imts = set(oq.risk_imtls) - set(parent_oq.imtls)
        if absent_imts:
            raise ValueError(
                'The parent calculation is missing the IMT(s) %s' %
                ', '.join(absent_imts))
    elif self.__class__.precalc:
        # run the precalculator and take its results
        precalc_cls = calculators[self.__class__.precalc]
        precalc = precalc_cls(self.oqparam, self.datastore.calc_id)
        precalc.run()
        self.param = precalc.param
        self.sitecol = precalc.sitecol
        self.assetcol = precalc.assetcol
        self.riskmodel = precalc.riskmodel
        self.rlzs_assoc = precalc.rlzs_assoc
        if hasattr(precalc, 'csm'):  # no scenario
            self.csm = precalc.csm
    else:
        self.read_inputs()

    if self.riskmodel:
        self.save_riskmodel()
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.

    :returns:
        `{}` when only post-processing a parent calculation, otherwise
        the accumulator obtained by reducing the task results with
        `self.agg_dicts`
    :raises RuntimeError:
        if the `source_mags` dataset is empty
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        with util.read(self.oqparam.hazard_calculation_id) as parent:
            self.csm_info = parent['csm_info']
        self.calc_stats()  # post-processing
        return {}

    mags = self.datastore['source_mags'][()]
    if len(mags) == 0:  # everything was discarded
        raise RuntimeError('All sources were discarded!?')
    gsims_by_trt = self.csm_info.get_gsims_by_trt()
    dist_bins = {trt: oq.maximum_distance.get_dist_bins(trt)
                 for trt in gsims_by_trt}
    # computing the effect makes sense only if all IMTs have the same
    # unit of measure; for simplicity we will consider only PGA and SA
    self.effect = {}
    imts_with_period = [imt for imt in oq.imtls
                        if imt == 'PGA' or imt.startswith('SA')]
    imts_ok = len(imts_with_period) == len(oq.imtls)
    if len(self.sitecol) >= oq.max_sites_disagg and imts_ok:
        logging.info('Computing effect of the ruptures')
        mon = self.monitor('rupture effect')
        effect = parallel.Starmap.apply(
            get_effect_by_mag,
            (mags, self.sitecol.one(), gsims_by_trt,
             oq.maximum_distance, oq.imtls, mon)).reduce()
        self.datastore['effect_by_mag_dst_trt'] = effect
        self.datastore.set_attrs('effect_by_mag_dst_trt', **dist_bins)
        self.effect.update({
            trt: Effect({mag: effect[mag][:, t] for mag in effect},
                        dist_bins[trt])
            for t, trt in enumerate(gsims_by_trt)})
        minint = oq.minimum_intensity.get('default', 0)
        for trt, eff in self.effect.items():
            if minint:
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag(minint)
            # replace pointsource_distance with a dict trt -> mag -> dst
            if oq.pointsource_distance['default']:
                oq.pointsource_distance[trt] = eff.dist_by_mag(
                    eff.collapse_value(oq.pointsource_distance['default']))
    elif oq.pointsource_distance['default']:
        # replace pointsource_distance with a dict trt -> mag -> dst
        for trt in gsims_by_trt:
            try:
                dst = getdefault(oq.pointsource_distance, trt)
            except TypeError:  # 'NoneType' object is not subscriptable
                dst = getdefault(oq.maximum_distance, trt)
            oq.pointsource_distance[trt] = {mag: dst for mag in mags}

    smap = parallel.Starmap(
        self.core_task.__func__, h5=self.datastore.hdf5,
        num_cores=oq.num_cores)
    smap.task_queue = list(self.gen_task_queue())  # really fast
    acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
    self.datastore.swmr_on()
    smap.h5 = self.datastore.hdf5
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.get_results().reduce(self.agg_dicts, acc0)
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        # the source info is stored even if the reduction failed
        with self.monitor('store source_info'):
            self.store_source_info(self.calc_times)
        if self.by_task:
            logging.info('Storing by_task information')
            num_tasks = max(self.by_task) + 1,  # NB: 1-element shape tuple
            er = self.datastore.create_dset('by_task/eff_ruptures',
                                            U32, num_tasks)
            es = self.datastore.create_dset('by_task/eff_sites',
                                            U32, num_tasks)
            si = self.datastore.create_dset('by_task/srcids',
                                            hdf5.vuint32, num_tasks,
                                            fillvalue=None)
            for task_no, rec in self.by_task.items():
                effrups, effsites, srcids = rec
                er[task_no] = effrups
                es[task_no] = effsites
                si[task_no] = srcids
            self.by_task.clear()
    self.numrups = sum(arr[0] for arr in self.calc_times.values())
    numsites = sum(arr[1] for arr in self.calc_times.values())
    logging.info('Effective number of ruptures: %d/%d',
                 self.numrups, self.totrups)
    # guard against 0/0 when no rupture was effective
    sites_per_rup = numsites / self.numrups if self.numrups else 0
    logging.info('Effective number of sites per rupture: %d', sites_per_rup)
    self.calc_times.clear()  # save a bit of memory
    return acc