def combine_mean_curves(calc_big, calc_small):
    """
    Combine the hazard curves coming from two different calculations.
    The result will be the hazard curves of calc_big, updated on the sites
    in common with calc_small with the PoEs of calc_small.
    For instance: calc_big = USA, calc_small = California
    """
    dstore_big = datastore.read(calc_big)
    dstore_small = datastore.read(calc_small)
    sitecol_big = dstore_big['sitecol']
    sitecol_small = dstore_small['sitecol']
    site_id_big = {(lon, lat): sid for sid, lon, lat in zip(
        sitecol_big.sids, sitecol_big.lons, sitecol_big.lats)}
    site_id_small = {(lon, lat): sid for sid, lon, lat in zip(
        sitecol_small.sids, sitecol_small.lons, sitecol_small.lats)}
    common = set(site_id_big) & set(site_id_small)
    if not common:
        raise RuntimeError('There are no common sites between calculation '
                           '%d and %d' % (calc_big, calc_small))
    sids_small = [site_id_small[lonlat] for lonlat in common]
    pmap_big = PmapGetter(dstore_big).get_mean()  # USA
    pmap_small = PmapGetter(dstore_small, sids=sids_small).get_mean()  # Cal
    for lonlat in common:
        pmap_big[site_id_big[lonlat]] |= pmap_small.get(
            site_id_small[lonlat], 0)
    out = 'combine_%d_%d.hdf5' % (calc_big, calc_small)
    with hdf5.File(out, 'w') as h5:
        h5['hcurves/mean'] = pmap_big
        h5['oqparam'] = dstore_big['oqparam']
        h5['sitecol'] = dstore_big['sitecol']
    print('Generated %s' % out)
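# Usage sketch for combine_mean_curves: the calculation IDs below are
# hypothetical and must refer to two completed hazard calculations already
# present in the local datastore (for instance a national and a regional run).
if __name__ == '__main__':
    combine_mean_curves(calc_big=1234, calc_small=1235)
    # -> writes combine_1234_1235.hdf5 in the current directory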
def run_calc(self, testfile, job_ini, **kw):
    """
    Return the outputs of the calculation as a dictionary
    """
    inis = job_ini.split(',')
    assert len(inis) in (1, 2), inis
    self.calc = self.get_calc(testfile, inis[0], **kw)
    self.edir = tempfile.mkdtemp()
    with self.calc._monitor:
        result = self.calc.run(export_dir=self.edir)
    duration = {inis[0]: self.calc._monitor.duration}
    if len(inis) == 2:
        hc_id = self.calc.datastore.calc_id
        calc = self.get_calc(
            testfile, inis[1], hazard_calculation_id=str(hc_id), **kw)
        with calc._monitor:
            exported = calc.run(export_dir=self.edir)
            result.update(exported)
        duration[inis[1]] = calc._monitor.duration
        self.calc = calc
    # reopen datastore, since some tests need to export from it
    dstore = datastore.read(self.calc.datastore.calc_id)
    self.calc.datastore = dstore
    self.__class__.duration += duration
    return result
def get_rupdict(self):
    """
    :returns: a dictionary with the parameters of the rupture
    """
    assert len(self.rup_array) == 1, 'Please specify a slice of length 1'
    dic = {'trt': self.trt, 'samples': self.samples}
    with datastore.read(self.filename) as dstore:
        rupgeoms = dstore['rupgeoms']
        source_ids = dstore['source_info']['source_id']
        rec = self.rup_array[0]
        geom = rupgeoms[rec['gidx1']:rec['gidx2']].reshape(
            rec['sy'], rec['sz'])
        dic['lons'] = geom['lon']
        dic['lats'] = geom['lat']
        dic['deps'] = geom['depth']
        rupclass, surclass = self.code2cls[rec['code']]
        dic['rupture_class'] = rupclass.__name__
        dic['surface_class'] = surclass.__name__
        dic['hypo'] = rec['hypo']
        dic['occurrence_rate'] = rec['occurrence_rate']
        dic['grp_id'] = rec['grp_id']
        dic['n_occ'] = rec['n_occ']
        dic['serial'] = rec['serial']
        dic['mag'] = rec['mag']
        dic['srcid'] = source_ids[rec['srcidx']]
    return dic
def read(calc_id, username=None):
    """
    :param calc_id: a calculation ID
    :param username: if given, restrict the search to the user's calculations
    :returns: the associated DataStore instance
    """
    if isinstance(calc_id, str) or calc_id < 0 and not username:
        # get the last calculation in the datastore of the current user
        return datastore.read(calc_id)
    job = logs.dbcmd('get_job', calc_id, username)
    if job:
        return datastore.read(job.ds_calc_dir + '.hdf5')
    else:
        # calc_id can be present in the datastore and not in the database:
        # this happens if the calculation was run with `oq run`
        return datastore.read(calc_id)
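# Usage sketch: a negative calc_id selects the latest calculation of the
# current user directly from the datastore, while a positive ID goes through
# the job database first (the IDs below are hypothetical).
if __name__ == '__main__':
    latest = read(-1)      # most recent calculation in the local datastore
    specific = read(1234)  # a specific calculation ID, if present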
def importcalc(calc_id):
    """
    Import a remote calculation into the local database. server, username
    and password must be specified in an openquake.cfg file.
    NB: calc_id can be a local pathname to a datastore not already
    present in the database: in that case it is imported in the db.
    """
    dbserver.ensure_on()
    try:
        calc_id = int(calc_id)
    except ValueError:  # assume calc_id is a pathname
        calc_id, datadir = datastore.extract_calc_id_datadir(calc_id)
        status = 'complete'
        remote = False
    else:
        remote = True
    job = logs.dbcmd('get_job', calc_id)
    if job is not None:
        sys.exit('There is already a job #%d in the local db' % calc_id)
    if remote:
        datadir = datastore.get_datadir()
        webex = WebExtractor(calc_id)
        status = webex.status['status']
        hc_id = webex.oqparam.hazard_calculation_id
        if hc_id:
            sys.exit('The job has a parent (#%d) and cannot be '
                     'downloaded' % hc_id)
        webex.dump('%s/calc_%d.hdf5' % (datadir, calc_id))
        webex.close()
    with datastore.read(calc_id) as dstore:
        engine.expose_outputs(dstore, status=status)
    logging.info('Imported calculation %d successfully', calc_id)
def compare_mean_curves(calc_ref, calc, nsigma=3):
    """
    Compare the hazard curves coming from two different calculations.
    """
    dstore_ref = datastore.read(calc_ref)
    dstore = datastore.read(calc)
    imtls = dstore_ref['oqparam'].imtls
    if dstore['oqparam'].imtls != imtls:
        raise RuntimeError('The IMTs and levels are different between '
                           'calculation %d and %d' % (calc_ref, calc))
    sitecol_ref = dstore_ref['sitecol']
    sitecol = dstore['sitecol']
    site_id_ref = {(lon, lat): sid for sid, lon, lat in zip(
        sitecol_ref.sids, sitecol_ref.lons, sitecol_ref.lats)}
    site_id = {(lon, lat): sid for sid, lon, lat in zip(
        sitecol.sids, sitecol.lons, sitecol.lats)}
    common = set(site_id_ref) & set(site_id)
    if not common:
        raise RuntimeError('There are no common sites between calculation '
                           '%d and %d' % (calc_ref, calc))
    pmap_ref = PmapGetter(dstore_ref, sids=[
        site_id_ref[lonlat] for lonlat in common]).get_mean()
    pmap = PmapGetter(dstore, sids=[
        site_id[lonlat] for lonlat in common]).get_mean()
    for lonlat in common:
        mean, std = pmap[site_id[lonlat]].array.T  # shape (2, N)
        mean_ref, std_ref = pmap_ref[site_id_ref[lonlat]].array.T
        err = numpy.sqrt(std**2 + std_ref**2)
        for imt in imtls:
            sl = imtls(imt)
            ok = (numpy.abs(mean[sl] - mean_ref[sl]) < nsigma * err[sl]).all()
            if not ok:
                md = (numpy.abs(mean[sl] - mean_ref[sl])).max()
                plt.title('point=%s, imt=%s, maxdiff=%.2e' % (lonlat, imt, md))
                plt.loglog(imtls[imt], mean_ref[sl] + std_ref[sl],
                           label=str(calc_ref), color='black')
                plt.loglog(imtls[imt], mean_ref[sl] - std_ref[sl],
                           color='black')
                plt.loglog(imtls[imt], mean[sl] + std[sl],
                           label=str(calc), color='red')
                plt.loglog(imtls[imt], mean[sl] - std[sl], color='red')
                plt.legend()
                plt.show()
def compute_loss_curves_maps(filename, builder, rlzi, monitor):
    """
    :param filename: path to the datastore
    :param builder: LossCurvesMapsBuilder instance
    :param rlzi: realization index
    :param monitor: Monitor instance
    :returns: rlzi, (curves, maps)
    """
    with datastore.read(filename) as dstore:
        rlzs = dstore['losses_by_event']['rlzi']
        losses = dstore['losses_by_event'][rlzs == rlzi]['loss']
    return rlzi, builder.build_curves_maps(losses, rlzi)
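# Usage sketch, assuming the datastore of a completed event based risk
# calculation with a losses_by_event table; the path is a placeholder and
# the builder is obtained with get_loss_builder, as in the larger variant of
# this function further below.
if __name__ == '__main__':
    from openquake.baselib.performance import Monitor
    fname = '/path/to/calc_1234.hdf5'  # hypothetical datastore path
    with datastore.read(fname) as dstore:
        builder = get_loss_builder(dstore)
    rlzi, (curves, maps) = compute_loss_curves_maps(
        fname, builder, 0, Monitor())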
def get_ruptures(self, srcfilter=calc.filters.nofilter):
    """
    :returns: a list of EBRuptures filtered by bounding box
    """
    ebrs = []
    with datastore.read(self.filename) as dstore:
        rupgeoms = dstore['rupgeoms']
        for rec in self.rup_array:
            if srcfilter.integration_distance:
                sids = srcfilter.close_sids(rec, self.trt, rec['mag'])
                if len(sids) == 0:  # the rupture is far away
                    continue
            else:
                sids = None
            mesh = numpy.zeros((3, rec['sy'], rec['sz']), F32)
            geom = rupgeoms[rec['gidx1']:rec['gidx2']].reshape(
                rec['sy'], rec['sz'])
            mesh[0] = geom['lon']
            mesh[1] = geom['lat']
            mesh[2] = geom['depth']
            rupture_cls, surface_cls = self.code2cls[rec['code']]
            rupture = object.__new__(rupture_cls)
            rupture.serial = rec['serial']
            rupture.surface = object.__new__(surface_cls)
            rupture.mag = rec['mag']
            rupture.rake = rec['rake']
            rupture.hypocenter = geo.Point(*rec['hypo'])
            rupture.occurrence_rate = rec['occurrence_rate']
            rupture.tectonic_region_type = self.trt
            if surface_cls is geo.PlanarSurface:
                rupture.surface = geo.PlanarSurface.from_array(
                    mesh[:, 0, :])
            elif surface_cls is geo.MultiSurface:
                # mesh has shape (3, n, 4)
                rupture.surface.__init__([
                    geo.PlanarSurface.from_array(mesh[:, i, :])
                    for i in range(mesh.shape[1])])
            elif surface_cls is geo.GriddedSurface:
                # fault surface, strike and dip will be computed
                rupture.surface.strike = rupture.surface.dip = None
                rupture.surface.mesh = Mesh(*mesh)
            else:
                # fault surface, strike and dip will be computed
                rupture.surface.strike = rupture.surface.dip = None
                rupture.surface.__init__(RectangularMesh(*mesh))
            grp_id = rec['grp_id']
            ebr = EBRupture(rupture, rec['srcidx'], grp_id,
                            rec['n_occ'], self.samples)
            # not implemented: rupture_slip_direction
            ebr.sids = sids
            ebrs.append(ebr)
    return ebrs
def calc_oqparam(request, job_id):
    """
    Return the calculation parameters as a JSON
    """
    job = logs.dbcmd('get_job', int(job_id))
    if job is None:
        return HttpResponseNotFound()
    if not utils.user_has_permission(request, job.user_name):
        return HttpResponseForbidden()
    with datastore.read(job.ds_calc_dir + '.hdf5') as ds:
        oq = ds['oqparam']
    return HttpResponse(content=json.dumps(vars(oq)), content_type=JSON)
def execute(self):
    oq = self.oqparam
    self.set_param()
    self.offset = 0
    self.indices = collections.defaultdict(list)  # sid, idx -> indices
    if oq.hazard_calculation_id and 'ruptures' in self.datastore:
        # from ruptures
        self.datastore.parent = datastore.read(oq.hazard_calculation_id)
        self.init_logic_tree(self.csm_info)
    else:
        # from sources
        self.build_events_from_sources()
        if oq.ground_motion_fields is False:
            return {}
    if not oq.imtls:
        raise InvalidFile('There are no intensity measure types in %s' %
                          oq.inputs['job_ini'])
    iterargs = ((rgetter, self.src_filter, self.param)
                for rgetter in self.gen_rupture_getters())
    # call compute_gmfs in parallel
    acc = parallel.Starmap(
        self.core_task.__func__, iterargs, self.monitor()
    ).reduce(self.agg_dicts, self.acc0())
    if self.indices:
        N = len(self.sitecol.complete)
        logging.info('Saving gmf_data/indices')
        with self.monitor('saving gmf_data/indices', measuremem=True,
                          autoflush=True):
            self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
            dset = self.datastore.create_dset(
                'gmf_data/indices', hdf5.vuint32,
                shape=(N, 2), fillvalue=None)
            num_evs = self.datastore.create_dset(
                'gmf_data/events_by_sid', U32, (N,))
            for sid in self.sitecol.complete.sids:
                start = numpy.array(self.indices[sid, 0])
                stop = numpy.array(self.indices[sid, 1])
                dset[sid, 0] = start
                dset[sid, 1] = stop
                num_evs[sid] = (stop - start).sum()
            num_evs = num_evs[()]
            avg_events_by_sid = num_evs.sum() / N
            logging.info('Found ~%d GMVs per site', avg_events_by_sid)
            self.datastore.set_attrs(
                'gmf_data', avg_events_by_sid=avg_events_by_sid,
                max_events_by_sid=num_evs.max())
    elif oq.ground_motion_fields:
        raise RuntimeError('No GMFs were generated, perhaps they were '
                           'all below the minimum_intensity threshold')
    return acc
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding
    all sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'].value
        ok = info['weight'] > 0
        source_ids = set(info[ok]['source_id'])
    with performance.Monitor() as mon:
        readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], source_ids)
    print(mon)
def test_ebr(self):
    # test a single case of `run_job`, but it is the most complex one,
    # event based risk with post processing
    job_ini = os.path.join(
        os.path.dirname(case_master.__file__), 'job.ini')
    with Print.patch() as p:
        job_id = run_job(job_ini, log_level='error')
    self.assertIn('id | name', str(p))
    # sanity check on the performance view: make sure that the most
    # relevant information is stored (it can be lost for instance due
    # to a wrong refactoring of the safely_call function)
    with read(job_id) as dstore:
        perf = view('performance', dstore)
        self.assertIn('total event_based_risk', perf)
def extract(request, calc_id, what):
    """
    Wrapper over the `oq extract` command. If `settings.LOCKDOWN` is true
    only calculations owned by the current user can be retrieved.
    """
    job = logs.dbcmd('get_job', int(calc_id))
    if job is None:
        return HttpResponseNotFound()
    if not utils.user_has_permission(request, job.user_name):
        return HttpResponseForbidden()
    try:
        # read the data and save them on a temporary .npz file
        with datastore.read(job.ds_calc_dir + '.hdf5') as ds:
            fd, fname = tempfile.mkstemp(
                prefix=what.replace('/', '-'), suffix='.npz')
            os.close(fd)
            n = len(request.path_info)
            query_string = unquote_plus(request.get_full_path()[n:])
            aw = _extract(ds, what + query_string)
            a = {}
            for key, val in vars(aw).items():
                key = str(key)  # can be a numpy.bytes_
                if isinstance(val, str):
                    # without this oq extract would fail
                    a[key] = numpy.array(val.encode('utf-8'))
                elif isinstance(val, dict):
                    # this is a hack: we are losing the values
                    a[key] = list(val)
                else:
                    a[key] = val
            numpy.savez_compressed(fname, **a)
    except Exception as exc:
        tb = ''.join(traceback.format_tb(exc.__traceback__))
        return HttpResponse(
            content='%s: %s\n%s' % (exc.__class__.__name__, exc, tb),
            content_type='text/plain', status=500)
    # stream the data back
    stream = FileWrapper(open(fname, 'rb'))
    stream.close = lambda: (FileWrapper.close(stream), os.remove(fname))
    response = FileResponse(stream, content_type='application/octet-stream')
    response['Content-Disposition'] = (
        'attachment; filename=%s' % os.path.basename(fname))
    response['Content-Length'] = str(os.path.getsize(fname))
    return response
def make_report(isodate='today'):
    """
    Build an HTML report with the computations performed at the given isodate.
    Return the name of the report, which is saved in the current directory.
    """
    if isodate == 'today':
        isodate = date.today()
    else:
        isodate = date(*time.strptime(isodate, '%Y-%m-%d')[:3])
    isodate1 = isodate + timedelta(1)  # +1 day
    tag_ids = []
    tag_status = []
    tag_contents = []
    # the fetcher returns a header which is stripped with [1:]
    jobs = dbcmd(
        'fetch', ALL_JOBS, isodate.isoformat(), isodate1.isoformat())
    page = '<h2>%d job(s) finished before midnight of %s</h2>' % (
        len(jobs), isodate)
    for job_id, user, status, ds_calc in jobs:
        tag_ids.append(job_id)
        tag_status.append(status)
        [stats] = dbcmd('fetch', JOB_STATS, job_id)
        (job_id, user, start_time, stop_time, status, duration) = stats
        try:
            ds = read(job_id, datadir=os.path.dirname(ds_calc))
            txt = view_fullreport('fullreport', ds)
            report = html_parts(txt)
        except Exception as exc:
            report = dict(
                html_title='Could not generate report: %s' % cgi.escape(
                    str(exc), quote=True),
                fragment='')
        page = report['html_title']
        page += html([stats._fields, stats])
        page += report['fragment']
        tag_contents.append(page)
    page = make_tabs(tag_ids, tag_status, tag_contents) + (
        'Report last updated: %s' % datetime.now())
    fname = 'jobs-%s.html' % isodate
    with open(fname, 'w') as f:
        f.write(PAGE_TEMPLATE % page)
    return fname
def recompute_losses(calc_id, aggregate_by):
    """Re-run the postprocessing after an event based risk calculation"""
    parent = datastore.read(calc_id)
    oqp = parent['oqparam']
    aggby = aggregate_by.split(',')
    for tagname in aggby:
        if tagname not in oqp.aggregate_by:
            raise ValueError('%r not in %s' % (tagname, oqp.aggregate_by))
    job_id = logs.init('job', level=logging.INFO)
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    with logs.handle(job_id, logging.INFO):
        oqp.hazard_calculation_id = calc_id
        parallel.Starmap.init()
        prc = PostRiskCalculator(oqp, job_id)
        try:
            prc.run(aggregate_by=aggby)
        finally:
            parallel.Starmap.shutdown()
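# Usage sketch (hypothetical calculation ID): re-aggregate the event losses
# of a finished event based risk calculation; the tags passed here must be a
# subset of the aggregate_by tags used in the original job.
if __name__ == '__main__':
    recompute_losses(1234, 'taxonomy')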
def main(datadir):
    lst = []
    for fname in glob.glob(datadir + '/calc_*.hdf5'):
        try:
            dstore = read(fname)
        except OSError:  # already open
            continue
        with dstore:
            try:
                descr = dstore['oqparam'].description
            except (KeyError, AttributeError):  # not a calculation
                continue
            try:
                tot_ruptures = dstore['csm_info/sg_data']['totrup'].sum()
            except KeyError:
                tot_ruptures = 0
            else:
                lst.append((descr, tot_ruptures))
    print(rst_table(lst, ['calculation', 'total number of ruptures']))
def zerodict(self):
    """
    Initial accumulator, a dictionary (grp_id, gsim) -> curves
    """
    if self.oqparam.hazard_calculation_id is None:
        # filter_csm must be called first
        self.src_filter, self.csm = self.filter_csm()
        self.csm_info = self.csm.info
    else:
        self.datastore.parent = datastore.read(
            self.oqparam.hazard_calculation_id)
        self.csm_info = self.datastore.parent['csm_info']
    self.rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
    self.L = len(self.oqparam.imtls.array)
    self.R = self.csm_info.get_num_rlzs()
    zd = AccumDict({r: ProbabilityMap(self.L) for r in range(self.R)})
    zd.eff_ruptures = AccumDict()
    self.grp_trt = self.csm_info.grp_by("trt")
    return zd
def extract(request, calc_id, what):
    """
    Wrapper over the `oq extract` command. If settings.LOCKDOWN is true
    only calculations owned by the current user can be retrieved.
    """
    job = logs.dbcmd('get_job', int(calc_id))
    if job is None:
        return HttpResponseNotFound()
    if not utils.user_has_permission(request, job.user_name):
        return HttpResponseForbidden()
    # read the data and save them on a temporary .npz file
    with datastore.read(job.ds_calc_dir + '.hdf5') as ds:
        fd, fname = tempfile.mkstemp(prefix=what.replace('/', '-'),
                                     suffix='.npz')
        os.close(fd)
        n = len(request.path_info)
        query_string = unquote_plus(request.get_full_path()[n:])
        obj = _extract(ds, what + query_string)
        if inspect.isgenerator(obj):
            array, attrs = 0, {k: _array(v) for k, v in obj}
        elif hasattr(obj, '__toh5__'):
            array, attrs = obj.__toh5__()
        else:  # assume obj is an array
            array, attrs = obj, {}
        a = {}
        for key, val in attrs.items():
            if isinstance(key, bytes):
                key = key.decode('utf-8')
            if isinstance(val, str):
                # without this oq extract would fail
                a[key] = numpy.array(val.encode('utf-8'))
            else:
                a[key] = val
        numpy.savez_compressed(fname, array=array, **a)
    # stream the data back
    stream = FileWrapper(open(fname, 'rb'))
    stream.close = lambda: (FileWrapper.close(stream), os.remove(fname))
    response = FileResponse(stream, content_type='application/octet-stream')
    response['Content-Disposition'] = (
        'attachment; filename=%s' % os.path.basename(fname))
    return response
def make_report(isodate='today'):
    """
    Build an HTML report with the computations performed at the given isodate.
    Return the name of the report, which is saved in the current directory.
    """
    if isodate == 'today':
        isodate = date.today()
    else:
        isodate = date(*time.strptime(isodate, '%Y-%m-%d')[:3])
    isodate1 = isodate + timedelta(1)  # +1 day
    tag_ids = []
    tag_status = []
    tag_contents = []
    # the fetcher returns a header which is stripped with [1:]
    jobs = dbcmd('fetch', ALL_JOBS, isodate.isoformat(), isodate1.isoformat())
    page = '<h2>%d job(s) finished before midnight of %s</h2>' % (len(jobs),
                                                                  isodate)
    for job_id, user, status, ds_calc in jobs:
        tag_ids.append(job_id)
        tag_status.append(status)
        [stats] = dbcmd('fetch', JOB_STATS, job_id)
        (job_id, user, start_time, stop_time, status, duration) = stats
        try:
            ds = read(job_id, datadir=os.path.dirname(ds_calc))
            txt = view_fullreport('fullreport', ds)
            report = html_parts(txt)
        except Exception as exc:
            report = dict(html_title='Could not generate report: %s' %
                          cgi.escape(str(exc), quote=True), fragment='')
        page = report['html_title']
        page += html([stats._fields, stats])
        page += report['fragment']
        tag_contents.append(page)
    page = make_tabs(tag_ids, tag_status, tag_contents) + (
        'Report last updated: %s' % datetime.now())
    fname = 'jobs-%s.html' % isodate
    with open(fname, 'w') as f:
        f.write(PAGE_TEMPLATE % page)
    return fname
def test_ebr(self):
    # test a single case of `run_job`, but it is the most complex one,
    # event based risk with post processing
    job_ini = os.path.join(os.path.dirname(case_master.__file__), 'job.ini')
    with Print.patch() as p:
        job_id = run_job(job_ini, log_level='error')
    self.assertIn('id | name', str(p))
    # sanity check on the performance views: make sure that the most
    # relevant information is stored (it can be lost due to a wrong
    # refactoring of the monitoring and it happened several times)
    with read(job_id) as dstore:
        perf = view('performance', dstore)
        self.assertIn('total event_based_risk', perf)
        task_info = view('task_info', dstore)
        self.assertIn('compute_gmfs', task_info)
        job_info = view('job_info', dstore)
        self.assertIn('compute_gmfs', job_info)
def plot_uhs(calc_id, sites='0'):
    """
    UHS plotter.
    """
    # read the hazard data
    dstore = datastore.read(calc_id)
    getter = getters.PmapGetter(dstore)
    getter.init()
    oq = dstore['oqparam']
    indices = list(map(int, sites.split(',')))
    n_sites = len(dstore['sitecol'])
    if not set(indices) <= set(range(n_sites)):
        invalid = sorted(set(indices) - set(range(n_sites)))
        print('The indices %s are invalid: no graph for them' % invalid)
    valid = sorted(set(range(n_sites)) & set(indices))
    print('Found %d site(s); plotting %d of them' % (n_sites, len(valid)))
    pmaps = getter.get_pmaps(numpy.array(indices))
    plt = make_figure(valid, n_sites, oq.imtls, oq.poes, pmaps)
    plt.show()
def run_calc(self, testfile, job_ini, **kw):
    """
    Return the outputs of the calculation as a dictionary
    """
    inis = job_ini.split(',')
    assert len(inis) in (1, 2), inis
    self.calc = self.get_calc(testfile, inis[0], **kw)
    self.edir = tempfile.mkdtemp()
    with self.calc._monitor:
        result = self.calc.run(export_dir=self.edir)
    if len(inis) == 2:
        hc_id = self.calc.datastore.calc_id
        self.calc = self.get_calc(
            testfile, inis[1], hazard_calculation_id=str(hc_id), **kw)
        with self.calc._monitor:
            result.update(self.calc.run(export_dir=self.edir))
    # reopen datastore, since some tests need to export from it
    dstore = datastore.read(self.calc.datastore.calc_id)
    self.calc.datastore = dstore
    return result
def checksum(job_file_or_job_id):
    """
    Get the checksum of a calculation from the calculation ID (if already
    done) or from the job.ini/job.zip file (if not done yet).
    """
    try:
        job_id = int(job_file_or_job_id)
        job_file = None
    except ValueError:
        job_id = None
        job_file = job_file_or_job_id
        if not os.path.exists(job_file):
            sys.exit('%s does not correspond to an existing file' % job_file)
    if job_id:
        dstore = datastore.read(job_id)
        checksum = dstore['/'].attrs['checksum32']
    else:
        oq = readinput.get_oqparam(job_file)
        checksum = readinput.get_checksum32(oq)
    print(checksum)
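# Usage sketch: the checksum can be computed either from an existing
# calculation ID or from a job.ini file that has not been run yet (both
# arguments below are hypothetical).
if __name__ == '__main__':
    checksum(1234)       # from a completed calculation in the datastore
    checksum('job.ini')  # from a configuration file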
def pre_execute(self):
    oq = self.oqparam
    ds = self.datastore
    self.reaggreate = False
    if oq.hazard_calculation_id and not ds.parent:
        ds.parent = datastore.read(oq.hazard_calculation_id)
        assetcol = ds['assetcol']
        self.aggkey = base.save_agg_values(
            ds, assetcol, oq.loss_names, oq.aggregate_by)
        aggby = ds.parent['oqparam'].aggregate_by
        self.reaggreate = aggby and oq.aggregate_by != aggby
        if self.reaggreate:
            self.num_tags = dict(
                zip(aggby, assetcol.tagcol.agg_shape(aggby)))
    else:
        assetcol = ds['assetcol']
        self.aggkey = assetcol.tagcol.get_aggkey(oq.aggregate_by)
    self.L = len(oq.loss_names)
    size = general.humansize(ds.getsize('agg_loss_table'))
    logging.info('Stored %s in the agg_loss_table', size)
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        parent = datastore.read(self.oqparam.hazard_calculation_id)
        self.csm_info = parent['csm_info']
        parent.close()
        self.calc_stats(parent)  # post-processing
        return {}
    with self.monitor('managing sources', autoflush=True):
        smap = parallel.Starmap(
            self.core_task.__func__, monitor=self.monitor())
        source_ids = []
        data = []
        for i, sources in enumerate(self._send_sources(smap)):
            source_ids.append(get_src_ids(sources))
            for src in sources:  # collect source data
                data.append((i, src.nsites, src.num_ruptures, src.weight))
        if source_ids:
            self.datastore['task_sources'] = encode(source_ids)
        self.datastore.extend(
            'source_data', numpy.array(data, source_data_dt))
    self.calc_times = AccumDict(accum=numpy.zeros(2, F32))
    try:
        acc = smap.reduce(self.agg_dicts, self.acc0())
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(self.calc_times)
        if acc.nsites:
            src_ids = sorted(acc.nsites)
            nsites = [acc.nsites[i] for i in src_ids]
            self.datastore['source_info'][src_ids, 'num_sites'] = nsites
    if not self.calc_times:
        raise RuntimeError('All sources were filtered away!')
    self.calc_times.clear()  # save a bit of memory
    return acc
def __iter__(self):
    with datastore.read(self.hdf5path) as dstore:
        rupgeoms = dstore['rupgeoms']
        for rec in self.rup_array:
            mesh = numpy.zeros((3, rec['sy'], rec['sz']), F32)
            geom = rupgeoms[rec['gidx1']:rec['gidx2']].reshape(
                rec['sy'], rec['sz'])
            mesh[0] = geom['lon']
            mesh[1] = geom['lat']
            mesh[2] = geom['depth']
            rupture_cls, surface_cls = self.code2cls[rec['code']]
            rupture = object.__new__(rupture_cls)
            rupture.serial = rec['serial']
            rupture.surface = object.__new__(surface_cls)
            rupture.mag = rec['mag']
            rupture.rake = rec['rake']
            rupture.hypocenter = geo.Point(*rec['hypo'])
            rupture.occurrence_rate = rec['occurrence_rate']
            rupture.tectonic_region_type = self.trt
            if surface_cls is geo.PlanarSurface:
                rupture.surface = geo.PlanarSurface.from_array(
                    mesh[:, 0, :])
            elif surface_cls is geo.MultiSurface:
                # mesh has shape (3, n, 4)
                rupture.surface.__init__([
                    geo.PlanarSurface.from_array(mesh[:, i, :])
                    for i in range(mesh.shape[1])])
            elif surface_cls is geo.GriddedSurface:
                # fault surface, strike and dip will be computed
                rupture.surface.strike = rupture.surface.dip = None
                rupture.surface.mesh = Mesh(*mesh)
            else:
                # fault surface, strike and dip will be computed
                rupture.surface.strike = rupture.surface.dip = None
                rupture.surface.__init__(RectangularMesh(*mesh))
            grp_id = rec['grp_id']
            ebr = EBRupture(rupture, rec['srcidx'], grp_id, (),
                            rec['n_occ'], self.samples)
            # not implemented: rupture_slip_direction
            yield ebr
def test_spatial_correlation(self):
    expected = {sc1: [0.99, 0.41], sc2: [0.99, 0.64], sc3: [0.99, 0.22]}
    for case in expected:
        self.run_calc(case.__file__, 'job.ini')
        oq = self.calc.oqparam
        self.assertEqual(list(oq.imtls), ['PGA'])
        dstore = read(self.calc.datastore.calc_id)
        gmf = group_array(dstore['gmf_data/data'], 'sid')
        gmvs_site_0 = gmf[0]['gmv']
        gmvs_site_1 = gmf[1]['gmv']
        joint_prob_0_5 = joint_prob_of_occurrence(
            gmvs_site_0, gmvs_site_1, 0.5, oq.investigation_time,
            oq.ses_per_logic_tree_path)
        joint_prob_1_0 = joint_prob_of_occurrence(
            gmvs_site_0, gmvs_site_1, 1.0, oq.investigation_time,
            oq.ses_per_logic_tree_path)
        p05, p10 = expected[case]
        numpy.testing.assert_almost_equal(joint_prob_0_5, p05, decimal=1)
        numpy.testing.assert_almost_equal(joint_prob_1_0, p10, decimal=1)
def test_spatial_correlation(self):
    expected = {sc1: [0.99, 0.41], sc2: [0.99, 0.64], sc3: [0.99, 0.22]}
    for case in expected:
        self.run_calc(case.__file__, 'job.ini')
        oq = self.calc.oqparam
        self.assertEqual(list(oq.imtls), ['PGA'])
        dstore = read(self.calc.datastore.calc_id)
        gmf = dstore.read_df('gmf_data', 'sid')
        gmvs_site_0 = gmf.loc[0]['gmv_0']
        gmvs_site_1 = gmf.loc[1]['gmv_0']
        joint_prob_0_5 = joint_prob_of_occurrence(
            gmvs_site_0, gmvs_site_1, 0.5, oq.investigation_time,
            oq.ses_per_logic_tree_path)
        joint_prob_1_0 = joint_prob_of_occurrence(
            gmvs_site_0, gmvs_site_1, 1.0, oq.investigation_time,
            oq.ses_per_logic_tree_path)
        p05, p10 = expected[case]
        aac(joint_prob_0_5, p05, atol=.1)
        aac(joint_prob_1_0, p10, atol=.1)
def hmap_png(request, calc_id, imt_id, poe_id):
    """
    Get a PNG image with the relevant mean hazard map, if available
    """
    job = logs.dbcmd('get_job', int(calc_id))
    if job is None:
        return HttpResponseNotFound()
    if not utils.user_has_permission(request, job.user_name):
        return HttpResponseForbidden()
    try:
        from PIL import Image
        response = HttpResponse(content_type="image/png")
        with datastore.read(job.ds_calc_dir + '.hdf5') as ds:
            arr = ds['png/hmap_%s_%s' % (imt_id, poe_id)][:]
        Image.fromarray(arr).save(response, format='png')
        return response
    except Exception as exc:
        tb = ''.join(traceback.format_tb(exc.__traceback__))
        return HttpResponse(
            content='%s: %s\n%s' % (exc.__class__.__name__, exc, tb),
            content_type='text/plain', status=500)
def plot_sites(calc_id=-1):
    """
    Plot the sites and the bounding boxes of the sources, enlarged by
    the maximum distance
    """
    # NB: matplotlib is imported inside since it is a costly import
    import matplotlib.pyplot as p
    from matplotlib.patches import Rectangle
    logging.basicConfig(level=logging.INFO)
    dstore = datastore.read(calc_id)
    oq = dstore['oqparam']
    sitecol = dstore['sitecol']
    lons, lats = sitecol.lons, sitecol.lats
    srcfilter = SourceFilter(sitecol.complete, oq.maximum_distance)
    csm = readinput.get_composite_source_model(oq).pfilter(
        srcfilter, oq.concurrent_tasks)
    sources = csm.get_sources()
    if len(sources) > 100:
        logging.info('Sampling 100 sources of %d', len(sources))
        sources = random.Random(42).sample(sources, 100)
    fig, ax = p.subplots()
    ax.grid(True)
    rects = [srcfilter.get_rectangle(src) for src in sources]
    lonset = set(lons)
    for ((lon, lat), width, height) in rects:
        lonset.add(lon)
        lonset.add(fix_lon(lon + width))
    idl = cross_idl(min(lonset), max(lonset))
    if idl:
        lons = lons % 360
    for src, ((lon, lat), width, height) in zip(sources, rects):
        lonlat = (lon % 360 if idl else lon, lat)
        ax.add_patch(Rectangle(lonlat, width, height, fill=False))
        if hasattr(src.__class__, 'polygon'):
            xs, ys = fix_polygon(src.polygon, idl)
            p.plot(xs, ys, marker='.')
    p.scatter(lons, lats, marker='+')
    p.show()
def test_ebr(self):
    # test a single case of `run_jobs`, but it is the most complex one,
    # event based risk with post processing
    job_ini = os.path.join(os.path.dirname(case_master.__file__), 'job.ini')
    with Print.patch() as p:
        [(job_id, oqparam)] = run_jobs([job_ini], log_level='error')
    self.assertIn('id | name', str(p))
    # check the exported outputs
    expected = set('''\
Aggregate Event Losses
Aggregate Loss Curves
Aggregate Loss Curves Statistics
Aggregate Losses
Aggregate Losses Statistics
Average Asset Losses
Average Asset Losses Statistics
Average Ground Motion Field
Earthquake Ruptures
Events
Full Report
Ground Motion Fields
Hazard Curves
Hazard Maps
Input Files
Realizations
Source Loss Table'''.splitlines())
    with Print.patch() as p:
        sap.runline(f'openquake.commands engine --lo {job_id}')
    got = set(re.findall(r'\| ([\w ]+)', str(p))) - {'name'}
    if got != expected:
        print('Missing output', expected - got, file=sys.stderr)
    # sanity check on the performance views: make sure that the most
    # relevant information is stored (it can be lost due to a wrong
    # refactoring of the monitoring and it happened several times)
    with read(job_id) as dstore:
        perf = view('performance', dstore)
        self.assertIn('total event_based_risk', perf)
def export_from_db(output_key, calc_id, datadir, target):
    """
    :param output_key: a pair (ds_key, fmt)
    :param calc_id: calculation ID
    :param datadir: directory containing the datastore
    :param target: directory, temporary when called from the engine server
    :returns: the list of exported path names
    """
    makedirs(target)
    export.from_db = True
    ds_key, fmt = output_key
    with datastore.read(calc_id, datadir=datadir) as dstore:
        dstore.export_dir = target
        try:
            exported = export(output_key, dstore)
        except Exception:
            etype, err, tb = sys.exc_info()
            tb_str = ''.join(traceback.format_tb(tb))
            version = check_version(dstore)
            raise DataStoreExportError(
                'Could not export %s in %s%s\n%s%s' %
                (output_key + (version, tb_str, err)))
    return exported
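# Usage sketch with hypothetical arguments: export the hazard curves of
# calculation 1234 as CSV into /tmp/exported; the (ds_key, fmt) pair must be
# one of the combinations registered with the export framework.
if __name__ == '__main__':
    exported = export_from_db(
        ('hcurves', 'csv'), 1234, datastore.get_datadir(), '/tmp/exported')
    print(exported)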
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        parent = datastore.read(self.oqparam.hazard_calculation_id)
        self.csm_info = parent['csm_info']
        parent.close()
        self.calc_stats(parent)  # post-processing
        return {}
    with self.monitor('managing sources', autoflush=True):
        smap = parallel.Starmap(
            self.core_task.__func__, monitor=self.monitor())
        source_ids = []
        data = []
        for i, args in enumerate(self.gen_args()):
            smap.submit(*args)
            source_ids.append(get_src_ids(args[0]))
            for src in args[0]:  # collect source data
                data.append((i, src.nsites, src.num_ruptures, src.weight))
        self.datastore['task_sources'] = encode(source_ids)
        self.datastore.extend(
            'source_data', numpy.array(data, source_data_dt))
    self.nsites = []
    self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
    try:
        acc = smap.reduce(self.agg_dicts, self.acc0())
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(self.calc_times)
        self.calc_times.clear()  # save a bit of memory
    if not self.nsites:
        raise RuntimeError('All sources were filtered out!')
    logging.info('Effective sites per task: %d', numpy.mean(self.nsites))
    return acc
def plot_sites(calc_id=-1):
    """
    Plot the sites and the bounding boxes of the sources, enlarged by
    the maximum distance
    """
    # NB: matplotlib is imported inside since it is a costly import
    import matplotlib.pyplot as p
    from matplotlib.patches import Rectangle
    logging.basicConfig(level=logging.INFO)
    dstore = datastore.read(calc_id)
    oq = dstore['oqparam']
    sitecol = dstore['sitecol']
    srcfilter = SourceFilter(sitecol, oq.maximum_distance)
    csm = readinput.get_composite_source_model(oq).filter(srcfilter)
    fig = p.figure()
    ax = fig.add_subplot(111)
    ax.grid(True)
    for src in csm.get_sources():
        llcorner, width, height = srcfilter.get_rectangle(src)
        ax.add_patch(Rectangle(llcorner, width, height, fill=False))
    p.scatter(sitecol.lons, sitecol.lats, marker='+')
    p.show()
def importcalc(host, calc_id, username, password):
    """
    Import a remote calculation into the local database
    """
    logging.basicConfig(level=logging.INFO)
    if '/' in host.split('//', 1)[1]:
        sys.exit('Wrong host ending with /%s' % host.rsplit('/', 1)[1])
    calc_url = '/'.join([host, 'v1/calc', str(calc_id)])
    dbserver.ensure_on()
    job = logs.dbcmd('get_job', calc_id)
    if job is not None:
        sys.exit('There is already a job #%d in the local db' % calc_id)
    datadir = datastore.get_datadir()
    session = login(host, username, password)
    status = session.get('%s/status' % calc_url)
    if 'Log in to an existing account' in status.text:
        sys.exit('Could not login')
    json = status.json()
    if json["parent_id"]:
        sys.exit('The job has a parent (#%(parent_id)d) and cannot be '
                 'downloaded' % json)
    resp = session.get('%s/datastore' % calc_url, stream=True)
    assert resp.status_code == 200, resp.status_code
    fname = '%s/calc_%d.hdf5' % (datadir, calc_id)
    down = 0
    with open(fname, 'wb') as f:
        logging.info('%s -> %s', calc_url, fname)
        for chunk in resp.iter_content(CHUNKSIZE):
            f.write(chunk)
            down += len(chunk)
            general.println('Downloaded {:,} bytes'.format(down))
    print()
    logs.dbcmd('import_job', calc_id, json['calculation_mode'],
               json['description'], json['owner'], json['status'],
               json['parent_id'], datadir)
    with datastore.read(calc_id) as dstore:
        engine.expose_outputs(dstore)
    logging.info('Imported calculation %d successfully', calc_id)
def compute_loss_curves_maps(filename, elt_slice, clp, individual_curves,
                             monitor):
    """
    :param filename: path to the datastore
    :param elt_slice: slice of the event loss table
    :param clp: conditional loss poes used to compute the maps
    :param individual_curves: if True, build the individual curves and maps
    :param monitor: a Monitor instance
    :yields:
        dictionaries with keys idx, agg_curves-rlzs, agg_curves-stats,
        agg_maps-rlzs, agg_maps-stats
    """
    with datastore.read(filename) as dstore:
        oq = dstore['oqparam']
        stats = oq.hazard_stats()
        builder = get_loss_builder(dstore)
        R = len(dstore['weights'])
        losses = [[] for _ in range(R)]
        elt = dstore['losses_by_event'][elt_slice]
        for rec in elt:
            losses[rec['rlzi']].append(rec['loss'])
    results = []
    for multi_index, _ in numpy.ndenumerate(elt[0]['loss']):
        result = {}
        thelosses = [[ls[multi_index] for ls in loss] for loss in losses]
        result['agg_curves-rlzs'], result['agg_curves-stats'] = (
            builder.build_pair(thelosses, stats))
        if R > 1 and individual_curves is False:
            del result['agg_curves-rlzs']
        if clp:
            result['agg_maps-rlzs'], result['agg_maps-stats'] = (
                builder.build_loss_maps(thelosses, clp, stats))
            if R > 1 and individual_curves is False:
                del result['agg_maps-rlzs']
        for name, arr in result.items():
            if arr is not None:
                results.append((name, multi_index, arr))
    return results
def export_from_db(output_key, calc_id, datadir, target):
    """
    :param output_key: a pair (ds_key, fmt)
    :param calc_id: calculation ID
    :param datadir: directory containing the datastore
    :param target: directory, temporary when called from the engine server
    :returns: the list of exported path names
    """
    makedirs(target)
    export.from_db = True
    ds_key, fmt = output_key
    with datastore.read(calc_id, datadir=datadir) as dstore:
        dstore.export_dir = target
        try:
            exported = export(output_key, dstore)
        except Exception:
            etype, err, tb = sys.exc_info()
            tb_str = ''.join(traceback.format_tb(tb))
            version = check_version(dstore)
            raise DataStoreExportError(
                'Could not export %s in %s%s\n%s%s' %
                (output_key + (version, tb_str, err)))
    return exported
def basic_pre_execute(self):
    oq = self.oqparam
    self.read_risk_data()
    if 'source' in oq.inputs:
        wakeup_pool()  # fork before reading the source model
        if oq.hazard_calculation_id:  # already stored csm
            logging.info('Reusing composite source model of calc #%d',
                         oq.hazard_calculation_id)
            with datastore.read(oq.hazard_calculation_id) as dstore:
                csm = dstore['composite_source_model']
        else:
            csm = self.read_csm()
        logging.info('Prefiltering the CompositeSourceModel')
        with self.monitor('prefiltering source model', autoflush=True,
                          measuremem=True):
            self.src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
            self.csm = csm.filter(self.src_filter)
        csm.info.gsim_lt.check_imts(oq.imtls)
        self.datastore['csm_info'] = self.csm.info
    self.rup_data = {}
    self.init()
def plot_assets(calc_id=-1):
    """
    Plot the sites and the assets
    """
    # NB: matplotlib is imported inside since it is a costly import
    import matplotlib.pyplot as p
    from openquake.hmtk.plotting.patch import PolygonPatch
    dstore = datastore.read(calc_id)
    oq = dstore['oqparam']
    sitecol = dstore['sitecol']
    assetcol = dstore['assetcol'].array
    fig = p.figure()
    ax = fig.add_subplot(111)
    if oq.region:
        pp = PolygonPatch(shapely.wkt.loads(oq.region), alpha=0.01)
        ax.add_patch(pp)
    else:
        ax.grid(True)
    p.scatter(sitecol.complete.lons, sitecol.complete.lats, marker='.',
              color='gray')
    p.scatter(assetcol['lon'], assetcol['lat'], marker='.', color='green')
    p.scatter(sitecol.lons, sitecol.lats, marker='o', color='black')
    p.show()
def reduce_sm(calc_id):
    """
    Reduce the source model of the given (pre)calculation by discarding
    all sources that do not contribute to the hazard.
    """
    with datastore.read(calc_id) as dstore:
        oqparam = dstore['oqparam']
        info = dstore['source_info'][()]
    num_ids = len(info['source_id'])
    bad_ids = set(info[info['eff_ruptures'] == 0]['source_id'])
    if len(bad_ids) == 0:
        logging.info('Nothing to remove, unless there are '
                     'duplicated source IDs preventing the removal')
        return
    logging.info('Found %d far away sources', len(bad_ids))
    ok = info['eff_ruptures'] > 0
    if ok.sum() == 0:
        raise RuntimeError('All sources were filtered away!')
    ok_ids = general.group_array(info[ok][['source_id', 'code']], 'source_id')
    with performance.Monitor() as mon:
        good, total = readinput.reduce_source_model(
            oqparam.inputs['source_model_logic_tree'], ok_ids)
    logging.info('Removed %d/%d sources', total - good, num_ids)
    print(mon)
def extract(request, calc_id, what):
    """
    Wrapper over the `oq extract` command. If `settings.LOCKDOWN` is true
    only calculations owned by the current user can be retrieved.
    """
    job = logs.dbcmd('get_job', int(calc_id))
    if job is None:
        return HttpResponseNotFound()
    if not utils.user_has_permission(request, job.user_name):
        return HttpResponseForbidden()
    path = request.get_full_path()
    n = len(request.path_info)
    query_string = unquote_plus(path[n:])
    try:
        # read the data and save them on a temporary .npz file
        with datastore.read(job.ds_calc_dir + '.hdf5') as ds:
            fd, fname = tempfile.mkstemp(
                prefix=what.replace('/', '-'), suffix='.npz')
            os.close(fd)
            obj = _extract(ds, what + query_string)
            hdf5.save_npz(obj, fname)
    except Exception as exc:
        tb = ''.join(traceback.format_tb(exc.__traceback__))
        return HttpResponse(
            content='%s: %s in %s\n%s' %
                    (exc.__class__.__name__, exc, path, tb),
            content_type='text/plain', status=500)
    # stream the data back
    stream = FileWrapper(open(fname, 'rb'))
    stream.close = lambda: (FileWrapper.close(stream), os.remove(fname))
    response = FileResponse(stream, content_type='application/octet-stream')
    response['Content-Disposition'] = (
        'attachment; filename=%s' % os.path.basename(fname))
    response['Content-Length'] = str(os.path.getsize(fname))
    return response
def export_from_db(output_key, calc_id, datadir, target):
    """
    :param output_key: a pair (ds_key, fmt)
    :param calc_id: calculation ID
    :param datadir: directory containing the datastore
    :param target: directory, temporary when called from the engine server
    """
    makedirs(target)
    export.from_db = True
    ds_key, fmt = output_key
    with datastore.read(calc_id, datadir=datadir) as dstore:
        dstore.export_dir = target
        try:
            exported = export(output_key, dstore)
        except Exception:
            etype, err, tb = sys.exc_info()
            tb_str = ''.join(traceback.format_tb(tb))
            version = check_version(dstore)
            raise DataStoreExportError(
                'Could not export %s in %s%s\n%s%s' %
                (output_key + (version, tb_str, err)))
        if not exported:
            raise DataStoreExportError(
                'Nothing to export for %s' % ds_key)
        elif len(exported) > 1:
            # NB: I am hiding the archive by starting its name with a '.',
            # to avoid confusing the users, since the unzipped files are
            # already in the target directory; the archive is used internally
            # by the WebUI, so it must be there; it would be nice not to
            # generate it when not using the Web UI, but I will leave that
            # feature for after the removal of the old calculators
            archname = '.' + ds_key + '-' + fmt + '.zip'
            zipfiles(exported, os.path.join(target, archname))
            return os.path.join(target, archname)
        else:  # single file
            return exported[0]
def test_read(self):
    # windows does not manage permissions properly. Skip the test
    if sys.platform == 'win32':
        raise unittest.SkipTest('Windows')

    # case of a non-existing directory
    with self.assertRaises(OSError):
        read(42, datadir='/fake/directory')
    # case of a non-existing file
    with self.assertRaises(IOError):
        read(42, datadir='/tmp')
    # case of no read permission
    tmp = tempfile.mkdtemp()
    fname = os.path.join(tmp, 'calc_42.hdf5')
    open(fname, 'w').write('')
    os.chmod(fname, 0)
    with self.assertRaises(IOError) as ctx:
        read(42, datadir=tmp)
    self.assertIn('permission denied', str(ctx.exception).lower())
def test_read(self):
    # windows does not manage permissions properly. Skip the test
    if sys.platform == 'win32':
        raise unittest.SkipTest('Windows')

    # case of a non-existing directory
    with self.assertRaises(OSError):
        read(42, datadir='/fake/directory')
    # case of a non-existing file
    with self.assertRaises(IOError):
        read(42, datadir='/tmp')
    # case of no read permission
    tmp = tempfile.mkdtemp()
    fname = os.path.join(tmp, 'calc_42.hdf5')
    open(fname, 'w').write('')
    os.chmod(fname, 0)
    with self.assertRaises(IOError) as ctx:
        read(42, datadir=tmp)
    self.assertIn('permission denied', str(ctx.exception).lower())
    os.remove(fname)
def _calc_risk(hazard, param, monitor):
    gmfs = numpy.concatenate(hazard['gmfs'])
    events = numpy.concatenate(hazard['events'])
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
        weights = dstore['weights'][()]
    E = len(events)
    L = len(param['lba'].loss_names)
    A = sum(len(assets) for assets in assets_by_site)
    shape = assetcol.tagcol.agg_shape((E, L), param['aggregate_by'])
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    acc = dict(
        elt=numpy.zeros(shape, F32),  # shape (E, L, T...)
        alt=numpy.zeros((A, E, L), F32)
        if param['asset_loss_table'] else None,
        gmf_info=[], events_per_sid=0, lossbytes=0)
    arr = acc['elt']
    alt = acc['alt']
    lba = param['lba']
    tempname = param['tempname']
    tagnames = param['aggregate_by']
    eid2rlz = dict(events[['id', 'rlz_id']])
    eid2idx = {eid: idx for idx, eid in enumerate(eid2rlz)}
    for sid, haz in general.group_array(gmfs, 'sid').items():
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        acc['events_per_sid'] += len(haz)
        if param['avg_losses']:
            ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, tempname)
        eidx = [eid2idx[eid] for eid in haz['eid']]
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                tagidxs = tuple(idx - 1 for idx in tagi)
                losses_by_lt = {}
                for lti, lt in enumerate(crmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    losses_by_lt[lt] = losses
                for loss_idx, losses in lba.compute(asset, losses_by_lt):
                    arr[(eidx, loss_idx) + tagidxs] += losses
                    if param['avg_losses']:
                        lba.losses_by_A[aid, loss_idx] += (
                            losses @ ws * param['ses_ratio'])
                    acc['lossbytes'] += losses.nbytes
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    acc['gmf_info'] = numpy.array(hazard['gmf_info'], gmf_info_dt)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((event['id'], event['rlz_id'], losses)  # losses (L, T...)
         for event, losses in zip(events, arr) if losses.sum()), elt_dt)
    if param['avg_losses']:
        acc['losses_by_A'] = param['lba'].losses_by_A
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    if param['asset_loss_table']:
        acc['alt'] = alt, events['id']
    return acc
def ebrisk(rupgetter, srcfilter, param, monitor):
    """
    :param rupgetter: a RuptureGetter instance
    :param srcfilter: a SourceFilter instance
    :param param: a dictionary of parameters
    :param monitor: :class:`openquake.baselib.performance.Monitor` instance
    :returns: an ArrayWrapper with shape (E, L, T, ...)
    """
    riskmodel = param['riskmodel']
    E = rupgetter.num_events
    L = len(riskmodel.lti)
    N = len(srcfilter.sitecol.complete)
    e1 = rupgetter.first_event
    with monitor('getting assets', measuremem=False):
        with datastore.read(srcfilter.filename) as dstore:
            assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
    A = len(assetcol)
    getter = getters.GmfGetter(rupgetter, srcfilter, param['oqparam'])
    with monitor('getting hazard'):
        getter.init()  # instantiate the computers
        hazard = getter.get_hazard()  # sid -> (sid, eid, gmv)
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    events = rupgetter.get_eid_rlz()
    # numpy.testing.assert_equal(events['eid'], sorted(events['eid']))
    eid2idx = dict(zip(events['eid'], range(e1, e1 + E)))
    tagnames = param['aggregate_by']
    shape = assetcol.tagcol.agg_shape((E, L), tagnames)
    elt_dt = [('eid', U64), ('rlzi', U16), ('loss', (F32, shape[1:]))]
    if param['asset_loss_table']:
        alt = numpy.zeros((A, E, L), F32)
    acc = numpy.zeros(shape, F32)  # shape (E, L, T...)
    if param['avg_losses']:
        losses_by_A = numpy.zeros((A, L), F32)
    else:
        losses_by_A = 0
    # NB: IMT-dependent weights are not supported in ebrisk
    times = numpy.zeros(N)  # risk time per site_id
    num_events_per_sid = 0
    epspath = param['epspath']
    gmf_nbytes = 0
    for sid, haz in hazard.items():
        gmf_nbytes += haz.nbytes
        t0 = time.time()
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        num_events_per_sid += len(haz)
        if param['avg_losses']:
            weights = getter.weights[
                [getter.eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, epspath)
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']]) - e1
        haz['eid'] = eidx + e1
        with mon_risk:
            out = riskmodel.get_output(assets_by_taxo, haz)
        with mon_agg:
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                tagi = asset[tagnames] if tagnames else ()
                tagidxs = tuple(idx - 1 for idx in tagi)
                for lti, lt in enumerate(riskmodel.loss_types):
                    lratios = out[lt][a]
                    if lt == 'occupants':
                        losses = lratios * asset['occupants_None']
                    else:
                        losses = lratios * asset['value-' + lt]
                    if param['asset_loss_table']:
                        alt[aid, eidx, lti] = losses
                    acc[(eidx, lti) + tagidxs] += losses
                    if param['avg_losses']:
                        losses_by_A[aid, lti] += losses @ weights
        times[sid] = time.time() - t0
    if hazard:
        num_events_per_sid /= len(hazard)
    with monitor('building event loss table'):
        elt = numpy.fromiter(
            ((event['eid'], event['rlz'], losses)
             for event, losses in zip(events, acc) if losses.sum()), elt_dt)
        agg = general.AccumDict(accum=numpy.zeros(shape[1:], F32))  # rlz->agg
        for rec in elt:
            agg[rec['rlzi']] += rec['loss'] * param['ses_ratio']
    res = {'elt': elt, 'agg_losses': agg, 'times': times,
           'events_per_sid': num_events_per_sid, 'gmf_nbytes': gmf_nbytes}
    if param['avg_losses']:
        res['losses_by_A'] = losses_by_A * param['ses_ratio']
    if param['asset_loss_table']:
        eidx = numpy.array([eid2idx[eid] for eid in events['eid']])
        res['alt_eidx'] = alt, eidx
    return res
def _read_risk_data(self):
    # read the exposure (if any), the risk model (if any) and then the
    # site collection, possibly extracted from the exposure.
    oq = self.oqparam
    self.load_riskmodel()  # must be called first
    with self.monitor('reading site collection', autoflush=True):
        if oq.hazard_calculation_id:
            with datastore.read(oq.hazard_calculation_id) as dstore:
                haz_sitecol = dstore['sitecol'].complete
        else:
            haz_sitecol = readinput.get_site_collection(oq)
            if hasattr(self, 'rup'):
                # for scenario we reduce the site collection to the sites
                # within the maximum distance from the rupture
                haz_sitecol, _dctx = self.cmaker.filter(
                    haz_sitecol, self.rup)
                haz_sitecol.make_complete()
    oq_hazard = (self.datastore.parent['oqparam']
                 if self.datastore.parent else None)
    if 'exposure' in oq.inputs:
        self.read_exposure(haz_sitecol)
        self.datastore['assetcol'] = self.assetcol
    elif 'assetcol' in self.datastore.parent:
        assetcol = self.datastore.parent['assetcol']
        if oq.region:
            region = wkt.loads(self.oqparam.region)
            self.sitecol = haz_sitecol.within(region)
        if oq.shakemap_id or 'shakemap' in oq.inputs:
            self.sitecol, self.assetcol = self.read_shakemap(
                haz_sitecol, assetcol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
        elif hasattr(self, 'sitecol') and general.not_equal(
                self.sitecol.sids, haz_sitecol.sids):
            self.assetcol = assetcol.reduce(self.sitecol)
            self.datastore['assetcol'] = self.assetcol
            logging.info('Extracted %d/%d assets',
                         len(self.assetcol), len(assetcol))
        else:
            self.assetcol = assetcol
    else:  # no exposure
        self.sitecol = haz_sitecol
        logging.info('Read %d hazard sites', len(self.sitecol))
    if oq_hazard:
        parent = self.datastore.parent
        if 'assetcol' in parent:
            check_time_event(oq, parent['assetcol'].occupancy_periods)
        if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
            raise ValueError(
                'The risk configuration file has time_event=%s but the '
                'hazard was computed with time_event=%s' %
                (oq.time_event, oq_hazard.time_event))
    if self.oqparam.job_type == 'risk':
        taxonomies = set(taxo for taxo in self.assetcol.tagcol.taxonomy
                         if taxo != '?')
        # check that we are covering all the taxonomies in the exposure
        missing = taxonomies - set(self.riskmodel.taxonomies)
        if self.riskmodel and missing:
            raise RuntimeError('The exposure contains the taxonomies %s '
                               'which are not in the risk model' % missing)
        # same check for the consequence models, if any
        consequence_models = riskmodels.get_risk_models(
            self.oqparam, 'consequence')
        for lt, cm in consequence_models.items():
            missing = taxonomies - set(cm)
            if missing:
                raise ValueError('Missing consequenceFunctions for %s' %
                                 ' '.join(missing))
    if hasattr(self, 'sitecol'):
        self.datastore['sitecol'] = self.sitecol.complete
    self.param = {}  # used in the risk calculators
def pre_execute(self, pre_calculator=None):
    """
    Check if there is a previous calculation ID.
    If yes, read the inputs by retrieving the previous calculation;
    if not, read the inputs directly.
    """
    oq = self.oqparam
    if 'gmfs' in oq.inputs:  # read hazard from file
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with gmfs_file')
        self.read_inputs()
        save_gmfs(self)
    elif 'hazard_curves' in oq.inputs:  # read hazard from file
        assert not oq.hazard_calculation_id, (
            'You cannot use --hc together with hazard_curves')
        haz_sitecol = readinput.get_site_collection(oq)
        # NB: horrible: get_site_collection calls get_pmap_from_nrml
        # that sets oq.investigation_time, so it must be called first
        self.load_riskmodel()  # must be after get_site_collection
        self.read_exposure(haz_sitecol)  # define .assets_by_site
        self.datastore['poes/grp-00'] = readinput.pmap
        self.datastore['sitecol'] = self.sitecol
        self.datastore['assetcol'] = self.assetcol
        self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
        self.rlzs_assoc = fake.get_rlzs_assoc()
    elif oq.hazard_calculation_id:
        parent = datastore.read(oq.hazard_calculation_id)
        check_precalc_consistency(
            oq.calculation_mode, parent['oqparam'].calculation_mode)
        self.datastore.parent = parent
        # copy missing parameters from the parent
        params = {name: value for name, value in
                  vars(parent['oqparam']).items()
                  if name not in vars(self.oqparam)}
        self.save_params(**params)
        self.read_inputs()
        oqp = parent['oqparam']
        if oqp.investigation_time != oq.investigation_time:
            raise ValueError(
                'The parent calculation was using investigation_time=%s'
                ' != %s' % (oqp.investigation_time, oq.investigation_time))
        if oqp.minimum_intensity != oq.minimum_intensity:
            raise ValueError(
                'The parent calculation was using minimum_intensity=%s'
                ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity))
    elif pre_calculator:
        calc = calculators[pre_calculator](self.oqparam)
        calc.run(close=False)
        self.set_log_format()
        self.dynamic_parent = self.datastore.parent = calc.datastore
        self.oqparam.hazard_calculation_id = self.dynamic_parent.calc_id
        self.datastore['oqparam'] = self.oqparam
        self.param = calc.param
        self.sitecol = calc.sitecol
        self.assetcol = calc.assetcol
        self.riskmodel = calc.riskmodel
        self.rlzs_assoc = calc.rlzs_assoc
    else:
        self.read_inputs()