def gmf_to_hazard_curve_arg_gen(job):
    """
    Yield the task arguments for the GMF to hazard curve post-processing
    of ``job``.

    Every yielded tuple holds, in this order:

    * job ID
    * site (a record from the :class:`models.HazardSite` query)
    * logic tree realization ID
    * IMT
    * IMLs
    * hazard curve "collection" ID
    * investigation time
    * duration
    * SA period
    * SA damping

    See :func:`gmf_to_hazard_curve_task` for more information about these
    arguments.

    As a side effect, one output record and one
    :class:`openquake.engine.db.models.HazardCurve` record are created for
    each combination of :class:`openquake.engine.db.models.LtRealization`
    and IMT.

    :param job:
        :class:`openquake.engine.db.models.OqJob` instance.
    """
    calc = job.hazard_calculation
    site_records = models.HazardSite.objects.filter(hazard_calculation=calc)
    realizations = models.LtRealization.objects.filter(
        hazard_calculation=calc.id)

    investigation_time = calc.investigation_time
    duration = calc.ses_per_logic_tree_path * investigation_time

    imts_and_levels = calc.intensity_measure_types_and_levels
    for imt_string, levels in imts_and_levels.iteritems():
        imt_type, period, damping = models.parse_imt(imt_string)

        for rlz in realizations:
            display_name = HAZ_CURVE_DISP_NAME_FMT % dict(
                imt=imt_string, rlz=rlz.id)
            output = models.Output.objects.create_output(
                job, display_name, 'hazard_curve')

            # The hazard curve "collection" all sites of this
            # realization/IMT pair refer to:
            curve_coll = models.HazardCurve.objects.create(
                output=output,
                lt_realization=rlz,
                investigation_time=investigation_time,
                imt=imt_type,
                imls=levels,
                sa_period=period,
                sa_damping=damping)

            for site in site_records:
                yield (job.id, site, rlz.id, imt_type, levels,
                       curve_coll.id, investigation_time, duration,
                       period, damping)
def __init__(self, hazard_output_id, imt):
    """
    :param hazard_output_id:
        stored as-is in ``_hazard_output_id``
    :param imt:
        intensity measure type string; it is split by
        ``models.parse_imt`` into type, SA period and SA damping
    """
    self._imt, self._sa_period, self._sa_damping = models.parse_imt(imt)
    self._hazard_output_id = hazard_output_id
    # starts out empty
    self._cache = {}
def finalize_hazard_curves(self):
    """
    Create the final output records for hazard curves.

    The temporary results held in `htemp.hazard_curve_progress` are copied
    to `hzrdr.hazard_curve` (metadata) and `hzrdr.hazard_curve_data` (the
    actual curve PoE values). Each `hzrdr.hazard_curve` record references
    its `hzrdr.lt_realization`, since the realization information is
    needed to export the full hazard curve results.

    Everything runs inside a single 'reslt_writer' transaction.
    """
    with transaction.commit_on_success(using='reslt_writer'):
        calc = self.hc
        imts_and_levels = calc.intensity_measure_types_and_levels
        mesh = self.computation_mesh

        rlzs = models.LtRealization.objects.filter(
            hazard_calculation=calc.id)

        for rlz in rlzs:
            # one `HazardCurve` 'container' record per realization and
            # per intensity measure type
            for imt_string, levels in imts_and_levels.items():
                imt_type, period, damping = models.parse_imt(imt_string)

                output = models.Output(
                    owner=calc.owner,
                    oq_job=self.job,
                    display_name="hc-rlz-%s" % rlz.id,
                    output_type='hazard_curve',
                )
                output.save()

                curve = models.HazardCurve(
                    output=output,
                    lt_realization=rlz,
                    investigation_time=calc.investigation_time,
                    imt=imt_type,
                    imls=levels,
                    sa_period=period,
                    sa_damping=damping,
                )
                curve.save()

                # exactly one progress record is expected here; the
                # unpacking fails otherwise
                [progress] = models.HazardCurveProgress.objects.filter(
                    lt_realization=rlz.id, imt=imt_string)

                inserter = writer.BulkInserter(models.HazardCurveData)
                for idx, location in enumerate(mesh):
                    # row `idx` of the result matrix belongs to point `idx`
                    point_poes = progress.result_matrix[idx]
                    inserter.add_entry(
                        hazard_curve_id=curve.id,
                        poes=point_poes.tolist(),
                        location=location.wkt2d,
                        weight=rlz.weight,
                    )
                inserter.flush()
def setup(self):
    """
    Initialize the private variables of a hazard getter.

    Called by ``__init__`` and by ``__setstate``.
    """
    asset_points = [geo.point.Point(a.site.x, a.site.y)
                    for a in self.assets]
    assets_mesh = geo.mesh.Mesh.from_points_list(asset_points)
    self._assets_extent = assets_mesh.get_convex_hull()

    parsed_imt = models.parse_imt(self.imt)
    self._imt, self._sa_period, self._sa_damping = parsed_imt

    # assets indexed by their id
    self.asset_dict = {a.id: a for a in self.assets}
def __init__(self, hazard_id, imt, assets, max_distance):
    """
    :param hazard_id: stored as-is in ``hazard_id``
    :param imt:
        intensity measure type string, also split by ``models.parse_imt``
        into type, SA period and SA damping
    :param assets:
        iterable of objects exposing ``id``, ``site.x`` and ``site.y``
    :param max_distance: stored as-is in ``max_distance``
    """
    self.hazard_id, self.imt = hazard_id, imt
    self.assets, self.max_distance = assets, max_distance

    # mesh made of one point per asset site
    asset_points = [geo.point.Point(a.site.x, a.site.y)
                    for a in self.assets]
    self._assets_mesh = geo.mesh.Mesh.from_points_list(asset_points)

    parsed_imt = models.parse_imt(self.imt)
    self._imt, self._sa_period, self._sa_damping = parsed_imt

    # assets indexed by their id
    self.asset_dict = {a.id: a for a in self.assets}
    self._cache = {}
def __init__(self, hazard_outputs, assets, max_distance, imt):
    """
    :param hazard_outputs: stored as-is in ``hazard_outputs``
    :param assets:
        iterable of objects exposing ``id``, ``site.x`` and ``site.y``
    :param max_distance: stored as-is in ``max_distance``
    :param imt:
        intensity measure type string, also split by ``models.parse_imt``
        into ``imt_type``, ``sa_period`` and ``sa_damping``
    """
    self.hazard_outputs, self.assets = hazard_outputs, assets
    self.max_distance, self.imt = max_distance, imt

    parsed_imt = models.parse_imt(imt)
    self.imt_type, self.sa_period, self.sa_damping = parsed_imt

    # FIXME(lp). It is better to directly store the convex hull
    # instead of the mesh. We are not doing it because
    # hazardlib.Polygon is not (yet) pickleable
    asset_points = [geo.point.Point(a.site.x, a.site.y)
                    for a in self.assets]
    self._assets_mesh = geo.mesh.Mesh.from_points_list(asset_points)

    # assets indexed by their id
    self.asset_dict = {a.id: a for a in self.assets}
def __init__(self, hazard_outputs, assets, max_distance, imt):
    """
    :param hazard_outputs: stored as-is in ``hazard_outputs``
    :param assets:
        iterable of objects exposing ``id``, ``site.x`` and ``site.y``
    :param max_distance: stored as-is in ``max_distance``
    :param imt:
        intensity measure type string, also split by ``models.parse_imt``
        into ``imt_type``, ``sa_period`` and ``sa_damping``
    """
    self.hazard_outputs = hazard_outputs
    self.assets = assets
    self.max_distance = max_distance
    self.imt = imt

    imt_parts = models.parse_imt(imt)
    self.imt_type, self.sa_period, self.sa_damping = imt_parts

    # FIXME(lp). It is better to directly store the convex hull
    # instead of the mesh. We are not doing it because
    # hazardlib.Polygon is not (yet) pickleable
    site_points = [
        geo.point.Point(item.site.x, item.site.y) for item in self.assets
    ]
    self._assets_mesh = geo.mesh.Mesh.from_points_list(site_points)

    # assets indexed by their id
    self.asset_dict = {item.id: item for item in self.assets}
def hazard_outputs(self, hazard_calculation):
    """
    Select the hazard curve outputs of `hazard_calculation` matching
    ``self.imt``.

    Only the job with status "complete" and the latest `last_update` is
    considered, and only outputs whose hazard curve is associated with a
    logic tree realization are kept.

    :returns:
        the `output_set` records of that job with output type
        'hazard_curve' and matching IMT, SA period and SA damping,
        ordered by id
    """
    imt_type, period, damping = models.parse_imt(self.imt)

    completed_jobs = hazard_calculation.oqjob_set.filter(status="complete")
    latest_job = completed_jobs.latest('last_update')

    curve_outputs = latest_job.output_set.filter(
        output_type='hazard_curve',
        hazardcurve__imt=imt_type,
        hazardcurve__sa_period=period,
        hazardcurve__sa_damping=damping,
        hazardcurve__lt_realization__isnull=False)
    return curve_outputs.order_by('id')
def __init__(self, hazard_output, assets, max_distance, imt):
    """
    :param hazard_output:
        object exposing an ``output_container``; the container id is
        kept in ``hazard_id``
    :param assets:
        iterable of objects exposing ``id``, ``site.x`` and ``site.y``
    :param max_distance: stored as-is in ``max_distance``
    :param imt:
        intensity measure type string, also split by ``models.parse_imt``
        into ``imt_type``, ``sa_period`` and ``sa_damping``
    """
    self.hazard_output = hazard_output
    container = hazard_output.output_container
    self.hazard_id = container.id

    self.assets, self.max_distance, self.imt = assets, max_distance, imt

    imt_parts = models.parse_imt(imt)
    self.imt_type, self.sa_period, self.sa_damping = imt_parts

    # The weight comes from the logic tree realization of the container,
    # when it has a (truthy) one; otherwise there is no weight.
    weight = None
    if hasattr(container, 'lt_realization') and container.lt_realization:
        weight = container.lt_realization.weight
    self.weight = weight

    # FIXME(lp). It is better to directly store the convex hull
    # instead of the mesh. We are not doing it because
    # hazardlib.Polygon is not (yet) pickleable
    site_points = [geo.point.Point(a.site.x, a.site.y)
                   for a in self.assets]
    self._assets_mesh = geo.mesh.Mesh.from_points_list(site_points)

    # assets indexed by their id
    self.asset_dict = {a.id: a for a in self.assets}
def import_gmf_scenario(fileobj, user=None):
    """
    Parse the file with the GMF fields and import it into the table
    gmf_scenario. It also creates a new output record, unrelated to a job.
    Works both with XML files and tab-separated files with format
    (imt, gmvs, location).

    :param fileobj:
        an open file-like object with a ``name`` attribute; a name ending
        in '.xml' selects the XML parser, anything else is read as
        tab-separated lines
    :param user:
        optional user name; when empty the owner is the result of
        ``get_current_user()``
    :returns:
        the generated :class:`openquake.engine.db.models.Output` object
        and the generated
        :class:`openquake.engine.db.models.HazardCalculation` object.
    """
    fname = fileobj.name

    if user:
        owner = models.OqUser.objects.get(user_name=user)
    else:
        owner = get_current_user()

    hc = models.HazardCalculation.objects.create(
        owner=owner,
        base_path=os.path.dirname(fname),
        description='Scenario importer, file %s' % os.path.basename(fname),
        calculation_mode='scenario', maximum_distance=100)
    # XXX: probably the maximum_distance should be entered by the user

    out = models.Output.objects.create(
        owner=owner,
        display_name='Imported from %r' % fname,
        output_type='gmf_scenario')

    gmf_coll = models.Gmf.objects.create(output=out)

    rows = []
    if fname.endswith('.xml'):
        # convert the XML into a tab-separated StringIO
        for imt, gmvs, loc in GMFScenarioParser(fileobj).parse():
            imt_type, sa_period, sa_damping = models.parse_imt(imt)
            # A missing value is written as backslash-N. The backslash is
            # escaped explicitly: a bare '\N' is an unrecognised escape,
            # which is a syntax error on Python 3.
            sa_period = '\\N' if sa_period is None else str(sa_period)
            sa_damping = '\\N' if sa_damping is None else str(sa_damping)
            # turn the list repr "[a, b, ...]" into "{a, b, ...}"
            gmvs = '{%s}' % str(gmvs)[1:-1]
            rows.append([imt_type, sa_period, sa_damping, gmvs, loc])
    else:  # assume a tab-separated file
        # NOTE(review): the last field of each row keeps its line
        # terminator; presumably `import_rows` copes with it -- confirm.
        for line in fileobj:
            rows.append(line.split('\t'))
    import_rows(hc, gmf_coll, rows)
    return out, hc
def import_gmf_scenario(fileobj):
    """
    Parse the file with the GMF fields and import it into the table
    gmf_scenario. It also creates a new output record, unrelated to a job.
    Works both with XML files and tab-separated files with format
    (imt, gmvs, location).

    :param fileobj:
        an open file-like object with a ``name`` attribute; a name ending
        in ".xml" selects the XML parser, anything else is read as
        tab-separated lines
    :returns:
        the generated :class:`openquake.engine.db.models.Output` object
        and the generated
        :class:`openquake.engine.db.models.HazardCalculation` object.
    """
    fname = fileobj.name

    hc = models.HazardCalculation.objects.create(
        base_path=os.path.dirname(fname),
        description="Scenario importer, file %s" % os.path.basename(fname),
        calculation_mode="scenario",
        maximum_distance=100,
    )
    # XXX: probably the maximum_distance should be entered by the user

    out = models.Output.objects.create(
        display_name="Imported from %r" % fname,
        output_type="gmf_scenario")

    gmf_coll = models.Gmf.objects.create(output=out)

    rows = []
    if fname.endswith(".xml"):
        # convert the XML into a tab-separated StringIO
        for imt, gmvs, loc in GMFScenarioParser(fileobj).parse():
            imt_type, sa_period, sa_damping = models.parse_imt(imt)
            # A missing value is written as backslash-N. The backslash is
            # escaped explicitly: a bare "\N" is an unrecognised escape,
            # which is a syntax error on Python 3.
            sa_period = "\\N" if sa_period is None else str(sa_period)
            sa_damping = "\\N" if sa_damping is None else str(sa_damping)
            # turn the list repr "[a, b, ...]" into "{a, b, ...}"
            gmvs = "{%s}" % str(gmvs)[1:-1]
            rows.append([imt_type, sa_period, sa_damping, gmvs, loc])
    else:  # assume a tab-separated file
        # NOTE(review): the last field of each row keeps its line
        # terminator; presumably `import_rows` copes with it -- confirm.
        for line in fileobj:
            rows.append(line.split("\t"))
    import_rows(hc, gmf_coll, rows)
    return out, hc
def import_gmf_scenario(fileobj):
    """
    Parse the file with the GMF fields and import it into the table
    gmf_scenario. It also creates a new output record, unrelated to a job.
    Works both with XML files and tab-separated files with format
    (imt, gmvs, location).

    :param fileobj:
        an open file-like object with a ``name`` attribute; a name ending
        in '.xml' selects the XML parser, anything else is read as
        tab-separated lines
    :returns:
        the generated :class:`openquake.engine.db.models.Output` object
        and the generated
        :class:`openquake.engine.db.models.HazardCalculation` object.
    """
    fname = fileobj.name

    hc = models.HazardCalculation.objects.create(
        base_path=os.path.dirname(fname),
        description='Scenario importer, file %s' % os.path.basename(fname),
        calculation_mode='scenario', maximum_distance=100)
    # XXX: probably the maximum_distance should be entered by the user

    out = models.Output.objects.create(
        display_name='Imported from %r' % fname,
        output_type='gmf_scenario')

    gmf_coll = models.Gmf.objects.create(output=out)

    rows = []
    if fname.endswith('.xml'):
        # convert the XML into a tab-separated StringIO
        for imt, gmvs, loc in GMFScenarioParser(fileobj).parse():
            imt_type, sa_period, sa_damping = models.parse_imt(imt)
            # A missing value is written as backslash-N. The backslash is
            # escaped explicitly: a bare '\N' is an unrecognised escape,
            # which is a syntax error on Python 3.
            sa_period = '\\N' if sa_period is None else str(sa_period)
            sa_damping = '\\N' if sa_damping is None else str(sa_damping)
            # turn the list repr "[a, b, ...]" into "{a, b, ...}"
            gmvs = '{%s}' % str(gmvs)[1:-1]
            rows.append([imt_type, sa_period, sa_damping, gmvs, loc])
    else:  # assume a tab-separated file
        # NOTE(review): the last field of each row keeps its line
        # terminator; presumably `import_rows` copes with it -- confirm.
        for line in fileobj:
            rows.append(line.split('\t'))
    import_rows(hc, gmf_coll, rows)
    return out, hc
def extract(hc_id, a_writer):
    """
    Write the ground motion values of a hazard calculation to `a_writer`.

    One row is written per logic tree realization, intensity measure type
    and site (sites ordered by id). A row is made of the realization id,
    the site x and y coordinates, the IMT type, the SA period and then one
    ground motion value per rupture of the realization, ordered by rupture
    id; ruptures without a stored value get 0.0.

    "PGA" is written out as "SA" with period 0; the database lookup still
    uses the original type and period.

    :param hc_id:
        primary key of the :class:`models.HazardCalculation` to extract
    :param a_writer:
        object with a ``writerow(list)`` method
    """
    hc = models.HazardCalculation.objects.get(pk=hc_id)

    # The site query does not depend on realization or IMT: build it once.
    # A fully iterated queryset keeps its results, so later passes do not
    # hit the database again.
    sites = hc.hazardsite_set.all().order_by('id')

    for lt in models.LtRealization.objects.filter(hazard_calculation=hc):
        # Rupture ids and stochastic event sets depend on the realization
        # only, so they are fetched once here instead of once per IMT
        # (ruptures) or once per IMT and site (SES).
        ruptures = sorted(
            r.id for r in models.SESRupture.objects.filter(
                ses__ses_collection__lt_realization=lt))
        ses_list = list(models.SES.objects.filter(
            ses_collection__lt_realization=lt).order_by('id'))

        for imt in hc.intensity_measure_types:
            imt_type, sa_period, _ = models.parse_imt(imt)

            if imt_type == "PGA":
                imt_type_fix = "SA"
                sa_period_fix = 0
            else:
                imt_type_fix = imt_type
                sa_period_fix = sa_period

            for site in sites:
                # rupture id -> ground motion value, over all the SES
                gmvs_data = dict()
                for ses in ses_list:
                    for gmf in models.GmfData.objects.filter(
                            ses=ses, site=site,
                            imt=imt_type, sa_period=sa_period):
                        gmvs_data.update(zip(gmf.rupture_ids, gmf.gmvs))

                # NOTE(review): the values are collected once per site,
                # after all the SES have been read -- confirm this matches
                # the intended nesting.
                gmvs = [gmvs_data.get(r, 0.0) for r in ruptures]
                a_writer.writerow([
                    lt.id, site.location.x, site.location.y,
                    imt_type_fix, sa_period_fix
                ] + gmvs)
def extract(hc_id, a_writer):
    """
    Write the ground motion values of a hazard calculation to `a_writer`.

    One row is written per logic tree realization, intensity measure type
    and site (sites ordered by id). A row is made of the realization id,
    the site x and y coordinates, the IMT type, the SA period and then one
    ground motion value per rupture of the realization, ordered by rupture
    id; ruptures without a stored value get 0.0.

    "PGA" is written out as "SA" with period 0; the database lookup still
    uses the original type and period.

    :param hc_id:
        primary key of the :class:`models.HazardCalculation` to extract
    :param a_writer:
        object with a ``writerow(list)`` method
    """
    hc = models.HazardCalculation.objects.get(pk=hc_id)

    # The site query does not depend on realization or IMT: build it once.
    # A fully iterated queryset keeps its results, so later passes do not
    # hit the database again.
    sites = hc.hazardsite_set.all().order_by('id')

    for lt in models.LtRealization.objects.filter(
            hazard_calculation=hc):
        # Rupture ids and stochastic event sets depend on the realization
        # only, so they are fetched once here instead of once per IMT
        # (ruptures) or once per IMT and site (SES).
        ruptures = sorted(
            r.id for r in models.SESRupture.objects.filter(
                ses__ses_collection__lt_realization=lt))
        ses_list = list(models.SES.objects.filter(
            ses_collection__lt_realization=lt).order_by('id'))

        for imt in hc.intensity_measure_types:
            imt_type, sa_period, _ = models.parse_imt(imt)

            if imt_type == "PGA":
                imt_type_fix = "SA"
                sa_period_fix = 0
            else:
                imt_type_fix = imt_type
                sa_period_fix = sa_period

            for site in sites:
                # rupture id -> ground motion value, over all the SES
                gmvs_data = dict()
                for ses in ses_list:
                    for gmf in models.GmfData.objects.filter(
                            ses=ses, site=site,
                            imt=imt_type, sa_period=sa_period):
                        gmvs_data.update(zip(gmf.rupture_ids, gmf.gmvs))

                # NOTE(review): the values are collected once per site,
                # after all the SES have been read -- confirm this matches
                # the intended nesting.
                gmvs = [gmvs_data.get(r, 0.0) for r in ruptures]
                a_writer.writerow([lt.id, site.location.x, site.location.y,
                                   imt_type_fix, sa_period_fix] + gmvs)
def test_sa(self):
    # "SA(0.1)" must split into the type "SA" and the period 0.1, with the
    # damping set to the module default.
    imt_type, period, damping = models.parse_imt("SA(0.1)")

    self.assertEqual("SA", imt_type)
    self.assertEqual(0.1, period)
    self.assertEqual(models.DEFAULT_SA_DAMPING, damping)
def test_pga(self):
    # "PGA" carries neither a period nor a damping value.
    imt_type, period, damping = models.parse_imt("PGA")

    self.assertEqual("PGA", imt_type)
    self.assertEqual(None, period)
    self.assertEqual(None, damping)
def finalize_hazard_curves(self):
    """
    Create the final output records for hazard curves.

    The temporary results held in `htemp.hazard_curve_progress` are copied
    to `hzrdr.hazard_curve` (metadata) and `hzrdr.hazard_curve_data` (the
    actual curve PoE values) by the `hzrdr.finalize_hazard_curves` stored
    function. Each `hzrdr.hazard_curve` record references its
    `hzrdr.lt_realization`, since the realization information is needed to
    export the full hazard curve results.
    """
    calc = self.hc
    imts_and_levels = calc.intensity_measure_types_and_levels
    mesh = calc.points_to_compute()

    # site coordinates, formatted as "{v1, v2, ...}" for the stored
    # function call
    lons = '{%s}' % ', '.join(map(str, mesh.lons))
    lats = '{%s}' % ', '.join(map(str, mesh.lats))

    rlzs = models.LtRealization.objects.filter(hazard_calculation=calc.id)

    for rlz in rlzs:
        # one `HazardCurve` 'container' record per realization
        # (virtual container for multiple imts)
        multi_output = models.Output.objects.create_output(
            self.job, "hc-multi-imt-rlz-%s" % rlz.id,
            "hazard_curve_multi")
        models.HazardCurve.objects.create(
            output=multi_output,
            lt_realization=rlz,
            imt=None,
            investigation_time=calc.investigation_time)

        # one `HazardCurve` 'container' record per realization and per
        # intensity measure type
        for imt_string, levels in imts_and_levels.items():
            imt_type, period, damping = models.parse_imt(imt_string)

            output = models.Output.objects.create(
                oq_job=self.job,
                display_name="Hazard Curve rlz-%s" % rlz.id,
                output_type='hazard_curve',
            )

            curve = models.HazardCurve(
                output=output,
                lt_realization=rlz,
                investigation_time=calc.investigation_time,
                imt=imt_type,
                imls=levels,
                sa_period=period,
                sa_damping=damping,
            )
            curve.save()

            with transaction.commit_on_success(using='reslt_writer'):
                cursor = connections['reslt_writer'].cursor()

                # TODO(LB): I don't like the fact that we have to pass
                # potentially huge arguments (100k sites, for example).
                # I would like to be able to fetch this site data from
                # the stored function, but at the moment, the only form
                # available is a pickled `SiteCollection` object, and I've
                # experienced problems trying to import third-party libs
                # in a DB function context and could not get it to
                # reliably work.
                # As a fix, in addition to caching the pickled
                # SiteCollection in the DB, we could store also arrays for
                # lons and lats. It's duplicated information, but we have
                # a relatively low number of HazardCalculation records, so
                # it shouldn't be a big deal.
                query_args = [calc.id, rlz.id, curve.id, imt_string,
                              lons, lats]
                cursor.execute(
                    """
                    SELECT hzrdr.finalize_hazard_curves(
                        %s, %s, %s, %s, %s, %s)
                    """,
                    query_args
                )
def do_aggregate_post_proc(self):
    """
    Grab hazard data for all realizations and sites from the database and
    compute mean and/or quantile aggregates (depending on which options
    are enabled in the calculation).

    Post-processing results will be stored directly into the database.

    For each enabled statistic a "multi-imt" container is created first;
    then, per intensity measure type, an output and a hazard curve
    container are created and filled with one aggregated curve per site.

    :raises ZeroDivisionError:
        if the calculation has no logic tree realizations
    """
    num_rlzs = models.LtRealization.objects.filter(
        hazard_calculation=self.hc).count()

    # Explicit floor division: a plain `/` would only truncate under
    # Python 2 without `from __future__ import division`; anywhere else it
    # yields a float and the `== 0` guard below would never trigger.
    num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) // int(num_rlzs)
    if num_site_blocks_per_incr == 0:
        # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
        # The minimum number of sites should be 1.
        num_site_blocks_per_incr = 1
    slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

    if self.hc.mean_hazard_curves:
        # create a new `HazardCurve` 'container' record for mean
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, "mean-curves-multi-imt",
                "hazard_curve_multi"),
            statistics="mean",
            imt=None,
            investigation_time=self.hc.investigation_time)

    if self.hc.quantile_hazard_curves:
        for quantile in self.hc.quantile_hazard_curves:
            # create a new `HazardCurve` 'container' record for quantile
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, 'quantile(%s)-curves' % quantile,
                    "hazard_curve_multi"),
                statistics="quantile",
                imt=None,
                quantile=quantile,
                investigation_time=self.hc.investigation_time)

    for imt, imls in self.hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = models.parse_imt(imt)

        # prepare `output` and `hazard_curve` containers in the DB;
        # keys are 'mean' and 'q<quantile>':
        container_ids = dict()
        if self.hc.mean_hazard_curves:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='Mean Hazard Curves %s' % imt,
                output_type='hazard_curve'
            )
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=self.hc.investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean'
            )
            container_ids['mean'] = mean_hc.id

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                q_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name=(
                        '%s quantile Hazard Curves %s' % (quantile, imt)
                    ),
                    output_type='hazard_curve'
                )
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='quantile',
                    quantile=quantile
                )
                container_ids['q%s' % quantile] = q_hc.id

        all_curves_for_imt = models.order_by_location(
            models.HazardCurveData.objects.all_curves_for_imt(
                self.job.id, im_type, sa_period, sa_damping))

        with transaction.commit_on_success(using='reslt_writer'):
            inserter = writer.CacheInserter(
                models.HazardCurveData, CURVE_CACHE_SIZE)

            for chunk in models.queryset_iter(all_curves_for_imt,
                                              slice_incr):
                # slice each chunk by `num_rlzs` into `site_chunk`
                # and compute the aggregate
                for site_chunk in block_splitter(chunk, num_rlzs):
                    # the location of the first curve stands for the
                    # whole block
                    site = site_chunk[0].location
                    curves_poes = [x.poes for x in site_chunk]
                    curves_weights = [x.weight for x in site_chunk]

                    # do means and quantiles
                    # quantiles first:
                    if self.hc.quantile_hazard_curves:
                        for quantile in self.hc.quantile_hazard_curves:
                            if self.hc.number_of_logic_tree_samples == 0:
                                # explicitly weighted quantiles
                                q_curve = weighted_quantile_curve(
                                    curves_poes, curves_weights, quantile
                                )
                            else:
                                # implicitly weighted quantiles
                                q_curve = quantile_curve(
                                    curves_poes, quantile
                                )
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=(
                                        container_ids['q%s' % quantile]),
                                    poes=q_curve.tolist(),
                                    location=site.wkt)
                            )

                    # then means
                    if self.hc.mean_hazard_curves:
                        m_curve = mean_curve(
                            curves_poes, weights=curves_weights
                        )
                        inserter.add(
                            models.HazardCurveData(
                                hazard_curve_id=container_ids['mean'],
                                poes=m_curve.tolist(),
                                location=site.wkt)
                        )
            inserter.flush()
def compute_disagg(job_id, sites, lt_rlz_id, ltp):
    """
    Compute disaggregation histograms and save the results to the
    database.

    The calculation proceeds as follows:

    1. Get all sources
    2. Get IMTs
    3. Get the hazard curve for each point, IMT, and realization
    4. For each `poes_disagg`, interpolate the IML for each curve.
    5. Get GSIMs, TOM (Temporal Occurrence Model), and truncation level.
    6. Get histogram bin edges.
    7. Prepare calculation args.
    8. Call the hazardlib calculator
       (see :func:`openquake.hazardlib.calc.disagg.disaggregation`
       for more info).

    :param int job_id:
        ID of the currently running
        :class:`openquake.engine.db.models.OqJob`
    :param list sites:
        `list` of :class:`openquake.hazardlib.site.Site` objects, which
        indicate the locations (and associated soil parameters) for which
        we need to compute disaggregation histograms.
    :param int lt_rlz_id:
        ID of the :class:`openquake.engine.db.models.LtRealization` for
        which we want to compute disaggregation histograms. This
        realization will determine which hazard curve results to use as a
        basis for the calculation.
    :param ltp:
        a :class:`openquake.engine.input.LogicTreeProcessor` instance
    """
    # Silencing 'Too many local variables'
    # pylint: disable=R0914
    logs.LOG.debug(
        '> computing disaggregation for %(np)s sites for realization %(rlz)s'
        % dict(np=len(sites), rlz=lt_rlz_id))

    job = models.OqJob.objects.get(id=job_id)
    calc = job.hazard_calculation
    realization = models.LtRealization.objects.get(id=lt_rlz_id)

    apply_uncertainties = ltp.parse_source_model_logictree_path(
        realization.sm_lt_path)
    gsims = ltp.parse_gmpe_logictree_path(realization.gsim_lt_path)

    progress_records = models.SourceProgress.objects.filter(
        lt_realization=realization).order_by('id')
    source_ids = progress_records.values_list('parsed_source_id', flat=True)
    sources = [
        apply_uncertainties(parsed.nrml)
        for parsed in models.ParsedSource.objects.filter(pk__in=source_ids)
    ]

    # Make filters for distance to source and distance to rupture:
    # a better approach would be to filter the sources on distance
    # before, see the comment in the classical calculator
    filters = openquake.hazardlib.calc.filters
    src_site_filter = filters.source_site_distance_filter(
        calc.maximum_distance)
    rup_site_filter = filters.rupture_site_distance_filter(
        calc.maximum_distance)

    imts_and_levels = calc.intensity_measure_types_and_levels
    for imt_string, levels in imts_and_levels.iteritems():
        hazardlib_imt = haz_general.imt_to_hazardlib(imt_string)
        imt_type, period, damping = models.parse_imt(imt_string)

        # levels in reverse order, to go with the reversed PoEs in the
        # interpolation below
        reversed_imls = numpy.array(levels[::-1])

        # loop over sites
        for site in sites:
            # get curve for this point/IMT/realization; exactly one
            # record is expected
            [curve] = models.HazardCurveData.objects.filter(
                location=site.location.wkt2d,
                hazard_curve__lt_realization=lt_rlz_id,
                hazard_curve__imt=imt_type,
                hazard_curve__sa_period=period,
                hazard_curve__sa_damping=damping,
            )

            # If the hazard curve is all zeros, don't even do the
            # disagg calculation.
            if all(value == 0.0 for value in curve.poes):
                logs.LOG.debug(
                    '* hazard curve contained all 0 probability values; '
                    'skipping')
                continue

            for poe in calc.poes_disagg:
                iml = numpy.interp(poe, curve.poes[::-1], reversed_imls)
                calc_kwargs = dict(
                    sources=sources,
                    site=site,
                    imt=hazardlib_imt,
                    iml=iml,
                    gsims=gsims,
                    time_span=calc.investigation_time,
                    truncation_level=calc.truncation_level,
                    n_epsilons=calc.num_epsilon_bins,
                    mag_bin_width=calc.mag_bin_width,
                    dist_bin_width=calc.distance_bin_width,
                    coord_bin_width=calc.coordinate_bin_width,
                    source_site_filter=src_site_filter,
                    rupture_site_filter=rup_site_filter,
                )
                with EnginePerformanceMonitor(
                        'computing disaggregation', job_id, disagg_task):
                    disagg = openquake.hazardlib.calc.disagg
                    bin_edges, diss_matrix = \
                        disagg.disaggregation_poissonian(**calc_kwargs)
                    if not bin_edges:  # no ruptures generated
                        continue

                with EnginePerformanceMonitor(
                        'saving disaggregation', job_id, disagg_task):
                    _save_disagg_matrix(
                        job, site, bin_edges, diss_matrix, realization,
                        calc.investigation_time, imt_type, iml, poe,
                        period, damping)

    with transaction.commit_on_success():
        # Update realization progress,
        # mark realization as complete if it is done
        haz_general.update_realization(lt_rlz_id, len(sites))

    logs.LOG.debug('< done computing disaggregation')
def test_complete_event_based_calculation_cycle(self):
    # * Run `pre_execute()`.
    # * Execute the `stochastic_event_sets` task as a normal function.
    # * Check that the proper results (GMF, SES) were computed.
    # * Finally, call `post_execute()` and verify that `complete logic
    #   tree` artifacts were created.
    # NOTE(review): no explicit `post_execute()` call is visible in this
    # test body, and the task actually invoked is `core.ses_and_gmfs` --
    # confirm the outline above is still accurate.

    # There are 4 sources in the test input model; we can test them all
    # with 1 task.
    sources_per_task = 4

    self.calc.pre_execute()
    # Test the job stats:
    job_stats = models.JobStats.objects.get(oq_job=self.job.id)
    # num sources * num lt samples / block size (items per task):
    self.assertEqual(8, job_stats.num_tasks)
    self.assertEqual(121, job_stats.num_sites)
    self.assertEqual(2, job_stats.num_realizations)

    # Mark the job as running, as it would be during a real calculation.
    self.job.is_running = True
    self.job.status = 'executing'
    self.job.save()

    hc = self.job.hazard_calculation

    # Exactly two realizations are expected; the unpacking fails otherwise.
    rlz1, rlz2 = models.LtRealization.objects.filter(
        hazard_calculation=hc.id).order_by('ordinal')

    task_arg_gen = self.calc.task_arg_gen(sources_per_task)
    task_arg_list = list(task_arg_gen)

    self.assertEqual(2, len(task_arg_list))

    # Now test the completion signal messaging of the task:
    def test_callback(body, message):
        # Each completion message must carry the job id and the number of
        # items handled by the task.
        self.assertEqual(
            dict(job_id=self.job.id, num_items=sources_per_task), body)
        message.ack()

    exchange, conn_args = base.exchange_and_conn_args()

    routing_key = base.ROUTING_KEY_FMT % dict(job_id=self.job.id)
    task_signal_queue = kombu.Queue(
        'htasks.job.%s' % self.job.id, exchange=exchange,
        routing_key=routing_key, durable=False, auto_delete=True)

    with kombu.BrokerConnection(**conn_args) as conn:
        task_signal_queue(conn.channel()).declare()
        with conn.Consumer(task_signal_queue, callbacks=[test_callback]):
            # call the task as a normal function
            for args in task_arg_list:
                core.ses_and_gmfs(*args)

                # wait for the completion signal
                conn.drain_events()

    # Check the 'total' counter (computed by the task arg generator):
    # 2 realizations * 4 sources = 8 total
    self.assertEqual(8, self.calc.progress['total'])

    # Now check that we saved the right number of ruptures to the DB.
    ruptures1 = models.SESRupture.objects.filter(
        ses__ses_collection__lt_realization=rlz1)
    self.assertEqual(118, ruptures1.count())

    ruptures2 = models.SESRupture.objects.filter(
        ses__ses_collection__lt_realization=rlz2)
    self.assertEqual(92, ruptures2.count())

    # Check that we have the right number of gmf_sets.
    # The correct number is (num_realizations * ses_per_logic_tree_path).
    gmf_sets = models.GmfSet.objects.filter(
        gmf_collection__output__oq_job=self.job.id,
        complete_logic_tree_gmf=False)
    # 2 realizations, 5 ses_per_logic_tree_path
    self.assertEqual(10, gmf_sets.count())

    for imt in hc.intensity_measure_types:
        # `imt` is rebound from the raw string to the parsed IMT type.
        imt, sa_period, sa_damping = models.parse_imt(imt)
        # Now check that we have the right number of GMFs in the DB.
        for gmf_set in gmf_sets:

            # For each gmf_set, we should have a number of GMF records
            # equal to the numbers of sites in the calculation, _per_ IMT.
            # In this case, that's 121.
            gmfs = models.Gmf.objects.filter(
                gmf_set=gmf_set, imt=imt, sa_period=sa_period,
                sa_damping=sa_damping)

            # Sanity check: make sure they all came from the same task:
            task_ord = gmfs[0].result_grp_ordinal
            self.assertTrue(
                all(x.result_grp_ordinal == task_ord for x in gmfs))

            # Expected number of ruptures:
            exp_n_rups = models.SESRupture.objects.filter(
                ses__ses_collection__output__oq_job=self.job.id,
                ses__ordinal=gmf_set.ses_ordinal,
                result_grp_ordinal=task_ord).count()

            self.assertEqual(121, gmfs.count())
            # one ground motion value per expected rupture in every record
            self.assertTrue(all(len(x.gmvs) == exp_n_rups for x in gmfs))

    # TODO: At some point, we'll need to test the actual values of these
    # ruptures. We'll need to collect QA test data for this.

    # Check the complete logic tree SES and make sure it contains
    # all of the ruptures.
    complete_lt_ses = models.SES.objects.get(
        ses_collection__output__oq_job=self.job.id,
        ses_collection__output__output_type='complete_lt_ses',
        complete_logic_tree_ses=True)

    clt_ses_ruptures = models.SESRupture.objects.filter(
        ses=complete_lt_ses.id)

    # 118 + 92 ruptures of the two realizations checked above
    self.assertEqual(210, clt_ses_ruptures.count())

    # Test the computed `investigation_time`
    # 2 lt realizations * 5 ses_per_logic_tree_path * 50.0 years
    self.assertEqual(500.0, complete_lt_ses.investigation_time)

    self.assertIsNone(complete_lt_ses.ordinal)
def finalize_hazard_curves(self):
    """
    Create the final output records for hazard curves.

    The temporary results held in `htemp.hazard_curve_progress` are copied
    to `hzrdr.hazard_curve` (metadata) and `hzrdr.hazard_curve_data` (the
    actual curve PoE values) by the `hzrdr.finalize_hazard_curves` stored
    function. Each `hzrdr.hazard_curve` record references its
    `hzrdr.lt_realization`, since the realization information is needed to
    export the full hazard curve results.
    """
    calc = self.hc
    imts_and_levels = calc.intensity_measure_types_and_levels
    mesh = calc.points_to_compute()

    # site coordinates, formatted as "{v1, v2, ...}" for the stored
    # function call
    lons = '{%s}' % ', '.join(map(str, mesh.lons))
    lats = '{%s}' % ', '.join(map(str, mesh.lats))

    rlzs = models.LtRealization.objects.filter(hazard_calculation=calc.id)

    for rlz in rlzs:
        # one `HazardCurve` 'container' record per realization
        # (virtual container for multiple imts)
        multi_output = models.Output.objects.create_output(
            self.job, "hc-multi-imt-rlz-%s" % rlz.id,
            "hazard_curve_multi")
        models.HazardCurve.objects.create(
            output=multi_output,
            lt_realization=rlz,
            imt=None,
            investigation_time=calc.investigation_time)

        # one `HazardCurve` 'container' record per realization and per
        # intensity measure type
        for imt_string, levels in imts_and_levels.items():
            imt_type, period, damping = models.parse_imt(imt_string)

            output = models.Output.objects.create(
                oq_job=self.job,
                display_name="hc-rlz-%s" % rlz.id,
                output_type='hazard_curve',
            )

            curve = models.HazardCurve(
                output=output,
                lt_realization=rlz,
                investigation_time=calc.investigation_time,
                imt=imt_type,
                imls=levels,
                sa_period=period,
                sa_damping=damping,
            )
            curve.save()

            with transaction.commit_on_success(using='reslt_writer'):
                cursor = connections['reslt_writer'].cursor()

                # TODO(LB): I don't like the fact that we have to pass
                # potentially huge arguments (100k sites, for example).
                # I would like to be able to fetch this site data from
                # the stored function, but at the moment, the only form
                # available is a pickled `SiteCollection` object, and I've
                # experienced problems trying to import third-party libs
                # in a DB function context and could not get it to
                # reliably work.
                # As a fix, in addition to caching the pickled
                # SiteCollection in the DB, we could store also arrays for
                # lons and lats. It's duplicated information, but we have
                # a relatively low number of HazardCalculation records, so
                # it shouldn't be a big deal.
                query_args = [calc.id, rlz.id, curve.id, imt_string,
                              lons, lats]
                cursor.execute(
                    """
                    SELECT hzrdr.finalize_hazard_curves(
                        %s, %s, %s, %s, %s, %s)
                    """,
                    query_args)
def do_aggregate_post_proc(self):
    """
    Compute mean and/or quantile hazard curves across all realizations.

    Hazard curve data for every realization and site is read back from the
    database. Depending on which options are enabled in the calculation
    (`mean_hazard_curves`, `quantile_hazard_curves`), the aggregate curves
    are computed site by site and stored directly into the database.

    Records created:

    * one "multi-IMT" `HazardCurve` container for the mean and one per
      quantile (output type `hazard_curve_multi`, `imt=None`);
    * for each IMT, one `Output`/`HazardCurve` container for the mean and
      one per quantile (output type `hazard_curve`);
    * one `HazardCurveData` row per site for each of the per-IMT
      containers.

    Quantiles use the realization weights when
    `number_of_logic_tree_samples` is 0, and are unweighted otherwise.
    """
    hc = self.hc
    num_rlzs = models.LtRealization.objects.filter(
        hazard_calculation=hc).count()

    # Number of sites whose curves (one per realization) are processed per
    # increment. When `num_rlzs` >= `CURVE_CACHE_SIZE` the quotient is 0,
    # so clamp to the minimum of one site. Floor division is explicit so
    # the slice size stays an integer whatever division semantics are in
    # effect for this module.
    num_site_blocks_per_incr = max(
        1, int(CURVE_CACHE_SIZE) // int(num_rlzs))
    slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

    # Calculation options are invariant for the whole post-processing, so
    # read them once instead of once per site.
    investigation_time = hc.investigation_time
    compute_mean = hc.mean_hazard_curves
    quantiles = hc.quantile_hazard_curves or ()
    explicit_weights = hc.number_of_logic_tree_samples == 0

    if compute_mean:
        # create a new `HazardCurve` 'container' record for mean
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, "mean-curves-multi-imt",
                "hazard_curve_multi"),
            statistics="mean",
            imt=None,
            investigation_time=investigation_time)

    for quantile in quantiles:
        # create a new `HazardCurve` 'container' record for quantile
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, 'quantile(%s)-curves' % quantile,
                "hazard_curve_multi"),
            statistics="quantile",
            imt=None,
            quantile=quantile,
            investigation_time=investigation_time)

    for imt, imls in hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = models.parse_imt(imt)

        # prepare `output` and `hazard_curve` containers in the DB:
        container_ids = dict()
        if compute_mean:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='mean-curves-%s' % imt,
                output_type='hazard_curve')
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean')
            container_ids['mean'] = mean_hc.id

        for quantile in quantiles:
            q_output = models.Output.objects.create_output(
                job=self.job,
                display_name=('quantile(%s)-curves-%s'
                              % (quantile, imt)),
                output_type='hazard_curve')
            q_hc = models.HazardCurve.objects.create(
                output=q_output,
                investigation_time=investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='quantile',
                quantile=quantile)
            container_ids['q%s' % quantile] = q_hc.id

        all_curves_for_imt = models.order_by_location(
            models.HazardCurveData.objects.all_curves_for_imt(
                self.job.id, im_type, sa_period, sa_damping))

        with transaction.commit_on_success(using='reslt_writer'):
            inserter = writer.CacheInserter(models.HazardCurveData,
                                            CURVE_CACHE_SIZE)

            for chunk in models.queryset_iter(all_curves_for_imt,
                                              slice_incr):
                # Slice each chunk by `num_rlzs` into `site_chunk` and
                # compute the aggregates. This relies on the query being
                # ordered by location, so that each block of `num_rlzs`
                # records belongs to a single site.
                for site_chunk in block_splitter(chunk, num_rlzs):
                    site = site_chunk[0].location
                    curves_poes = [x.poes for x in site_chunk]
                    curves_weights = [x.weight for x in site_chunk]

                    # quantiles first:
                    for quantile in quantiles:
                        if explicit_weights:
                            # explicitly weighted quantiles
                            q_curve = weighted_quantile_curve(
                                curves_poes, curves_weights, quantile)
                        else:
                            # implicitly weighted quantiles
                            q_curve = quantile_curve(
                                curves_poes, quantile)
                        inserter.add(
                            models.HazardCurveData(
                                hazard_curve_id=(
                                    container_ids['q%s' % quantile]),
                                poes=q_curve.tolist(),
                                location=site.wkt))

                    # then means
                    if compute_mean:
                        m_curve = mean_curve(curves_poes,
                                             weights=curves_weights)
                        inserter.add(
                            models.HazardCurveData(
                                hazard_curve_id=container_ids['mean'],
                                poes=m_curve.tolist(),
                                location=site.wkt))

            # Write out whatever is still buffered for this IMT.
            inserter.flush()
def compute_disagg(job_id, sites, lt_rlz_id, ltp):
    """
    Compute disaggregation histograms for ``sites`` and one logic tree
    realization, saving the results to the database.

    Outline of the computation:

    1. Load the job, its hazard calculation and the realization.
    2. Build the sources (the source model logic tree path applied to each
       parsed source) and the GSIMs (from the GMPE logic tree path).
    3. Build the source-site and rupture-site distance filters from the
       calculation's maximum distance.
    4. For each IMT and each site, fetch the hazard curve stored for this
       realization; a curve whose PoEs are all zero is skipped.
    5. For each PoE in ``poes_disagg``, interpolate the matching IML on the
       curve and call the hazardlib calculator (see
       :func:`openquake.hazardlib.calc.disagg.disaggregation_poissonian`),
       passing the investigation time, truncation level and bin settings
       of the calculation.
    6. Save every non-empty result with :func:`_save_disagg_matrix`.
    7. Update the progress of the realization.

    :param int job_id:
        ID of the currently running
        :class:`openquake.engine.db.models.OqJob`
    :param list sites:
        `list` of :class:`openquake.hazardlib.site.Site` objects, which
        indicate the locations (and associated soil parameters) for which
        we need to compute disaggregation histograms.
    :param int lt_rlz_id:
        ID of the :class:`openquake.engine.db.models.LtRealization` for
        which we want to compute disaggregation histograms. This
        realization determines which hazard curve results are used as a
        basis for the calculation.
    :param ltp:
        a :class:`openquake.engine.input.LogicTreeProcessor` instance
    """
    # Silencing 'Too many local variables'
    # pylint: disable=R0914
    logs.LOG.debug(
        '> computing disaggregation for %(np)s sites for realization %(rlz)s'
        % {'np': len(sites), 'rlz': lt_rlz_id})

    job = models.OqJob.objects.get(id=job_id)
    hc = job.hazard_calculation
    realization = models.LtRealization.objects.get(id=lt_rlz_id)

    apply_uncertainties = ltp.parse_source_model_logictree_path(
        realization.sm_lt_path)
    gsims = ltp.parse_gmpe_logictree_path(realization.gsim_lt_path)

    progress_records = models.SourceProgress.objects.filter(
        lt_realization=realization)
    src_ids = progress_records.order_by('id').values_list(
        'parsed_source_id', flat=True)
    parsed_sources = models.ParsedSource.objects.filter(pk__in=src_ids)
    sources = [apply_uncertainties(parsed.nrml) for parsed in parsed_sources]

    # Make filters for distance to source and distance to rupture:
    # a better approach would be to filter the sources on distance
    # before, see the comment in the classical calculator
    distance_filters = openquake.hazardlib.calc.filters
    src_site_filter = distance_filters.source_site_distance_filter(
        hc.maximum_distance)
    rup_site_filter = distance_filters.rupture_site_distance_filter(
        hc.maximum_distance)

    # Calculator arguments that do not depend on the site, IMT or IML.
    shared_kwargs = {
        'sources': sources,
        'gsims': gsims,
        'time_span': hc.investigation_time,
        'truncation_level': hc.truncation_level,
        'n_epsilons': hc.num_epsilon_bins,
        'mag_bin_width': hc.mag_bin_width,
        'dist_bin_width': hc.distance_bin_width,
        'coord_bin_width': hc.coordinate_bin_width,
        'source_site_filter': src_site_filter,
        'rupture_site_filter': rup_site_filter,
    }
    disaggregate = openquake.hazardlib.calc.disagg.disaggregation_poissonian

    for imt, levels in hc.intensity_measure_types_and_levels.iteritems():
        hazardlib_imt = haz_general.imt_to_hazardlib(imt)
        hc_im_type, sa_period, sa_damping = models.parse_imt(imt)

        # Levels in reverse order, to pair with the reversed PoEs when
        # interpolating below.
        reversed_imls = numpy.array(levels[::-1])

        for site in sites:
            # get curve for this point/IMT/realization; exactly one
            # record is expected, otherwise the unpacking fails
            [curve] = models.HazardCurveData.objects.filter(
                location=site.location.wkt2d,
                hazard_curve__lt_realization=lt_rlz_id,
                hazard_curve__imt=hc_im_type,
                hazard_curve__sa_period=sa_period,
                hazard_curve__sa_damping=sa_damping,
            )

            # If the hazard curve is all zeros, don't even do the
            # disagg calculation.
            if not any(value != 0.0 for value in curve.poes):
                logs.LOG.debug(
                    '* hazard curve contained all 0 probability values; '
                    'skipping')
                continue

            reversed_poes = curve.poes[::-1]
            for poe in hc.poes_disagg:
                iml = numpy.interp(poe, reversed_poes, reversed_imls)

                with EnginePerformanceMonitor(
                        'computing disaggregation', job_id, disagg_task):
                    bin_edges, diss_matrix = disaggregate(
                        site=site, imt=hazardlib_imt, iml=iml,
                        **shared_kwargs)

                if bin_edges:  # falsy when no ruptures were generated
                    with EnginePerformanceMonitor(
                            'saving disaggregation', job_id, disagg_task):
                        _save_disagg_matrix(
                            job, site, bin_edges, diss_matrix, realization,
                            hc.investigation_time, hc_im_type, iml, poe,
                            sa_period, sa_damping
                        )

    with transaction.commit_on_success():
        # Update realization progress,
        # mark realization as complete if it is done
        haz_general.update_realization(lt_rlz_id, len(sites))

    logs.LOG.debug('< done computing disaggregation')