Example no. 1
def gmf_to_hazard_curve_arg_gen(job):
    """
    Generate a sequence of task args for the GMF-to-hazard-curve
    post-processing of a given ``job``.

    Yielded arguments are as follows:

    * job ID
    * point geometry
    * logic tree realization ID
    * IMT
    * IMLs
    * hazard curve "collection" ID
    * investigation time
    * duration
    * SA period
    * SA damping

    See :func:`gmf_to_hazard_curve_task` for more information about these
    arguments.

    As a side effect, :class:`openquake.engine.db.models.HazardCurve`
    records are
    created for each :class:`openquake.engine.db.models.LtRealization` and IMT.

    :param job:
        :class:`openquake.engine.db.models.OqJob` instance.
    """
    hc = job.hazard_calculation
    sites = models.HazardSite.objects.filter(hazard_calculation=hc)

    lt_realizations = models.LtRealization.objects.filter(
        hazard_calculation=hc.id)

    invest_time = hc.investigation_time
    duration = hc.ses_per_logic_tree_path * invest_time

    for raw_imt, imls in hc.intensity_measure_types_and_levels.iteritems():
        imt, sa_period, sa_damping = models.parse_imt(raw_imt)

        for lt_rlz in lt_realizations:
            hc_output = models.Output.objects.create_output(
                job,
                HAZ_CURVE_DISP_NAME_FMT % dict(imt=raw_imt, rlz=lt_rlz.id),
                'hazard_curve')

            # Create the hazard curve "collection":
            hc_coll = models.HazardCurve.objects.create(
                output=hc_output,
                lt_realization=lt_rlz,
                investigation_time=invest_time,
                imt=imt,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping)

            for site in sites:
                yield (job.id, site, lt_rlz.id, imt, imls, hc_coll.id,
                       invest_time, duration, sa_period, sa_damping)
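
The generator above only yields argument tuples; a minimal sketch of how they could be consumed (the task below is a hypothetical stand-in for the real :func:`gmf_to_hazard_curve_task`, and the serial loop stands in for the engine's task dispatch):

def gmf_to_hazard_curve_task(job_id, site, lt_rlz_id, imt, imls, hc_coll_id,
                             invest_time, duration, sa_period, sa_damping):
    # hypothetical stand-in: compute PoEs for `site` from the stored GMFs
    # and save them into the hazard curve collection `hc_coll_id`
    pass

def run_gmf_post_processing(job):
    # drive the generator; in the engine the tuples would be dispatched
    # to workers rather than executed serially like this
    for args in gmf_to_hazard_curve_arg_gen(job):
        gmf_to_hazard_curve_task(*args)
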
Example no. 2
    def __init__(self, hazard_output_id, imt):
        imt, sa_period, sa_damping = models.parse_imt(imt)

        self._imt = imt
        self._sa_period = sa_period
        self._sa_damping = sa_damping
        self._hazard_output_id = hazard_output_id
        self._cache = {}
Example no. 3
    def finalize_hazard_curves(self):
        """
        Create the final output records for hazard curves. This is done by
        copying the temporary results from `htemp.hazard_curve_progress` to
        `hzrdr.hazard_curve` (for metadata) and `hzrdr.hazard_curve_data` (for
        the actual curve PoE values). Foreign keys are made from
        `hzrdr.hazard_curve` to `hzrdr.lt_realization` (realization information
        is needed to export the full hazard curve results).
        """
        with transaction.commit_on_success(using='reslt_writer'):
            im = self.hc.intensity_measure_types_and_levels
            points = self.computation_mesh

            realizations = models.LtRealization.objects.filter(
                hazard_calculation=self.hc.id)

            for rlz in realizations:
                # create a new `HazardCurve` 'container' record for each
                # realization for each intensity measure type
                for imt, imls in im.items():
                    hc_im_type, sa_period, sa_damping = models.parse_imt(imt)

                    hco = models.Output(
                        owner=self.hc.owner,
                        oq_job=self.job,
                        display_name="hc-rlz-%s" % rlz.id,
                        output_type='hazard_curve',
                    )
                    hco.save()

                    haz_curve = models.HazardCurve(
                        output=hco,
                        lt_realization=rlz,
                        investigation_time=self.hc.investigation_time,
                        imt=hc_im_type,
                        imls=imls,
                        sa_period=sa_period,
                        sa_damping=sa_damping,
                    )
                    haz_curve.save()

                    [hc_progress] = models.HazardCurveProgress.objects.filter(
                        lt_realization=rlz.id, imt=imt)

                    hc_data_inserter = writer.BulkInserter(
                        models.HazardCurveData)
                    for i, location in enumerate(points):
                        poes = hc_progress.result_matrix[i]
                        hc_data_inserter.add_entry(
                            hazard_curve_id=haz_curve.id,
                            poes=poes.tolist(),
                            location=location.wkt2d,
                            weight=rlz.weight
                        )

                    hc_data_inserter.flush()
Example no. 4
 def setup(self):
     """
     Initialize the private variables of a hazard getter. Called by
     ``__init__`` and by ``__setstate__``.
     """
     self._assets_extent = geo.mesh.Mesh.from_points_list([
         geo.point.Point(asset.site.x, asset.site.y)
         for asset in self.assets]).get_convex_hull()
     self._imt, self._sa_period, self._sa_damping = (
         models.parse_imt(self.imt))
     self.asset_dict = dict((asset.id, asset) for asset in self.assets)
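
The setup above condenses a pattern shared by the getters in the later examples: build a hazardlib mesh from the asset locations, take its convex hull, and index the assets by id. A self-contained sketch with stand-in objects (the Mesh/Point API is hazardlib's; the _Site/_Asset classes are invented here for illustration):

from openquake.hazardlib import geo

class _Site(object):
    def __init__(self, x, y):
        self.x, self.y = x, y

class _Asset(object):
    def __init__(self, asset_id, x, y):
        self.id, self.site = asset_id, _Site(x, y)

assets = [_Asset(1, 0.0, 0.0), _Asset(2, 1.0, 0.0), _Asset(3, 0.0, 1.0)]
mesh = geo.mesh.Mesh.from_points_list(
    [geo.point.Point(a.site.x, a.site.y) for a in assets])
hull = mesh.get_convex_hull()                  # a hazardlib Polygon
asset_dict = dict((a.id, a) for a in assets)   # id -> asset lookup
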
Example no. 5
    def __init__(self, hazard_id, imt, assets, max_distance):
        self.hazard_id = hazard_id
        self.imt = imt
        self.assets = assets
        self.max_distance = max_distance

        self._assets_mesh = geo.mesh.Mesh.from_points_list(
            [geo.point.Point(asset.site.x, asset.site.y) for asset in self.assets]
        )
        self._imt, self._sa_period, self._sa_damping = models.parse_imt(self.imt)
        self.asset_dict = dict((asset.id, asset) for asset in self.assets)
        self._cache = {}
Example no. 6
 def __init__(self, hazard_outputs, assets, max_distance, imt):
     self.hazard_outputs = hazard_outputs
     self.assets = assets
     self.max_distance = max_distance
     self.imt = imt
     self.imt_type, self.sa_period, self.sa_damping = models.parse_imt(imt)
     # FIXME(lp). It is better to directly store the convex hull
     # instead of the mesh. We are not doing it because
     # hazardlib.Polygon is not (yet) pickleable
     self._assets_mesh = geo.mesh.Mesh.from_points_list([
         geo.point.Point(asset.site.x, asset.site.y)
         for asset in self.assets])
     self.asset_dict = dict((asset.id, asset) for asset in self.assets)
Example no. 7
 def __init__(self, hazard_outputs, assets, max_distance, imt):
     self.hazard_outputs = hazard_outputs
     self.assets = assets
     self.max_distance = max_distance
     self.imt = imt
     self.imt_type, self.sa_period, self.sa_damping = models.parse_imt(imt)
     # FIXME(lp). It is better to directly store the convex hull
     # instead of the mesh. We are not doing it because
     # hazardlib.Polygon is not (yet) pickleable
     self._assets_mesh = geo.mesh.Mesh.from_points_list([
         geo.point.Point(asset.site.x, asset.site.y)
         for asset in self.assets
     ])
     self.asset_dict = dict((asset.id, asset) for asset in self.assets)
Example no. 8
    def hazard_outputs(self, hazard_calculation):
        """
        :returns: a list of :class:`openquake.engine.db.models.HazardCurve`
        objects storing the hazard curves associated with
        `hazard_calculation` and with a logic tree realization
        """

        imt, sa_period, sa_damping = models.parse_imt(self.imt)
        return hazard_calculation.oqjob_set.filter(status="complete").latest(
            'last_update').output_set.filter(
                output_type='hazard_curve',
                hazardcurve__imt=imt,
                hazardcurve__sa_period=sa_period,
                hazardcurve__sa_damping=sa_damping,
                hazardcurve__lt_realization__isnull=False).order_by('id')
Example no. 9
 def __init__(self, hazard_output, assets, max_distance, imt):
     self.hazard_output = hazard_output
     hazard = hazard_output.output_container
     self.hazard_id = hazard.id
     self.assets = assets
     self.max_distance = max_distance
     self.imt = imt
     self.imt_type, self.sa_period, self.sa_damping = models.parse_imt(imt)
     if hasattr(hazard, 'lt_realization') and hazard.lt_realization:
         self.weight = hazard.lt_realization.weight
     else:
         self.weight = None
     # FIXME(lp). It is better to directly store the convex hull
     # instead of the mesh. We are not doing it because
     # hazardlib.Polygon is not (yet) pickleable
     self._assets_mesh = geo.mesh.Mesh.from_points_list([
         geo.point.Point(asset.site.x, asset.site.y)
         for asset in self.assets])
     self.asset_dict = dict((asset.id, asset) for asset in self.assets)
Example no. 10
def import_gmf_scenario(fileobj, user=None):
    """
    Parse the file with the GMF fields and import it into the table
    gmf_scenario. It also creates a new output record, unrelated to a job.
    Works with both XML files and tab-separated files in the format
    (imt, gmvs, location).
    :returns: the generated :class:`openquake.engine.db.models.Output` object
    and the generated :class:`openquake.engine.db.models.HazardCalculation`
    object.
    """
    fname = fileobj.name

    owner = models.OqUser.objects.get(user_name=user) \
        if user else get_current_user()

    hc = models.HazardCalculation.objects.create(
        owner=owner,
        base_path=os.path.dirname(fname),
        description='Scenario importer, file %s' % os.path.basename(fname),
        calculation_mode='scenario', maximum_distance=100)
    # XXX: probably the maximum_distance should be entered by the user

    out = models.Output.objects.create(
        owner=owner, display_name='Imported from %r' % fname,
        output_type='gmf_scenario')

    gmf_coll = models.Gmf.objects.create(output=out)

    rows = []
    if fname.endswith('.xml'):
        # convert the XML into a tab-separated StringIO
        for imt, gmvs, loc in GMFScenarioParser(fileobj).parse():
            imt_type, sa_period, sa_damping = models.parse_imt(imt)
            sa_period = '\N' if sa_period is None else str(sa_period)
            sa_damping = '\N' if sa_damping is None else str(sa_damping)
            gmvs = '{%s}' % str(gmvs)[1:-1]
            rows.append([imt_type, sa_period, sa_damping, gmvs, loc])
    else:  # assume a tab-separated file
        for line in fileobj:
            rows.append(line.split('\t'))
    import_rows(hc, gmf_coll, rows)
    return out, hc
Example no. 11
def import_gmf_scenario(fileobj):
    """
    Parse the file with the GMF fields and import it into the table
    gmf_scenario. It also creates a new output record, unrelated to a job.
    Works with both XML files and tab-separated files in the format
    (imt, gmvs, location).
    :returns: the generated :class:`openquake.engine.db.models.Output` object
    and the generated :class:`openquake.engine.db.models.HazardCalculation`
    object.
    """
    fname = fileobj.name

    hc = models.HazardCalculation.objects.create(
        base_path=os.path.dirname(fname),
        description="Scenario importer, file %s" % os.path.basename(fname),
        calculation_mode="scenario",
        maximum_distance=100,
    )
    # XXX: probably the maximum_distance should be entered by the user

    out = models.Output.objects.create(display_name="Imported from %r" % fname, output_type="gmf_scenario")

    gmf_coll = models.Gmf.objects.create(output=out)

    rows = []
    if fname.endswith(".xml"):
        # convert the XML into a tab-separated StringIO
        for imt, gmvs, loc in GMFScenarioParser(fileobj).parse():
            imt_type, sa_period, sa_damping = models.parse_imt(imt)
            sa_period = "\N" if sa_period is None else str(sa_period)
            sa_damping = "\N" if sa_damping is None else str(sa_damping)
            gmvs = "{%s}" % str(gmvs)[1:-1]
            rows.append([imt_type, sa_period, sa_damping, gmvs, loc])
    else:  # assume a tab-separated file
        for line in fileobj:
            rows.append(line.split("\t"))
    import_rows(hc, gmf_coll, rows)
    return out, hc
Example no. 12
def import_gmf_scenario(fileobj):
    """
    Parse the file with the GMF fields and import it into the table
    gmf_scenario. It also creates a new output record, unrelated to a job.
    Works with both XML files and tab-separated files in the format
    (imt, gmvs, location).
    :returns: the generated :class:`openquake.engine.db.models.Output` object
    and the generated :class:`openquake.engine.db.models.HazardCalculation`
    object.
    """
    fname = fileobj.name

    hc = models.HazardCalculation.objects.create(
        base_path=os.path.dirname(fname),
        description='Scenario importer, file %s' % os.path.basename(fname),
        calculation_mode='scenario',
        maximum_distance=100)
    # XXX: probably the maximum_distance should be entered by the user

    out = models.Output.objects.create(display_name='Imported from %r' % fname,
                                       output_type='gmf_scenario')

    gmf_coll = models.Gmf.objects.create(output=out)

    rows = []
    if fname.endswith('.xml'):
        # convert the XML into a tab-separated StringIO
        for imt, gmvs, loc in GMFScenarioParser(fileobj).parse():
            imt_type, sa_period, sa_damping = models.parse_imt(imt)
            sa_period = '\N' if sa_period is None else str(sa_period)
            sa_damping = '\N' if sa_damping is None else str(sa_damping)
            gmvs = '{%s}' % str(gmvs)[1:-1]
            rows.append([imt_type, sa_period, sa_damping, gmvs, loc])
    else:  # assume a tab-separated file
        for line in fileobj:
            rows.append(line.split('\t'))
    import_rows(hc, gmf_coll, rows)
    return out, hc
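
All three variants of import_gmf_scenario read fileobj.name, so the argument must be a real file (or at least expose a name attribute); a plain StringIO will not work. A hypothetical invocation on a tab-separated input following the (imt, gmvs, location) row format from the docstring, assuming a configured engine database; the field values are illustrative only:

# Hypothetical usage; the gmvs field is written as a PostgreSQL-style
# array literal, matching what the XML branch produces.
with open('/tmp/gmf_scenario.txt', 'w') as f:
    f.write('PGA\t{0.1,0.2,0.3}\tPOINT(30.0 10.0)\n')

with open('/tmp/gmf_scenario.txt') as f:
    out, hc = import_gmf_scenario(f)
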
Example no. 13
def extract(hc_id, a_writer):
    hc = models.HazardCalculation.objects.get(pk=hc_id)

    for lt in models.LtRealization.objects.filter(hazard_calculation=hc):

        for imt in hc.intensity_measure_types:
            imt_type, sa_period, _ = models.parse_imt(imt)

            if imt_type == "PGA":
                imt_type_fix = "SA"
                sa_period_fix = 0
            else:
                imt_type_fix = imt_type
                sa_period_fix = sa_period

            ruptures = sorted([
                r.id for r in models.SESRupture.objects.filter(
                    ses__ses_collection__lt_realization=lt)
            ])

            for site in hc.hazardsite_set.all().order_by('id'):
                gmvs = []
                gmvs_data = dict()

                for ses in models.SES.objects.filter(
                        ses_collection__lt_realization=lt).order_by('id'):

                    for gmf in models.GmfData.objects.filter(
                            ses=ses, site=site, imt=imt_type,
                            sa_period=sa_period):

                        gmvs_data.update(dict(zip(gmf.rupture_ids, gmf.gmvs)))
                gmvs.extend([gmvs_data.get(r, 0.0) for r in ruptures])
                a_writer.writerow([
                    lt.id, site.location.x, site.location.y, imt_type_fix,
                    sa_period_fix
                ] + gmvs)
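
The inner loop assembles one row per site by mapping rupture ids to ground motion values and defaulting missing ruptures to 0.0, so every row carries exactly one value per rupture. A small worked example of that step:

ruptures = [1, 2, 3, 4]                        # all rupture ids, sorted
gmvs_data = dict(zip([2, 4], [0.12, 0.34]))    # GMVs observed at this site
row = [gmvs_data.get(r, 0.0) for r in ruptures]
assert row == [0.0, 0.12, 0.0, 0.34]
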
Example no. 14
def extract(hc_id, a_writer):
    hc = models.HazardCalculation.objects.get(pk=hc_id)

    for lt in models.LtRealization.objects.filter(
            hazard_calculation=hc):

        for imt in hc.intensity_measure_types:
            imt_type, sa_period, _ = models.parse_imt(imt)

            if imt_type == "PGA":
                imt_type_fix = "SA"
                sa_period_fix = 0
            else:
                imt_type_fix = imt_type
                sa_period_fix = sa_period

            ruptures = sorted(
                [r.id
                 for r in models.SESRupture.objects.filter(
                         ses__ses_collection__lt_realization=lt)])

            for site in hc.hazardsite_set.all().order_by('id'):
                gmvs = []
                gmvs_data = dict()

                for ses in models.SES.objects.filter(
                        ses_collection__lt_realization=lt).order_by('id'):

                    for gmf in models.GmfData.objects.filter(
                            ses=ses,
                            site=site,
                            imt=imt_type, sa_period=sa_period):

                        gmvs_data.update(dict(zip(gmf.rupture_ids, gmf.gmvs)))
                gmvs.extend([gmvs_data.get(r, 0.0) for r in ruptures])
                a_writer.writerow([lt.id, site.location.x, site.location.y,
                                   imt_type_fix, sa_period_fix] + gmvs)
Example no. 15
 def test_sa(self):
     hc_im_type, sa_period, sa_damping = models.parse_imt("SA(0.1)")
     self.assertEqual("SA", hc_im_type)
     self.assertEqual(0.1, sa_period)
     self.assertEqual(models.DEFAULT_SA_DAMPING, sa_damping)
Example no. 16
 def test_pga(self):
     hc_im_type, sa_period, sa_damping = models.parse_imt("PGA")
     self.assertEqual("PGA", hc_im_type)
     self.assertEqual(None, sa_period)
     self.assertEqual(None, sa_damping)
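
Taken together, the two tests pin down the contract of models.parse_imt: "SA(0.1)" yields ("SA", 0.1, DEFAULT_SA_DAMPING) and "PGA" yields ("PGA", None, None). A minimal reimplementation sketch consistent with those tests (the damping default of 5.0 is an assumption here, not taken from the source):

DEFAULT_SA_DAMPING = 5.0  # assumed value of models.DEFAULT_SA_DAMPING

def parse_imt(imt):
    # split 'SA(0.1)' into type, period and default damping;
    # non-SA types carry no period or damping
    if imt.startswith('SA('):
        return 'SA', float(imt[3:-1]), DEFAULT_SA_DAMPING
    return imt, None, None

assert parse_imt('SA(0.1)') == ('SA', 0.1, 5.0)
assert parse_imt('PGA') == ('PGA', None, None)
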
Example no. 17
    def finalize_hazard_curves(self):
        """
        Create the final output records for hazard curves. This is done by
        copying the temporary results from `htemp.hazard_curve_progress` to
        `hzrdr.hazard_curve` (for metadata) and `hzrdr.hazard_curve_data` (for
        the actual curve PoE values). Foreign keys are made from
        `hzrdr.hazard_curve` to `hzrdr.lt_realization` (realization information
        is needed to export the full hazard curve results).
        """
        im = self.hc.intensity_measure_types_and_levels
        points = self.hc.points_to_compute()

        # prepare site locations for the stored function call
        lons = '{%s}' % ', '.join(str(v) for v in points.lons)
        lats = '{%s}' % ', '.join(str(v) for v in points.lats)

        realizations = models.LtRealization.objects.filter(
            hazard_calculation=self.hc.id)

        for rlz in realizations:
            # create a new `HazardCurve` 'container' record for each
            # realization (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, "hc-multi-imt-rlz-%s" % rlz.id,
                    "hazard_curve_multi"),
                lt_realization=rlz,
                imt=None,
                investigation_time=self.hc.investigation_time)

            # create a new `HazardCurve` 'container' record for each
            # realization for each intensity measure type
            for imt, imls in im.items():
                hc_im_type, sa_period, sa_damping = models.parse_imt(imt)

                hco = models.Output.objects.create(
                    oq_job=self.job,
                    display_name="Hazard Curve rlz-%s" % rlz.id,
                    output_type='hazard_curve',
                )

                haz_curve = models.HazardCurve(
                    output=hco,
                    lt_realization=rlz,
                    investigation_time=self.hc.investigation_time,
                    imt=hc_im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                )
                haz_curve.save()

                with transaction.commit_on_success(using='reslt_writer'):
                    cursor = connections['reslt_writer'].cursor()

                    # TODO(LB): I don't like the fact that we have to pass
                    # potentially huge arguments (100k sites, for example).
                    # I would like to be able to fetch this site data from
                    # the stored function, but at the moment, the only form
                    # available is a pickled `SiteCollection` object, and I've
                    # experienced problems trying to import third-party libs
                    # in a DB function context and could not get it to reliably
                    # work.
                    # As a fix, in addition to caching the pickled
                    # SiteCollection in the DB, we could store also arrays for
                    # lons and lats. It's duplicated information, but we have a
                    # relatively low number of HazardCalculation records, so it
                    # shouldn't be a big deal.
                    cursor.execute(
                        """
                        SELECT hzrdr.finalize_hazard_curves(
                            %s, %s, %s, %s, %s, %s)
                        """,
                        [self.hc.id, rlz.id, haz_curve.id, imt, lons, lats]
                    )
Example no. 18
    def do_aggregate_post_proc(self):
        """
        Grab hazard data for all realizations and sites from the database and
        compute mean and/or quantile aggregates (depending on which options are
        enabled in the calculation).

        Post-processing results will be stored directly into the database.
        """
        num_rlzs = models.LtRealization.objects.filter(
            hazard_calculation=self.hc).count()

        num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) / int(num_rlzs)
        if num_site_blocks_per_incr == 0:
            # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
            # The minimum number of sites should be 1.
            num_site_blocks_per_incr = 1
        slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

        if self.hc.mean_hazard_curves:
            # create a new `HazardCurve` 'container' record for mean
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, "mean-curves-multi-imt",
                    "hazard_curve_multi"),
                statistics="mean",
                imt=None,
                investigation_time=self.hc.investigation_time)

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                # create a new `HazardCurve` 'container' record for quantile
                # curves (virtual container for multiple imts)
                models.HazardCurve.objects.create(
                    output=models.Output.objects.create_output(
                        self.job, 'quantile(%s)-curves' % quantile,
                        "hazard_curve_multi"),
                    statistics="quantile",
                    imt=None,
                    quantile=quantile,
                    investigation_time=self.hc.investigation_time)

        for imt, imls in self.hc.intensity_measure_types_and_levels.items():
            im_type, sa_period, sa_damping = models.parse_imt(imt)

            # prepare `output` and `hazard_curve` containers in the DB:
            container_ids = dict()
            if self.hc.mean_hazard_curves:
                mean_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name='Mean Hazard Curves %s' % imt,
                    output_type='hazard_curve'
                )
                mean_hc = models.HazardCurve.objects.create(
                    output=mean_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='mean'
                )
                container_ids['mean'] = mean_hc.id

            if self.hc.quantile_hazard_curves:
                for quantile in self.hc.quantile_hazard_curves:
                    q_output = models.Output.objects.create_output(
                        job=self.job,
                        display_name=(
                            '%s quantile Hazard Curves %s' % (quantile, imt)
                        ),
                        output_type='hazard_curve'
                    )
                    q_hc = models.HazardCurve.objects.create(
                        output=q_output,
                        investigation_time=self.hc.investigation_time,
                        imt=im_type,
                        imls=imls,
                        sa_period=sa_period,
                        sa_damping=sa_damping,
                        statistics='quantile',
                        quantile=quantile
                    )
                    container_ids['q%s' % quantile] = q_hc.id

            all_curves_for_imt = models.order_by_location(
                models.HazardCurveData.objects.all_curves_for_imt(
                    self.job.id, im_type, sa_period, sa_damping))

            with transaction.commit_on_success(using='reslt_writer'):
                inserter = writer.CacheInserter(
                    models.HazardCurveData, CURVE_CACHE_SIZE)

                for chunk in models.queryset_iter(all_curves_for_imt,
                                                  slice_incr):
                    # slice each chunk by `num_rlzs` into `site_chunk`
                    # and compute the aggregate
                    for site_chunk in block_splitter(chunk, num_rlzs):
                        site = site_chunk[0].location
                        curves_poes = [x.poes for x in site_chunk]
                        curves_weights = [x.weight for x in site_chunk]

                        # do means and quantiles
                        # quantiles first:
                        if self.hc.quantile_hazard_curves:
                            for quantile in self.hc.quantile_hazard_curves:
                                if self.hc.number_of_logic_tree_samples == 0:
                                    # explicitly weighted quantiles
                                    q_curve = weighted_quantile_curve(
                                        curves_poes, curves_weights, quantile
                                    )
                                else:
                                    # implicitly weighted quantiles
                                    q_curve = quantile_curve(
                                        curves_poes, quantile
                                    )
                                inserter.add(
                                    models.HazardCurveData(
                                        hazard_curve_id=(
                                            container_ids['q%s' % quantile]),
                                        poes=q_curve.tolist(),
                                        location=site.wkt)
                                )

                        # then means
                        if self.hc.mean_hazard_curves:
                            m_curve = mean_curve(
                                curves_poes, weights=curves_weights
                            )
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=container_ids['mean'],
                                    poes=m_curve.tolist(),
                                    location=site.wkt)
                            )
                inserter.flush()
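
do_aggregate_post_proc leans on three statistical helpers (mean_curve, quantile_curve, weighted_quantile_curve). A minimal numpy sketch of the semantics the mean and the implicitly weighted quantile appear to have, per intensity level across realizations; this is an assumption-based sketch, not the engine's actual implementation:

import numpy

def mean_curve(curves_poes, weights=None):
    # per-IML (weighted) average across realizations
    return numpy.average(numpy.array(curves_poes), axis=0, weights=weights)

def quantile_curve(curves_poes, quantile):
    # per-IML quantile across equally weighted (sampled) realizations
    return numpy.percentile(numpy.array(curves_poes), quantile * 100, axis=0)

poes = [[0.9, 0.5, 0.1], [0.8, 0.4, 0.05]]
print(mean_curve(poes, weights=[0.3, 0.7]))   # -> [0.83  0.43  0.065]

The explicitly weighted quantile used when number_of_logic_tree_samples == 0 is more involved and is left out of the sketch.
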
Example no. 19
def compute_disagg(job_id, sites, lt_rlz_id, ltp):
    """
    Calculate disaggregation histograms and save the results to the database.

    Here is the basic calculation workflow:

    1. Get all sources
    2. Get IMTs
    3. Get the hazard curve for each point, IMT, and realization
    4. For each `poes_disagg`, interpolate the IML for each curve.
    5. Get GSIMs, TOM (Temporal Occurrence Model), and truncation level.
    6. Get histogram bin edges.
    7. Prepare calculation args.
    8. Call the hazardlib calculator
       (see :func:`openquake.hazardlib.calc.disagg.disaggregation`
       for more info).

    :param int job_id:
        ID of the currently running :class:`openquake.engine.db.models.OqJob`
    :param list sites:
        `list` of :class:`openquake.hazardlib.site.Site` objects, which
        indicate the locations (and associated soil parameters) for which we
        need to compute disaggregation histograms.
    :param int lt_rlz_id:
        ID of the :class:`openquake.engine.db.models.LtRealization` for which
        we want to compute disaggregation histograms. This realization will
        determine which hazard curve results to use as a basis for the
        calculation.
    :param ltp:
        a :class:`openquake.engine.input.LogicTreeProcessor` instance
    """
    # Silencing 'Too many local variables'
    # pylint: disable=R0914
    logs.LOG.debug(
        '> computing disaggregation for %(np)s sites for realization %(rlz)s' %
        dict(np=len(sites), rlz=lt_rlz_id))

    job = models.OqJob.objects.get(id=job_id)
    hc = job.hazard_calculation
    lt_rlz = models.LtRealization.objects.get(id=lt_rlz_id)
    apply_uncertainties = ltp.parse_source_model_logictree_path(
        lt_rlz.sm_lt_path)
    gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)

    src_ids = models.SourceProgress.objects.filter(lt_realization=lt_rlz)\
        .order_by('id').values_list('parsed_source_id', flat=True)
    sources = [
        apply_uncertainties(s.nrml)
        for s in models.ParsedSource.objects.filter(pk__in=src_ids)
    ]

    # Make filters for distance to source and distance to rupture:
    # a better approach would be to filter the sources on distance
    # before, see the comment in the classical calculator
    src_site_filter = openquake.hazardlib.calc.filters.\
        source_site_distance_filter(hc.maximum_distance)
    rup_site_filter = openquake.hazardlib.calc.filters.\
        rupture_site_distance_filter(hc.maximum_distance)

    for imt, imls in hc.intensity_measure_types_and_levels.iteritems():
        hazardlib_imt = haz_general.imt_to_hazardlib(imt)
        hc_im_type, sa_period, sa_damping = models.parse_imt(imt)

        imls = numpy.array(imls[::-1])

        # loop over sites
        for site in sites:
            # get curve for this point/IMT/realization
            [curve] = models.HazardCurveData.objects.filter(
                location=site.location.wkt2d,
                hazard_curve__lt_realization=lt_rlz_id,
                hazard_curve__imt=hc_im_type,
                hazard_curve__sa_period=sa_period,
                hazard_curve__sa_damping=sa_damping,
            )

            # If the hazard curve is all zeros, don't even do the
            # disagg calculation.
            if all(x == 0.0 for x in curve.poes):
                logs.LOG.debug(
                    '* hazard curve contained all 0 probability values; '
                    'skipping')
                continue

            for poe in hc.poes_disagg:
                iml = numpy.interp(poe, curve.poes[::-1], imls)
                calc_kwargs = {
                    'sources': sources,
                    'site': site,
                    'imt': hazardlib_imt,
                    'iml': iml,
                    'gsims': gsims,
                    'time_span': hc.investigation_time,
                    'truncation_level': hc.truncation_level,
                    'n_epsilons': hc.num_epsilon_bins,
                    'mag_bin_width': hc.mag_bin_width,
                    'dist_bin_width': hc.distance_bin_width,
                    'coord_bin_width': hc.coordinate_bin_width,
                    'source_site_filter': src_site_filter,
                    'rupture_site_filter': rup_site_filter,
                }
                with EnginePerformanceMonitor('computing disaggregation',
                                              job_id, disagg_task):
                    bin_edges, diss_matrix = openquake.hazardlib.calc.\
                        disagg.disaggregation_poissonian(**calc_kwargs)
                    if not bin_edges:  # no ruptures generated
                        continue

                with EnginePerformanceMonitor('saving disaggregation', job_id,
                                              disagg_task):
                    _save_disagg_matrix(job, site, bin_edges, diss_matrix,
                                        lt_rlz, hc.investigation_time,
                                        hc_im_type, iml, poe, sa_period,
                                        sa_damping)

    with transaction.commit_on_success():
        # Update realization progress,
        # mark realization as complete if it is done
        haz_general.update_realization(lt_rlz_id, len(sites))

    logs.LOG.debug('< done computing disaggregation')
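
Step 4 of the workflow inverts the hazard curve: numpy.interp requires an ascending xp, so both the PoEs and the IMLs are reversed before interpolating each poes_disagg value back to an intensity level. A worked instance of that line:

import numpy

imls = [0.01, 0.1, 1.0]     # ascending intensity levels
poes = [0.9, 0.5, 0.1]      # curve PoEs, decreasing with intensity

imls_rev = numpy.array(imls[::-1])              # as done in compute_disagg
iml = numpy.interp(0.5, poes[::-1], imls_rev)   # PoE 0.5 -> IML 0.1
assert abs(iml - 0.1) < 1e-9
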
Example no. 20
    def test_complete_event_based_calculation_cycle(self):
        # * Run `pre_execute()`.
        # * Execute the `stochastic_event_sets` task as a normal function.
        # * Check that the proper results (GMF, SES) were computed.
        # * Finally, call `post_execute()` and verify that `complete logic
        #   tree` artifacts were created.

        # There are 4 sources in the test input model; we can test them all
        # with one task.
        sources_per_task = 4

        self.calc.pre_execute()
        # Test the job stats:
        job_stats = models.JobStats.objects.get(oq_job=self.job.id)
        # num sources * num lt samples / block size (items per task):
        self.assertEqual(8, job_stats.num_tasks)
        self.assertEqual(121, job_stats.num_sites)
        self.assertEqual(2, job_stats.num_realizations)

        self.job.is_running = True
        self.job.status = 'executing'
        self.job.save()

        hc = self.job.hazard_calculation

        rlz1, rlz2 = models.LtRealization.objects.filter(
            hazard_calculation=hc.id).order_by('ordinal')

        task_arg_gen = self.calc.task_arg_gen(sources_per_task)
        task_arg_list = list(task_arg_gen)

        self.assertEqual(2, len(task_arg_list))

        # Now test the completion signal messaging of the task:
        def test_callback(body, message):
            self.assertEqual(
                dict(job_id=self.job.id, num_items=sources_per_task), body)
            message.ack()

        exchange, conn_args = base.exchange_and_conn_args()

        routing_key = base.ROUTING_KEY_FMT % dict(job_id=self.job.id)
        task_signal_queue = kombu.Queue(
            'htasks.job.%s' % self.job.id, exchange=exchange,
            routing_key=routing_key, durable=False, auto_delete=True)

        with kombu.BrokerConnection(**conn_args) as conn:
            task_signal_queue(conn.channel()).declare()
            with conn.Consumer(task_signal_queue, callbacks=[test_callback]):
                # call the task as a normal function
                for args in task_arg_list:
                    core.ses_and_gmfs(*args)

                    # wait for the completion signal
                    conn.drain_events()

        # Check the 'total' counter (computed by the task arg generator):
        # 2 realizations * 4 sources = 8 total
        self.assertEqual(8, self.calc.progress['total'])

        # Now check that we saved the right number of ruptures to the DB.
        ruptures1 = models.SESRupture.objects.filter(
            ses__ses_collection__lt_realization=rlz1)
        self.assertEqual(118, ruptures1.count())

        ruptures2 = models.SESRupture.objects.filter(
            ses__ses_collection__lt_realization=rlz2)
        self.assertEqual(92, ruptures2.count())

        # Check that we have the right number of gmf_sets.
        # The correct number is (num_realizations * ses_per_logic_tree_path).
        gmf_sets = models.GmfSet.objects.filter(
            gmf_collection__output__oq_job=self.job.id,
            complete_logic_tree_gmf=False)
        # 2 realizations, 5 ses_per_logic_tree_path
        self.assertEqual(10, gmf_sets.count())

        for imt in hc.intensity_measure_types:
            imt, sa_period, sa_damping = models.parse_imt(imt)
            # Now check that we have the right number of GMFs in the DB.
            for gmf_set in gmf_sets:

                # For each gmf_set, we should have a number of GMF records
                # equal to the numbers of sites in the calculation, _per_ IMT.
                # In this case, that's 121.
                gmfs = models.Gmf.objects.filter(
                    gmf_set=gmf_set, imt=imt, sa_period=sa_period,
                    sa_damping=sa_damping)

                # Sanity check: make sure they all came from the same task:
                task_ord = gmfs[0].result_grp_ordinal
                self.assertTrue(
                    all(x.result_grp_ordinal == task_ord for x in gmfs))

                # Expected number of ruptures:
                exp_n_rups = models.SESRupture.objects.filter(
                    ses__ses_collection__output__oq_job=self.job.id,
                    ses__ordinal=gmf_set.ses_ordinal,
                    result_grp_ordinal=task_ord).count()

                self.assertEqual(121, gmfs.count())
                self.assertTrue(all(len(x.gmvs) == exp_n_rups for x in gmfs))

        # TODO: At some point, we'll need to test the actual values of these
        # ruptures. We'll need to collect QA test data for this.

        # Check the complete logic tree SES and make sure it contains
        # all of the ruptures.
        complete_lt_ses = models.SES.objects.get(
            ses_collection__output__oq_job=self.job.id,
            ses_collection__output__output_type='complete_lt_ses',
            complete_logic_tree_ses=True)

        clt_ses_ruptures = models.SESRupture.objects.filter(
            ses=complete_lt_ses.id)

        self.assertEqual(210, clt_ses_ruptures.count())

        # Test the computed `investigation_time`
        # 2 lt realizations * 5 ses_per_logic_tree_path * 50.0 years
        self.assertEqual(500.0, complete_lt_ses.investigation_time)

        self.assertIsNone(complete_lt_ses.ordinal)
Example no. 21
    def finalize_hazard_curves(self):
        """
        Create the final output records for hazard curves. This is done by
        copying the temporary results from `htemp.hazard_curve_progress` to
        `hzrdr.hazard_curve` (for metadata) and `hzrdr.hazard_curve_data` (for
        the actual curve PoE values). Foreign keys are made from
        `hzrdr.hazard_curve` to `hzrdr.lt_realization` (realization information
        is needed to export the full hazard curve results).
        """
        im = self.hc.intensity_measure_types_and_levels
        points = self.hc.points_to_compute()

        # prepare site locations for the stored function call
        lons = '{%s}' % ', '.join(str(v) for v in points.lons)
        lats = '{%s}' % ', '.join(str(v) for v in points.lats)

        realizations = models.LtRealization.objects.filter(
            hazard_calculation=self.hc.id)

        for rlz in realizations:
            # create a new `HazardCurve` 'container' record for each
            # realization (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, "hc-multi-imt-rlz-%s" % rlz.id,
                    "hazard_curve_multi"),
                lt_realization=rlz,
                imt=None,
                investigation_time=self.hc.investigation_time)

            # create a new `HazardCurve` 'container' record for each
            # realization for each intensity measure type
            for imt, imls in im.items():
                hc_im_type, sa_period, sa_damping = models.parse_imt(imt)

                hco = models.Output.objects.create(
                    oq_job=self.job,
                    display_name="hc-rlz-%s" % rlz.id,
                    output_type='hazard_curve',
                )

                haz_curve = models.HazardCurve(
                    output=hco,
                    lt_realization=rlz,
                    investigation_time=self.hc.investigation_time,
                    imt=hc_im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                )
                haz_curve.save()

                with transaction.commit_on_success(using='reslt_writer'):
                    cursor = connections['reslt_writer'].cursor()

                    # TODO(LB): I don't like the fact that we have to pass
                    # potentially huge arguments (100k sites, for example).
                    # I would like to be able to fetch this site data from
                    # the stored function, but at the moment, the only form
                    # available is a pickled `SiteCollection` object, and I've
                    # experienced problems trying to import third-party libs
                    # in a DB function context and could not get it to reliably
                    # work.
                    # As a fix, in addition to caching the pickled
                    # SiteCollection in the DB, we could store also arrays for
                    # lons and lats. It's duplicated information, but we have a
                    # relatively low number of HazardCalculation records, so it
                    # shouldn't be a big deal.
                    cursor.execute(
                        """
                        SELECT hzrdr.finalize_hazard_curves(
                            %s, %s, %s, %s, %s, %s)
                        """,
                        [self.hc.id, rlz.id, haz_curve.id, imt, lons, lats])
Example no. 22
    def do_aggregate_post_proc(self):
        """
        Grab hazard data for all realizations and sites from the database and
        compute mean and/or quantile aggregates (depending on which options are
        enabled in the calculation).

        Post-processing results will be stored directly into the database.
        """
        num_rlzs = models.LtRealization.objects.filter(
            hazard_calculation=self.hc).count()

        num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) / int(num_rlzs)
        if num_site_blocks_per_incr == 0:
            # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
            # The minimum number of sites should be 1.
            num_site_blocks_per_incr = 1
        slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

        if self.hc.mean_hazard_curves:
            # create a new `HazardCurve` 'container' record for mean
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, "mean-curves-multi-imt", "hazard_curve_multi"),
                statistics="mean",
                imt=None,
                investigation_time=self.hc.investigation_time)

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                # create a new `HazardCurve` 'container' record for quantile
                # curves (virtual container for multiple imts)
                models.HazardCurve.objects.create(
                    output=models.Output.objects.create_output(
                        self.job, 'quantile(%s)-curves' % quantile,
                        "hazard_curve_multi"),
                    statistics="quantile",
                    imt=None,
                    quantile=quantile,
                    investigation_time=self.hc.investigation_time)

        for imt, imls in self.hc.intensity_measure_types_and_levels.items():
            im_type, sa_period, sa_damping = models.parse_imt(imt)

            # prepare `output` and `hazard_curve` containers in the DB:
            container_ids = dict()
            if self.hc.mean_hazard_curves:
                mean_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name='mean-curves-%s' % imt,
                    output_type='hazard_curve')
                mean_hc = models.HazardCurve.objects.create(
                    output=mean_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='mean')
                container_ids['mean'] = mean_hc.id

            if self.hc.quantile_hazard_curves:
                for quantile in self.hc.quantile_hazard_curves:
                    q_output = models.Output.objects.create_output(
                        job=self.job,
                        display_name=('quantile(%s)-curves-%s' %
                                      (quantile, imt)),
                        output_type='hazard_curve')
                    q_hc = models.HazardCurve.objects.create(
                        output=q_output,
                        investigation_time=self.hc.investigation_time,
                        imt=im_type,
                        imls=imls,
                        sa_period=sa_period,
                        sa_damping=sa_damping,
                        statistics='quantile',
                        quantile=quantile)
                    container_ids['q%s' % quantile] = q_hc.id

            all_curves_for_imt = models.order_by_location(
                models.HazardCurveData.objects.all_curves_for_imt(
                    self.job.id, im_type, sa_period, sa_damping))

            with transaction.commit_on_success(using='reslt_writer'):
                inserter = writer.CacheInserter(models.HazardCurveData,
                                                CURVE_CACHE_SIZE)

                for chunk in models.queryset_iter(all_curves_for_imt,
                                                  slice_incr):
                    # slice each chunk by `num_rlzs` into `site_chunk`
                    # and compute the aggregate
                    for site_chunk in block_splitter(chunk, num_rlzs):
                        site = site_chunk[0].location
                        curves_poes = [x.poes for x in site_chunk]
                        curves_weights = [x.weight for x in site_chunk]

                        # do means and quantiles
                        # quantiles first:
                        if self.hc.quantile_hazard_curves:
                            for quantile in self.hc.quantile_hazard_curves:
                                if self.hc.number_of_logic_tree_samples == 0:
                                    # explicitly weighted quantiles
                                    q_curve = weighted_quantile_curve(
                                        curves_poes, curves_weights, quantile)
                                else:
                                    # implicitly weighted quantiles
                                    q_curve = quantile_curve(
                                        curves_poes, quantile)
                                inserter.add(
                                    models.HazardCurveData(
                                        hazard_curve_id=(
                                            container_ids['q%s' % quantile]),
                                        poes=q_curve.tolist(),
                                        location=site.wkt))

                        # then means
                        if self.hc.mean_hazard_curves:
                            m_curve = mean_curve(curves_poes,
                                                 weights=curves_weights)
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=container_ids['mean'],
                                    poes=m_curve.tolist(),
                                    location=site.wkt))
                inserter.flush()
Example no. 23
 def test_pga(self):
     hc_im_type, sa_period, sa_damping = models.parse_imt("PGA")
     self.assertEqual("PGA", hc_im_type)
     self.assertEqual(None, sa_period)
     self.assertEqual(None, sa_damping)
Example no. 24
def compute_disagg(job_id, sites, lt_rlz_id, ltp):
    """
    Calculate disaggregation histograms and save the results to the database.

    Here is the basic calculation workflow:

    1. Get all sources
    2. Get IMTs
    3. Get the hazard curve for each point, IMT, and realization
    4. For each `poes_disagg`, interpolate the IML for each curve.
    5. Get GSIMs, TOM (Temporal Occurrence Model), and truncation level.
    6. Get histogram bin edges.
    7. Prepare calculation args.
    8. Call the hazardlib calculator
       (see :func:`openquake.hazardlib.calc.disagg.disaggregation`
       for more info).

    :param int job_id:
        ID of the currently running :class:`openquake.engine.db.models.OqJob`
    :param list sites:
        `list` of :class:`openquake.hazardlib.site.Site` objects, which
        indicate the locations (and associated soil parameters) for which we
        need to compute disaggregation histograms.
    :param int lt_rlz_id:
        ID of the :class:`openquake.engine.db.models.LtRealization` for which
        we want to compute disaggregation histograms. This realization will
        determine which hazard curve results to use as a basis for the
        calculation.
    :param ltp:
        a :class:`openquake.engine.input.LogicTreeProcessor` instance
    """
    # Silencing 'Too many local variables'
    # pylint: disable=R0914
    logs.LOG.debug(
        '> computing disaggregation for %(np)s sites for realization %(rlz)s'
        % dict(np=len(sites), rlz=lt_rlz_id))

    job = models.OqJob.objects.get(id=job_id)
    hc = job.hazard_calculation
    lt_rlz = models.LtRealization.objects.get(id=lt_rlz_id)
    apply_uncertainties = ltp.parse_source_model_logictree_path(
        lt_rlz.sm_lt_path)
    gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)

    src_ids = models.SourceProgress.objects.filter(lt_realization=lt_rlz)\
        .order_by('id').values_list('parsed_source_id', flat=True)
    sources = [apply_uncertainties(s.nrml)
               for s in models.ParsedSource.objects.filter(pk__in=src_ids)]

    # Make filters for distance to source and distance to rupture:
    # a better approach would be to filter the sources on distance
    # before, see the comment in the classical calculator
    src_site_filter = openquake.hazardlib.calc.filters.\
        source_site_distance_filter(hc.maximum_distance)
    rup_site_filter = openquake.hazardlib.calc.filters.\
        rupture_site_distance_filter(hc.maximum_distance)

    for imt, imls in hc.intensity_measure_types_and_levels.iteritems():
        hazardlib_imt = haz_general.imt_to_hazardlib(imt)
        hc_im_type, sa_period, sa_damping = models.parse_imt(imt)

        imls = numpy.array(imls[::-1])

        # loop over sites
        for site in sites:
            # get curve for this point/IMT/realization
            [curve] = models.HazardCurveData.objects.filter(
                location=site.location.wkt2d,
                hazard_curve__lt_realization=lt_rlz_id,
                hazard_curve__imt=hc_im_type,
                hazard_curve__sa_period=sa_period,
                hazard_curve__sa_damping=sa_damping,
            )

            # If the hazard curve is all zeros, don't even do the
            # disagg calculation.
            if all(x == 0.0 for x in curve.poes):
                logs.LOG.debug(
                    '* hazard curve contained all 0 probability values; '
                    'skipping')
                continue

            for poe in hc.poes_disagg:
                iml = numpy.interp(poe, curve.poes[::-1], imls)
                calc_kwargs = {
                    'sources': sources,
                    'site': site,
                    'imt': hazardlib_imt,
                    'iml': iml,
                    'gsims': gsims,
                    'time_span': hc.investigation_time,
                    'truncation_level': hc.truncation_level,
                    'n_epsilons': hc.num_epsilon_bins,
                    'mag_bin_width': hc.mag_bin_width,
                    'dist_bin_width': hc.distance_bin_width,
                    'coord_bin_width': hc.coordinate_bin_width,
                    'source_site_filter': src_site_filter,
                    'rupture_site_filter': rup_site_filter,
                }
                with EnginePerformanceMonitor(
                        'computing disaggregation', job_id, disagg_task):
                    bin_edges, diss_matrix = openquake.hazardlib.calc.\
                        disagg.disaggregation_poissonian(**calc_kwargs)
                    if not bin_edges:  # no ruptures generated
                        continue

                with EnginePerformanceMonitor(
                        'saving disaggregation', job_id, disagg_task):
                    _save_disagg_matrix(
                        job, site, bin_edges, diss_matrix, lt_rlz,
                        hc.investigation_time, hc_im_type, iml, poe, sa_period,
                        sa_damping
                    )

    with transaction.commit_on_success():
        # Update realization progress,
        # mark realization as complete if it is done
        haz_general.update_realization(lt_rlz_id, len(sites))

    logs.LOG.debug('< done computing disaggregation')
Example no. 25
 def test_sa(self):
     hc_im_type, sa_period, sa_damping = models.parse_imt("SA(0.1)")
     self.assertEqual("SA", hc_im_type)
     self.assertEqual(0.1, sa_period)
     self.assertEqual(models.DEFAULT_SA_DAMPING, sa_damping)