Example #1
0
class ScenarioRiskCalculator(base.RiskCalculator):
    """
    Run a scenario risk calculation
    """
    core_func = scenario_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    losses_by_key = datastore.persistent_attribute('losses_by_key')
    gmf_by_trt_gsim = datastore.persistent_attribute('gmf_by_trt_gsim')
    pre_calculator = 'scenario'
    is_stochastic = True

    def pre_execute(self):
        """
        Compute the GMFs, build the epsilons and the riskinputs, and prepare
        a dictionary with the units of measure used in the export phase.
        """
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)

        logging.info('Building the epsilons')
        eps_dict = self.make_eps_dict(
            self.oqparam.number_of_ground_motion_fields)
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = self.build_riskinputs(base.get_gmfs(self), eps_dict)

    def post_execute(self, result):
        """
        Save the losses by key in the datastore
        """
        self.losses_by_key = result
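
# --- Illustrative sketch (not part of the calculator above) ---------------
# The epsilon matrix built in pre_execute is the per-asset epsilon rows
# reordered to follow the asset collection, so that row i corresponds to
# asset i. A minimal stand-alone version, with made-up names and data:
import numpy

toy_assetcol = numpy.array([('a2',), ('a1',)], dtype=[('asset_ref', 'S3')])
toy_eps_dict = {b'a1': numpy.array([0.1, -0.2]),  # one value per GMF
                b'a2': numpy.array([0.5, 0.3])}
toy_eps_matrix = numpy.array(
    [toy_eps_dict[a['asset_ref']] for a in toy_assetcol])
# shape (num_assets, number_of_ground_motion_fields) -> (2, 2)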
Example #2
0
class ClassicalDamageCalculator(classical_risk.ClassicalRiskCalculator):
    """
    Classical damage calculator
    """
    core_task = classical_damage
    damages = datastore.persistent_attribute('damages-rlzs')

    def check_poes(self, curves_by_trt_gsim):
        """
        Raise an error if any PoE equals 1, since it would produce a log(0) in
        :func:`openquake.risklib.scientific.annual_frequency_of_exceedence`
        """
        for key, curves in curves_by_trt_gsim.items():
            for imt in self.oqparam.imtls:
                for sid, poes in enumerate(curves[imt]):
                    if (poes == 1).any():
                        raise ValueError('Found a PoE=1 for site_id=%d, %s'
                                         % (sid, imt))

    def post_execute(self, result):
        """
        Store the damage distributions in the datastore.

        :param result:
            a dictionary rlz index -> asset index -> fractions per damage state
        """
        damages_dt = numpy.dtype([(ds, numpy.float32)
                                  for ds in self.riskmodel.damage_states])
        damages = numpy.zeros((self.N, self.R), damages_dt)
        for r in result:
            for aid, fractions in result[r].items():
                damages[aid, r] = tuple(fractions)
        self.damages = damages
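
# --- Illustrative sketch (not part of the calculator above) ---------------
# post_execute stores the damage fractions in an (N, R) structured array with
# one float32 field per damage state. A tiny stand-alone version with made-up
# damage states, assets and realizations:
import numpy

damage_states = ['no_damage', 'moderate', 'complete']
damages_dt = numpy.dtype([(ds, numpy.float32) for ds in damage_states])
N, R = 2, 2  # toy numbers of assets and realizations
damages = numpy.zeros((N, R), damages_dt)
toy_result = {0: {1: [0.7, 0.2, 0.1]},            # rlz -> asset id -> fractions
              1: {0: [0.9, 0.1, 0.0]}}
for r in toy_result:
    for aid, fractions in toy_result[r].items():
        damages[aid, r] = tuple(fractions)
# damages['complete'] is now the (N, R) matrix of collapse fractions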
Example #3
0
class ClassicalRiskCalculator(base.RiskCalculator):
    """
    Classical Risk calculator
    """
    pre_calculator = 'classical'
    avg_losses = datastore.persistent_attribute('avg_losses/rlzs')
    core_func = classical_risk

    def pre_execute(self):
        """
        Associate the assets to the sites and build the riskinputs.
        """
        super(ClassicalRiskCalculator, self).pre_execute()
        hazard_from_csv = 'hazard_curves' in self.oqparam.inputs
        if hazard_from_csv:
            self.sitecol, hcurves_by_imt = readinput.get_sitecol_hcurves(
                self.oqparam)
            self.sitecol, self.assets_by_site = \
                self.assoc_assets_sites(self.sitecol)

        logging.info('Preparing the risk input')
        curves_by_trt_gsim = {}
        for dset in self.datastore['curves_by_sm'].values():
            for key, curves in dset.items():
                trt_id, gsim = key.split('-')
                curves_by_trt_gsim[int(trt_id), gsim] = curves.value
        self.riskinputs = self.build_riskinputs(curves_by_trt_gsim)

    def post_execute(self, result):
        """
        Save the losses in a compact form.

        :param result:
            a dictionary rlz_idx -> (loss_type, asset_id) -> (avg, ins)
        """
        fields = []
        for loss_type in self.riskmodel.get_loss_types():
            fields.append(('avg_loss~%s' % loss_type, float))
            fields.append(('ins_loss~%s' % loss_type, float))
        avg_loss_dt = numpy.dtype(fields)
        num_rlzs = len(self.rlzs_assoc.realizations)
        assets = riskinput.sorted_assets(self.assets_by_site)
        self.asset_no_by_id = {a.id: no for no, a in enumerate(assets)}
        avg_losses = numpy.zeros((num_rlzs, len(self.asset_no_by_id)),
                                 avg_loss_dt)

        for rlz_no in result:
            losses_by_lt_asset = result[rlz_no]
            by_asset = operator.itemgetter(1)
            for asset, keys in general.groupby(losses_by_lt_asset,
                                               by_asset).items():
                asset_no = self.asset_no_by_id[asset]
                losses = []
                for (loss_type, _) in keys:
                    losses.extend(losses_by_lt_asset[loss_type, asset])
                avg_losses[rlz_no][asset_no] = tuple(losses)

        self.avg_losses = avg_losses
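
# --- Illustrative sketch (not part of the calculator above) ---------------
# post_execute groups the (loss_type, asset_id) keys by asset so that the
# losses of all loss types of one asset fill a single row of the structured
# array. general.groupby is replaced here by a plain dict; data is made up.
import operator

losses_by_lt_asset = {('structural', 'a1'): (10.0, 2.0),   # (avg, ins)
                      ('contents', 'a1'): (4.0, 1.0),
                      ('structural', 'a2'): (7.0, 0.0)}
by_asset = operator.itemgetter(1)
grouped = {}
for key in losses_by_lt_asset:
    grouped.setdefault(by_asset(key), []).append(key)
for asset, keys in grouped.items():
    losses = []
    for loss_type, _ in sorted(keys):  # fixed loss-type order per asset
        losses.extend(losses_by_lt_asset[loss_type, asset])
    print(asset, losses)  # e.g. a1 [4.0, 1.0, 10.0, 2.0]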
Example #4
0
class ScenarioRiskCalculator(base.RiskCalculator):
    """
    Run a scenario risk calculation
    """
    core_func = scenario_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    pre_calculator = 'scenario'
    is_stochastic = True

    def pre_execute(self):
        """
        Compute the GMFs, build the epsilons and the riskinputs, and prepare
        a dictionary with the units of measure used in the export phase.
        """
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)
        logging.info('Building the epsilons')
        self.epsilon_matrix = self.make_eps(
            self.oqparam.number_of_ground_motion_fields)
        sitecol, gmfs = base.get_gmfs(self)
        self.riskinputs = self.build_riskinputs(gmfs, self.epsilon_matrix)

    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results in the datastore.
        """
        ltypes = self.riskmodel.loss_types
        multi_stat_dt = numpy.dtype([(lt, stat_dt) for lt in ltypes])
        with self.monitor('saving outputs', autoflush=True):
            R = len(self.rlzs_assoc.realizations)
            N = len(self.assetcol)

            # agg losses
            agglosses = numpy.zeros(R, multi_stat_dt)
            mean, std = scientific.mean_std(result['agg'])
            for l, lt in enumerate(ltypes):
                agg = agglosses[lt]
                agg['mean'] = mean[l, :, 0]
                agg['stddev'] = std[l, :, 0]
                agg['mean_ins'] = mean[l, :, 1]
                agg['stddev_ins'] = std[l, :, 1]

            # average losses
            avglosses = numpy.zeros((N, R), multi_stat_dt)
            for (l, r, aid, stat) in result['avg']:
                avglosses[ltypes[l]][aid, r] = stat
            self.datastore['avglosses-rlzs'] = avglosses
            self.datastore['agglosses-rlzs'] = agglosses
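
# --- Illustrative sketch (not part of the calculator above) ---------------
# The aggregate losses above are reduced to mean/stddev per realization and
# stored in a structured array; stat_dt and the shape of result['agg'] are
# assumptions made for this toy example (one loss type, plain vs insured in
# the last axis), not the engine's actual definitions.
import numpy

stat_dt = numpy.dtype([('mean', numpy.float32), ('stddev', numpy.float32),
                       ('mean_ins', numpy.float32),
                       ('stddev_ins', numpy.float32)])
ltypes = ['structural']
E, L, R = 100, len(ltypes), 2  # GMFs, loss types, realizations
agg = numpy.random.random((E, L, R, 2))  # last axis: plain / insured
mean, std = agg.mean(axis=0), agg.std(axis=0)
multi_stat_dt = numpy.dtype([(lt, stat_dt) for lt in ltypes])
agglosses = numpy.zeros(R, multi_stat_dt)
for l, lt in enumerate(ltypes):
    agglosses[lt]['mean'] = mean[l, :, 0]
    agglosses[lt]['stddev'] = std[l, :, 0]
    agglosses[lt]['mean_ins'] = mean[l, :, 1]
    agglosses[lt]['stddev_ins'] = std[l, :, 1]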
Example #5
0
class ScenarioRiskCalculator(base.RiskCalculator):
    """
    Run a scenario risk calculation
    """
    core_func = scenario_risk
    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    pre_calculator = 'scenario'
    is_stochastic = True

    def pre_execute(self):
        """
        Compute the GMFs, build the epsilons and the riskinputs, and prepare
        a dictionary with the units of measure used in the export phase.
        """
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)
        logging.info('Building the epsilons')
        eps_dict = self.make_eps_dict(
            self.oqparam.number_of_ground_motion_fields)
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = self.build_riskinputs(self.gmfs, eps_dict)

    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results in the datastore.
        """
        with self.monitor('saving outputs', autoflush=True):
            L = len(self.riskmodel.loss_types)
            R = len(self.rlzs_assoc.realizations)
            N = len(self.assetcol)
            arr = dict(avg=numpy.zeros((N, L, R), stat_dt),
                       agg=numpy.zeros((L, R), stat_dt))
            for (l, r), res in result.items():
                for keytype, key in res:
                    if keytype == 'agg':
                        agg_losses = arr[keytype][l, r]
                        mean, std = scientific.mean_std(res[keytype, key])
                        if key == 0:
                            agg_losses['mean'] = mean
                            agg_losses['stddev'] = std
                        else:
                            agg_losses['mean_ins'] = mean
                            agg_losses['stddev_ins'] = std
                    else:
                        arr[keytype][key, l, r] = res[keytype, key]
            self.datastore['avglosses'] = arr['avg']
            self.datastore['agglosses'] = arr['agg']
Example #6
0
class ScenarioDamageCalculator(base.RiskCalculator):
    """
    Scenario damage calculator
    """
    pre_calculator = 'scenario'
    core_func = scenario_damage
    damages_by_key = datastore.persistent_attribute('damages_by_key')
    is_stochastic = True

    def pre_execute(self):
        if 'gmfs' in self.oqparam.inputs:
            self.pre_calculator = None
        base.RiskCalculator.pre_execute(self)
        self.riskinputs = self.build_riskinputs(base.get_gmfs(self))

    def post_execute(self, result):
        self.damages_by_key = result
Example #7
0
class ClassicalDamageCalculator(base.RiskCalculator):
    """
    Classical damage calculator
    """
    core_func = classical_damage
    damages_by_rlz = datastore.persistent_attribute('damages_by_rlz')

    def pre_execute(self):
        """
        Read the curves and build the riskinputs.
        """
        super(ClassicalDamageCalculator, self).pre_execute()

        logging.info('Reading hazard curves from CSV')
        sites, hcurves_by_imt = readinput.get_sitecol_hcurves(self.oqparam)

        with self.monitor('assoc_assets_sites'):
            sitecol, assets_by_site = self.assoc_assets_sites(sites)
        num_assets = sum(len(assets) for assets in assets_by_site)
        num_sites = len(sitecol)
        logging.info('Associated %d assets to %d sites', num_assets, num_sites)

        logging.info('Preparing the risk input')
        self.riskinputs = self.build_riskinputs({
            (0, 'FromFile'): hcurves_by_imt
        })
        fake_rlz = logictree.Realization(value=('FromFile', ),
                                         weight=1,
                                         lt_path=('', ),
                                         ordinal=0,
                                         lt_uid=('*', ))
        self.rlzs_assoc = logictree.RlzsAssoc([fake_rlz])

    def post_execute(self, result):
        """
        Save the damage distributions in the datastore.

        :param result:
            a dictionary asset -> fractions per damage state
        """
        self.damages_by_rlz = result
Example #8
0
class ClassicalDamageCalculator(classical_risk.ClassicalRiskCalculator):
    """
    Classical damage calculator
    """
    core_func = classical_damage
    damages = datastore.persistent_attribute('damages-rlzs')

    def post_execute(self, result):
        """
        Store the damage distributions in the datastore.

        :param result:
            a dictionary rlz index -> asset index -> fractions per damage state
        """
        damages_dt = numpy.dtype([(ds, numpy.float32)
                                  for ds in self.riskmodel.damage_states])
        damages = numpy.zeros((self.N, self.R), damages_dt)
        for r in result:
            for aid, fractions in result[r].items():
                damages[aid, r] = tuple(fractions)
        self.damages = damages
Example #9
0
class EventBasedRuptureCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ruptures only
    """
    core_func = compute_ruptures
    tags = datastore.persistent_attribute('tags')
    sescollection = datastore.persistent_attribute('sescollection')
    num_ruptures = datastore.persistent_attribute('num_ruptures')
    counts_per_rlz = datastore.persistent_attribute('counts_per_rlz')
    is_stochastic = True

    def pre_execute(self):
        """
        Set a seed on each source
        """
        super(EventBasedRuptureCalculator, self).pre_execute()
        rnd = random.Random()
        rnd.seed(self.oqparam.random_seed)
        for src in self.csm.get_sources():
            src.seed = rnd.randint(0, MAX_INT)

    def execute(self):
        """
        Run `core_func(sources, sitecol, info, monitor)` in parallel,
        distributing the sources according to their weight and
        tectonic region type.
        """
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = self.oqparam
        sources = self.csm.get_sources()
        ruptures_by_trt = parallel.apply_reduce(
            self.core_func.__func__,
            (sources, self.sitecol, self.rlzs_assoc.csm_info, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('trt_model_id'))

        store_source_chunks(self.datastore)
        logging.info('Generated %d SESRuptures',
                     sum(len(v) for v in ruptures_by_trt.values()))

        self.rlzs_assoc = self.csm.get_rlzs_assoc(
            lambda trt: len(ruptures_by_trt.get(trt.id, [])))

        return ruptures_by_trt

    def post_execute(self, result):
        """
        Save the SES collection and the array counts_per_rlz
        """
        nc = self.rlzs_assoc.csm_info.num_collections
        sescollection = numpy.array([{} for col_id in range(nc)])
        tags = []
        ordinal = 0
        for trt_id in sorted(result):
            for sr in sorted(result[trt_id]):
                sr.ordinal = ordinal
                ordinal += 1
                sescollection[sr.col_id][sr.tag] = sr
                tags.append(sr.tag)
                if len(sr.tag) > 100:
                    logging.error(
                        'The tag %s is %d characters long; it will be '
                        'truncated to 100 characters in the /tags array',
                        sr.tag, len(sr.tag))
        logging.info('Saving the SES collection')
        with self.monitor('saving ruptures', autoflush=True):
            self.tags = numpy.array(tags, (bytes, 100))
            self.sescollection = sescollection
        with self.monitor('counts_per_rlz'):
            self.num_ruptures = numpy.array(list(map(len, sescollection)))
            self.counts_per_rlz = counts_per_rlz(len(self.sitecol),
                                                 self.rlzs_assoc,
                                                 sescollection)
            self.datastore['counts_per_rlz'].attrs[
                'gmfs_nbytes'] = get_gmfs_nbytes(len(self.sitecol),
                                                 len(self.oqparam.imtls),
                                                 self.rlzs_assoc,
                                                 sescollection)
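
# --- Illustrative sketch (not part of the calculator above) ---------------
# execute() distributes the sources with apply_reduce, grouping them by
# tectonic region type and splitting each group into blocks of roughly equal
# weight. A rough serial analogue of that splitting logic (this is NOT the
# openquake.parallel implementation, just a sketch of the idea):
from itertools import groupby

def apply_reduce_serial(task, items, agg, acc, weight, key, max_weight=100.0):
    for _, group in groupby(sorted(items, key=key), key=key):
        block, tot = [], 0.0
        for item in group:
            block.append(item)
            tot += weight(item)
            if tot >= max_weight:  # close the block once it is heavy enough
                acc = agg(acc, task(block))
                block, tot = [], 0.0
        if block:  # leftover light block
            acc = agg(acc, task(block))
    return acc

# e.g. apply_reduce_serial(sum, [3, 1, 2, 5], agg=lambda a, r: a + r, acc=0,
#                          weight=lambda x: x, key=lambda x: x % 2)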
Example #10
0
class ClassicalRiskCalculator(base.RiskCalculator):
    """
    Classical Risk calculator
    """
    pre_calculator = 'classical'
    avg_losses = datastore.persistent_attribute('avg_losses-rlzs')
    core_func = classical_risk

    def pre_execute(self):
        """
        Associate the assets to the sites and build the riskinputs.
        """
        if 'hazard_curves' in self.oqparam.inputs:  # read hazard from file
            haz_sitecol, haz_curves = readinput.get_hcurves(self.oqparam)
            self.read_exposure()  # define .assets_by_site
            self.load_riskmodel()
            self.sitecol, self.assets_by_site = self.assoc_assets_sites(
                haz_sitecol)
            curves_by_trt_gsim = {(0, 'FromFile'): haz_curves}
            self.rlzs_assoc = logictree.trivial_rlzs_assoc()
            self.save_mesh()
        else:  # compute hazard
            super(ClassicalRiskCalculator, self).pre_execute()
            logging.info('Preparing the risk input')
            curves_by_trt_gsim = {}
            for dset in self.datastore['curves_by_sm'].values():
                for key, curves in dset.items():
                    trt_id, gsim = key.split('-')
                    curves_by_trt_gsim[int(trt_id), gsim] = curves.value
        self.assetcol = riskinput.build_asset_collection(
            self.assets_by_site, self.oqparam.time_event)
        self.riskinputs = self.build_riskinputs(curves_by_trt_gsim)
        self.monitor.oqparam = self.oqparam

        self.N = sum(len(assets) for assets in self.assets_by_site)
        self.L = len(self.riskmodel.loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.I = self.oqparam.insured_losses
        self.Q1 = len(self.oqparam.quantile_loss_curves) + 1

    def post_execute(self, result):
        """
        Save the losses in a compact form.
        """
        self.loss_curve_dt, self.loss_maps_dt = (
            self.riskmodel.build_loss_dtypes(
                self.oqparam.conditional_loss_poes, self.I))

        self.save_loss_curves(result)
        if self.oqparam.conditional_loss_poes:
            self.save_loss_maps(result)

    def save_loss_curves(self, result):
        """
        Saving loss curves in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_curves = numpy.zeros((self.N, self.R), self.loss_curve_dt)
        for l, r, aid, lcurve in result['loss_curves']:
            loss_curves_lt = loss_curves[ltypes[l]]
            for i, name in enumerate(loss_curves_lt.dtype.names):
                loss_curves_lt[name][aid, r] = lcurve[i]
        self.datastore['loss_curves-rlzs'] = loss_curves

        # loss curves stats
        if self.R > 1:
            stat_curves = numpy.zeros((self.Q1, self.N), self.loss_curve_dt)
            for l, aid, statcurve in result['stat_curves']:
                stat_curves_lt = stat_curves[ltypes[l]]
                for name in stat_curves_lt.dtype.names:
                    for s in range(self.Q1):
                        stat_curves_lt[name][s, aid] = statcurve[name][s]
            self.datastore['loss_curves-stats'] = stat_curves

    def save_loss_maps(self, result):
        """
        Saving loss maps in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_maps = numpy.zeros((self.N, self.R), self.loss_maps_dt)
        for l, r, aid, lmaps in result['loss_maps']:
            loss_maps_lt = loss_maps[ltypes[l]]
            for i, name in enumerate(loss_maps_lt.dtype.names):
                loss_maps_lt[name][aid, r] = lmaps[i]
        self.datastore['loss_maps-rlzs'] = loss_maps

        # loss maps stats
        if self.R > 1:
            stat_maps = numpy.zeros((self.Q1, self.N), self.loss_maps_dt)
            for l, aid, statmaps in result['stat_maps']:
                statmaps_lt = stat_maps[ltypes[l]]
                for name in statmaps_lt.dtype.names:
                    for s in range(self.Q1):
                        statmaps_lt[name][s, aid] = statmaps[name][s]
            self.datastore['loss_maps-stats'] = stat_maps
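
# --- Illustrative sketch (not part of the calculator above) ---------------
# save_loss_curves / save_loss_maps write into nested structured arrays: the
# outer fields are the loss types, the inner fields the components of a
# curve, indexed by (asset, realization). The field names and sizes below are
# assumptions made for the sake of the example.
import numpy

C = 4  # assumed curve resolution
curve_dt = numpy.dtype([('losses', (numpy.float32, C)),
                        ('poes', (numpy.float32, C)),
                        ('avg', numpy.float32)])
loss_curve_dt = numpy.dtype([('structural', curve_dt)])
N, R = 2, 3  # toy numbers of assets and realizations
loss_curves = numpy.zeros((N, R), loss_curve_dt)
lcurve = (numpy.linspace(0, 300, C), numpy.linspace(1, 0, C), 42.0)
loss_curves_lt = loss_curves['structural']
for i, name in enumerate(loss_curves_lt.dtype.names):
    loss_curves_lt[name][0, 1] = lcurve[i]  # asset 0, realization 1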
Example #11
0
class EventBasedRiskCalculator(base.RiskCalculator):
    """
    Event based PSHA calculator generating the event loss table and
    fixed ratios loss curves.
    """
    pre_calculator = 'event_based'
    core_func = event_based_risk

    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    spec_indices = datastore.persistent_attribute('spec_indices')
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some datasets in the datastore.
        """
        super(EventBasedRiskCalculator, self).pre_execute()
        if not self.riskmodel:  # there is no riskmodel, exit early
            self.execute = lambda: None
            self.post_execute = lambda result: None
            return
        oq = self.oqparam
        if self.riskmodel.covs:
            epsilon_sampling = oq.epsilon_sampling
        else:
            epsilon_sampling = 1  # only one ignored epsilon
        correl_model = readinput.get_correl_model(oq)
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        assets_by_site = self.assets_by_site
        # the following is needed to set the asset idx attribute
        self.assetcol = riskinput.build_asset_collection(
            assets_by_site, oq.time_event)
        self.spec_indices = numpy.array(
            [a['asset_ref'] in oq.specific_assets for a in self.assetcol])

        logging.info('Populating the risk inputs')
        rup_by_tag = sum(self.datastore['sescollection'], AccumDict())
        all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)]
        for i, rup in enumerate(all_ruptures):
            rup.ordinal = i
        num_samples = min(len(all_ruptures), epsilon_sampling)
        self.epsilon_matrix = eps = riskinput.make_eps(assets_by_site,
                                                       num_samples,
                                                       oq.master_seed,
                                                       oq.asset_correlation)
        logging.info('Generated %d epsilons', num_samples * len(eps))
        self.riskinputs = list(
            self.riskmodel.build_inputs_from_ruptures(
                self.sitecol.complete, all_ruptures, gsims_by_col,
                oq.truncation_level, correl_model, eps, oq.concurrent_tasks
                or 1))
        logging.info('Built %d risk inputs', len(self.riskinputs))

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.L = len(loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.outs = OUTPUTS
        self.datasets = {}
        # ugly: attaching an attribute needed in the task function
        self.monitor.num_outputs = len(self.outs)
        self.monitor.num_assets = self.count_assets()
        for o, out in enumerate(self.outs):
            self.datastore.hdf5.create_group(out)
            for l, loss_type in enumerate(loss_types):
                for r, rlz in enumerate(self.rlzs_assoc.realizations):
                    key = '/%s/%s' % (loss_type, rlz.uid)
                    if o == AGGLOSS:  # loss tables
                        dset = self.datastore.create_dset(out + key, elt_dt)
                    elif o == SPECLOSS:  # specific losses
                        dset = self.datastore.create_dset(out + key, ela_dt)
                    self.datasets[o, l, r] = dset

    def execute(self):
        """
        Run the event_based_risk calculator and aggregate the results
        """
        return apply_reduce(self.core_func.__func__,
                            (self.riskinputs, self.riskmodel, self.rlzs_assoc,
                             self.assets_by_site, self.epsilon_matrix,
                             self.oqparam.specific_assets, self.monitor),
                            concurrent_tasks=self.oqparam.concurrent_tasks,
                            agg=self.agg,
                            acc=cube(self.monitor.num_outputs, self.L, self.R,
                                     list),
                            weight=operator.attrgetter('weight'),
                            key=operator.attrgetter('col_id'))

    def agg(self, acc, result):
        """
        Aggregate lists of arrays into longer lists.

        :param acc: accumulator array of shape (O, L, R)
        :param result: a numpy array of shape (O, L, R)
        """
        for idx, arrays in numpy.ndenumerate(result):
            # TODO: special case for avg_losses, they can be summed directly
            if idx[0] == AVGLOSS:  # arrays has only 1 element
                acc[idx] = [sum(acc[idx] + arrays)]
            else:
                acc[idx].extend(arrays)
        return acc

    def post_execute(self, result):
        """
        Save the event loss table in the datastore.

        :param result:
            a numpy array of shape (O, L, R) containing lists of arrays
        """
        insured_losses = self.oqparam.insured_losses
        ses_ratio = self.oqparam.ses_ratio
        saved = {out: 0 for out in self.outs}
        N = len(self.assetcol)
        R = len(self.rlzs_assoc.realizations)
        ltypes = self.riskmodel.loss_types

        # average losses
        multi_avg_dt = numpy.dtype([(lt, (F32, 2)) for lt in ltypes])
        avg_losses = numpy.zeros((N, R), multi_avg_dt)

        # loss curves
        multi_lr_dt = numpy.dtype([
            (ltype, (F32, cbuilder.curve_resolution))
            for ltype, cbuilder in zip(ltypes, self.riskmodel.curve_builders)
        ])
        rcurves = numpy.zeros((N, R), multi_lr_dt)
        icurves = numpy.zeros((N, R), multi_lr_dt)

        with self.monitor('saving loss table', autoflush=True,
                          measuremem=True):
            for (o, l, r), data in numpy.ndenumerate(result):
                if not data:  # empty list
                    continue
                elif o == IC and not insured_losses:  # no insured curves
                    continue
                lt = self.riskmodel.loss_types[l]
                cb = self.riskmodel.curve_builders[l]
                if o in (AGGLOSS, SPECLOSS):  # data is a list of arrays
                    losses = numpy.concatenate(data)
                    self.datasets[o, l, r].extend(losses)
                    saved[self.outs[o]] += losses.nbytes
                elif o == AVGLOSS:  # average losses
                    avg_losses_lt = avg_losses[lt]
                    asset_values = self.assetcol[lt]
                    [avgloss] = data
                    for i, avalue in enumerate(asset_values):
                        avg_losses_lt[i, r] = tuple(avgloss[i] * avalue)
                elif cb.user_provided:  # risk curves
                    # data is a list of dicts asset idx -> counts
                    poes = cb.build_poes(N, data, ses_ratio)
                    if o == RC:
                        rcurves[lt][:, r] = poes
                    elif insured_losses:
                        icurves[lt][:, r] = poes
                    saved[self.outs[o]] += poes.nbytes
                self.datastore.hdf5.flush()

        self.datastore['avg_losses-rlzs'] = avg_losses
        saved['avg_losses-rlzs'] = avg_losses.nbytes
        self.datastore['rcurves-rlzs'] = rcurves
        if insured_losses:
            self.datastore['icurves-rlzs'] = icurves
        self.datastore.hdf5.flush()

        for out in self.outs:
            nbytes = saved[out]
            if nbytes:
                self.datastore[out].attrs['nbytes'] = nbytes
                logging.info('Saved %s in %s', humansize(nbytes), out)
            else:  # remove empty outputs
                del self.datastore[out]

        if self.oqparam.specific_assets:
            self.build_specific_loss_curves(
                self.datastore['specific-losses-rlzs'])

        rlzs = self.rlzs_assoc.realizations
        if len(rlzs) > 1:
            self.compute_store_stats(rlzs, '')  # generic
            self.compute_store_stats(rlzs, '_specific')

        if (self.oqparam.conditional_loss_poes
                and 'rcurves-rlzs' in self.datastore):
            self.build_loss_maps('rcurves-rlzs', 'rmaps-rlzs')
        if (self.oqparam.conditional_loss_poes
                and 'icurves-rlzs' in self.datastore):
            self.build_loss_maps('icurves-rlzs', 'imaps-rlzs')

    def build_specific_loss_curves(self, group, kind='loss'):
        ses_ratio = self.oqparam.ses_ratio
        assetcol = self.assetcol[self.spec_indices]
        for cb in self.riskmodel.curve_builders:
            for rlz, dset in group[cb.loss_type].items():
                losses_by_aid = collections.defaultdict(list)
                for ela in dset.value:
                    losses_by_aid[ela['ass_id']].append(ela[kind])
                curves = cb.build_loss_curves(assetcol, losses_by_aid,
                                              ses_ratio)
                key = 'specific-loss_curves-rlzs/%s/%s' % (cb.loss_type, rlz)
                self.datastore[key] = curves

    def build_loss_maps(self, curves_key, maps_key):
        """
        Build loss maps from the loss curves
        """
        oq = self.oqparam
        rlzs = self.datastore['rlzs_assoc'].realizations
        curves = self.datastore[curves_key].value
        N = len(self.assetcol)
        R = len(rlzs)
        P = len(oq.conditional_loss_poes)
        loss_map_dt = numpy.dtype([(lt, (F32, P))
                                   for lt in self.riskmodel.loss_types])
        maps = numpy.zeros((N, R), loss_map_dt)
        for cb in self.riskmodel.curve_builders:
            asset_values = self.assetcol[cb.loss_type]
            curves_lt = curves[cb.loss_type]
            maps_lt = maps[cb.loss_type]
            for rlz in rlzs:
                loss_maps = scientific.calc_loss_maps(
                    oq.conditional_loss_poes, asset_values, cb.ratios,
                    curves_lt[:, rlz.ordinal])
                for i in range(N):
                    # NB: it does not work without the loop, there is a
                    # ValueError: could not broadcast input array from shape
                    # (N,1) into shape (N)
                    maps_lt[i, rlz.ordinal] = loss_maps[i]
        self.datastore[maps_key] = maps

    # ################### methods to compute statistics  #################### #

    def _collect_all_data(self):
        # return a list of list of outputs
        if 'rcurves-rlzs' not in self.datastore:
            return []
        all_data = []
        assets = self.assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        avg_losses = self.datastore['avg_losses-rlzs'].value
        r_curves = self.datastore['rcurves-rlzs'].value
        insured_losses = self.oqparam.insured_losses
        i_curves = (self.datastore['icurves-rlzs'].value
                    if insured_losses else None)
        for loss_type, cbuilder in zip(self.riskmodel.loss_types,
                                       self.riskmodel.curve_builders):
            avglosses = avg_losses[loss_type]
            rcurves = r_curves[loss_type]
            asset_values = self.assetcol[loss_type]
            data = []
            for rlz in rlzs:
                average_losses = avglosses[:, rlz.ordinal]
                out = scientific.Output(
                    assets,
                    loss_type,
                    rlz.ordinal,
                    rlz.weight,
                    loss_curves=old_loss_curves(asset_values, rcurves,
                                                rlz.ordinal, cbuilder.ratios),
                    insured_curves=old_loss_curves(
                        asset_values, i_curves[loss_type], rlz.ordinal,
                        cbuilder.ratios) if i_curves else None,
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            all_data.append(data)
        return all_data

    def _collect_specific_data(self):
        # return a list of list of outputs
        if not self.oqparam.specific_assets:
            return []

        specific_assets = set(self.oqparam.specific_assets)
        assetcol = self.assetcol
        specific_ids = []
        for i, a in enumerate(self.assetcol):
            if a['asset_ref'] in specific_assets:
                specific_ids.append(i)

        assets = assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        specific_data = []
        avglosses = self.datastore['avg_losses-rlzs'][specific_ids]
        for loss_type in self.riskmodel.loss_types:
            group = self.datastore['/specific-loss_curves-rlzs/%s' % loss_type]
            data = []
            avglosses_lt = avglosses[loss_type]
            for rlz, dataset in zip(rlzs, group.values()):
                average_losses = avglosses_lt[:, rlz.ordinal]
                lcs = dataset.value
                losses_poes = numpy.array(  # -> shape (N, 2, C)
                    [lcs['losses'], lcs['poes']]).transpose(1, 0, 2)
                out = scientific.Output(
                    assets,
                    loss_type,
                    rlz.ordinal,
                    rlz.weight,
                    loss_curves=losses_poes,
                    insured_curves=None,  # FIXME: why None?
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            specific_data.append(data)
        return specific_data

    def compute_store_stats(self, rlzs, kind):
        """
        Compute and store the statistical outputs
        """
        oq = self.oqparam
        builder = scientific.StatsBuilder(oq.quantile_loss_curves,
                                          oq.conditional_loss_poes, [],
                                          scientific.normalize_curves_eb)

        if kind == '_specific':
            all_stats = [
                builder.build(data, prefix='specific-')
                for data in self._collect_specific_data()
            ]
        else:
            all_stats = map(builder.build, self._collect_all_data())
        for stat in all_stats:
            # there is one stat for each loss_type
            curves, ins_curves, maps = scientific.get_stat_curves(stat)
            for i, path in enumerate(stat.paths):
                # there are paths like
                # %s-stats/structural/mean
                # %s-stats/structural/quantile-0.1
                # ...
                self.datastore[path % 'loss_curves'] = curves[i]
                if oq.insured_losses:
                    self.datastore[path % 'ins_curves'] = ins_curves[i]
                if oq.conditional_loss_poes:
                    self.datastore[path % 'loss_maps'] = maps[i]

        stats = scientific.SimpleStats(rlzs, oq.quantile_loss_curves)
        nbytes = stats.compute('avg_losses-rlzs', self.datastore)
        self.datastore['avg_losses-stats'].attrs['nbytes'] = nbytes
        self.datastore.hdf5.flush()
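
# --- Illustrative sketch (not part of the calculator above) ---------------
# The accumulator passed to apply_reduce above is an object array of shape
# (O, L, R) whose cells are lists ("cube"); agg() extends each cell with the
# arrays produced by a task, except for the AVGLOSS cells which keep a single
# running sum. cube() is re-implemented here only for illustration.
import numpy

def cube(O, L, R, factory):
    arr = numpy.empty((O, L, R), object)
    for idx, _ in numpy.ndenumerate(arr):
        arr[idx] = factory()
    return arr

O, L, R = 2, 1, 1
AVGLOSS = 0  # assumed index of the average-loss output
acc = cube(O, L, R, list)
result = cube(O, L, R, list)
result[0, 0, 0] = [numpy.array([0.1, 0.2])]                 # average losses
result[1, 0, 0] = [numpy.array([1.0]), numpy.array([2.0])]  # e.g. loss tables
for idx, arrays in numpy.ndenumerate(result):
    if idx[0] == AVGLOSS:
        acc[idx] = [sum(acc[idx] + arrays)]  # elementwise running sum
    else:
        acc[idx].extend(arrays)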
Example #12
0
class ClassicalCalculator(base.HazardCalculator):
    """
    Classical PSHA calculator
    """
    core_task = classical
    source_info = datastore.persistent_attribute('source_info')

    def agg_dicts(self, acc, val):
        """
        Aggregate dictionaries of hazard curves by updating the accumulator.

        :param acc: accumulator dictionary
        :param val: a nested dictionary trt_id -> ProbabilityMap
        """
        with self.monitor('aggregate curves', autoflush=True):
            if hasattr(val, 'calc_times'):
                acc.calc_times.extend(val.calc_times)
            if hasattr(val, 'eff_ruptures'):
                acc.eff_ruptures += val.eff_ruptures
            for bb in getattr(val, 'bbs', []):
                acc.bb_dict[bb.lt_model_id, bb.site_id].update_bb(bb)
            acc |= val
        self.datastore.flush()
        return acc

    def count_eff_ruptures(self, result_dict, trt_model):
        """
        Returns the number of ruptures in the trt_model (after filtering)
        or 0 if the trt_model has been filtered away.

        :param result_dict: a dictionary with keys (trt_id, gsim)
        :param trt_model: a TrtModel instance
        """
        return (result_dict.eff_ruptures.get(trt_model.id, 0) / self.num_tiles)

    def zerodict(self):
        """
        Initial accumulator, an empty ProbabilityMap
        """
        zd = ProbabilityMap()
        zd.calc_times = []
        zd.eff_ruptures = AccumDict()  # trt_id -> eff_ruptures
        zd.bb_dict = {(smodel.ordinal, sid): BoundingBox(smodel.ordinal, sid)
                      for sid in self.sitecol.sids
                      for smodel in self.csm.source_models
                      } if self.oqparam.poes_disagg else {}
        return zd

    def execute(self):
        """
        Run `core_task(sources, sitecol, monitor)` in parallel,
        distributing the sources according to their weight and
        tectonic region type.
        """
        monitor = self.monitor.new(self.core_task.__name__)
        monitor.oqparam = self.oqparam
        curves_by_trt_id = self.taskman.reduce(self.agg_dicts, self.zerodict())
        self.save_data_transfer(self.taskman)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(curves_by_trt_id)
        self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
            partial(self.count_eff_ruptures, curves_by_trt_id))
        self.datastore['csm_info'] = self.csm.info
        return curves_by_trt_id

    def store_source_info(self, curves_by_trt_id):
        # store the information about received data
        received = self.taskman.received
        if received:
            tname = self.taskman.name
            self.datastore.save(
                'job_info', {
                    tname + '_max_received_per_task': max(received),
                    tname + '_tot_received': sum(received),
                    tname + '_num_tasks': len(received)
                })
        # then save the calculation times per each source
        calc_times = getattr(curves_by_trt_id, 'calc_times', [])
        if calc_times:
            sources = self.csm.get_sources()
            info_dict = {(rec['trt_model_id'], rec['source_id']): rec
                         for rec in self.source_info}
            for src_idx, dt in calc_times:
                src = sources[src_idx]
                info = info_dict[src.trt_model_id, src.source_id]
                info['calc_time'] += dt
            self.source_info = numpy.array(
                sorted(info_dict.values(),
                       key=operator.itemgetter(7),
                       reverse=True), source.source_info_dt)
        self.datastore.hdf5.flush()

    def post_execute(self, curves_by_trt_id):
        """
        Collect the hazard curves by realization and export them.

        :param curves_by_trt_id:
            a dictionary trt_id -> hazard curves
        """
        nsites = len(self.sitecol)
        imtls = self.oqparam.imtls
        curves_by_trt_gsim = {}

        with self.monitor('saving probability maps', autoflush=True):
            for trt_id in curves_by_trt_id:
                key = 'poes/%04d' % trt_id
                self.datastore[key] = curves_by_trt_id[trt_id]
                self.datastore.set_attrs(key,
                                         trt=self.csm.info.get_trt(trt_id))
                gsims = self.rlzs_assoc.gsims_by_trt_id[trt_id]
                for i, gsim in enumerate(gsims):
                    curves_by_trt_gsim[trt_id, gsim] = (
                        curves_by_trt_id[trt_id].extract(i))
            self.datastore.set_nbytes('poes')

        with self.monitor('combine curves_by_rlz', autoflush=True):
            curves_by_rlz = self.rlzs_assoc.combine_curves(curves_by_trt_gsim)

        self.save_curves({
            rlz: array_of_curves(curves, nsites, imtls)
            for rlz, curves in curves_by_rlz.items()
        })

    def save_curves(self, curves_by_rlz):
        """
        Save the dictionary curves_by_rlz
        """
        oq = self.oqparam
        rlzs = self.rlzs_assoc.realizations
        nsites = len(self.sitecol)
        if oq.individual_curves:
            with self.monitor('save curves_by_rlz', autoflush=True):
                for rlz, curves in curves_by_rlz.items():
                    self.store_curves('rlz-%03d' % rlz.ordinal, curves, rlz)

            if len(rlzs) == 1:  # cannot compute statistics
                [self.mean_curves] = curves_by_rlz.values()
                return

        with self.monitor('compute and save statistics', autoflush=True):
            weights = (None if oq.number_of_logic_tree_samples else
                       [rlz.weight for rlz in rlzs])

            # mean curves are always computed but stored only on request
            zc = zero_curves(nsites, oq.imtls)
            self.mean_curves = numpy.array(zc)
            for imt in oq.imtls:
                self.mean_curves[imt] = scientific.mean_curve(
                    [curves_by_rlz.get(rlz, zc)[imt] for rlz in rlzs], weights)

            self.quantile = {}
            for q in oq.quantile_hazard_curves:
                self.quantile[q] = qc = numpy.array(zc)
                for imt in oq.imtls:
                    curves = [curves_by_rlz[rlz][imt] for rlz in rlzs]
                    qc[imt] = scientific.quantile_curve(curves, q,
                                                        weights).reshape(
                                                            (nsites, -1))

            if oq.mean_hazard_curves:
                self.store_curves('mean', self.mean_curves)
            for q in self.quantile:
                self.store_curves('quantile-%s' % q, self.quantile[q])

    def hazard_maps(self, curves):
        """
        Compute the hazard maps associated to the curves
        """
        maps = zero_maps(len(self.sitecol), self.oqparam.imtls,
                         self.oqparam.poes)
        for imt in curves.dtype.fields:
            # build a matrix of size (N, P)
            data = calc.compute_hazard_maps(curves[imt],
                                            self.oqparam.imtls[imt],
                                            self.oqparam.poes)
            for poe, hmap in zip(self.oqparam.poes, data.T):
                maps['%s-%s' % (imt, poe)] = hmap
        return maps

    def store_curves(self, kind, curves, rlz=None):
        """
        Store all kind of curves, optionally computing maps and uhs curves.

        :param kind: the kind of curves to store
        :param curves: an array of N curves to store
        :param rlz: hazard realization, if any
        """
        oq = self.oqparam
        self._store('hcurves/' + kind, curves, rlz, nbytes=curves.nbytes)
        self.datastore['hcurves'].attrs['imtls'] = [
            (imt, len(imls)) for imt, imls in self.oqparam.imtls.items()
        ]
        if oq.hazard_maps or oq.uniform_hazard_spectra:
            # hmaps is a composite array of shape (N, P)
            hmaps = self.hazard_maps(curves)
            self._store('hmaps/' + kind,
                        hmaps,
                        rlz,
                        poes=oq.poes,
                        nbytes=hmaps.nbytes)

    def _store(self, name, curves, rlz, **kw):
        self.datastore.hdf5[name] = curves
        dset = self.datastore.hdf5[name]
        if rlz is not None:
            dset.attrs['uid'] = rlz.uid
        for k, v in kw.items():
            dset.attrs[k] = v
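
# --- Illustrative sketch (not part of the calculator above) ---------------
# hazard_maps() converts hazard curves into maps: for each site the curve
# PoE(iml) is inverted to find the intensity level matching each target PoE.
# A toy single-site stand-in for calc.compute_hazard_maps, interpolating in
# log space (all values are made up):
import numpy

imls = numpy.array([0.01, 0.1, 0.2, 0.4, 0.8])          # intensity levels
curve_poes = numpy.array([0.9, 0.5, 0.2, 0.05, 0.01])   # decreasing PoEs
target_poes = [0.1, 0.02]
# numpy.interp wants increasing x, hence the [::-1] reversals
hmap = numpy.exp(numpy.interp(numpy.log(target_poes),
                              numpy.log(curve_poes[::-1]),
                              numpy.log(imls[::-1])))
# hmap[0] is the IML exceeded with 10% probability, hmap[1] with 2%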
Example #13
0
class EventBasedRiskCalculator(base.RiskCalculator):
    """
    Event based risk calculator generating the event loss table and
    fixed-ratio loss curves.
    """
    pre_calculator = 'event_based_rupture'
    core_func = event_based_risk

    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    spec_indices = datastore.persistent_attribute('spec_indices')
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some datasets in the datastore.
        """
        super(EventBasedRiskCalculator, self).pre_execute()
        if not self.riskmodel:  # there is no riskmodel, exit early
            self.execute = lambda: None
            self.post_execute = lambda result: None
            return
        oq = self.oqparam
        if self.riskmodel.covs:
            epsilon_sampling = oq.epsilon_sampling
        else:
            epsilon_sampling = 1  # only one ignored epsilon
        correl_model = readinput.get_correl_model(oq)
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        assets_by_site = self.assets_by_site
        # the following is needed to set the asset idx attribute
        self.assetcol = riskinput.build_asset_collection(
            assets_by_site, oq.time_event)
        self.spec_indices = numpy.array(
            [a['asset_ref'] in oq.specific_assets for a in self.assetcol])

        logging.info('Populating the risk inputs')
        rup_by_tag = sum(self.datastore['sescollection'], AccumDict())
        all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)]
        for i, rup in enumerate(all_ruptures):
            rup.ordinal = i
        num_samples = min(len(all_ruptures), epsilon_sampling)
        eps_dict = riskinput.make_eps_dict(assets_by_site, num_samples,
                                           oq.master_seed,
                                           oq.asset_correlation)
        logging.info('Generated %d epsilons', num_samples * len(eps_dict))
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = list(
            self.riskmodel.build_inputs_from_ruptures(
                self.sitecol.complete, all_ruptures, gsims_by_col,
                oq.truncation_level, correl_model, eps_dict,
                oq.concurrent_tasks or 1))
        logging.info('Built %d risk inputs', len(self.riskinputs))

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.L = len(loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.outs = OUTPUTS
        self.datasets = {}
        self.monitor.oqparam = self.oqparam
        # ugly: attaching an attribute needed in the task function
        self.monitor.num_outputs = len(self.outs)
        # attaching two other attributes used in riskinput.gen_outputs
        self.monitor.assets_by_site = self.assets_by_site
        self.monitor.eps_dict = eps_dict
        self.monitor.num_assets = N = self.count_assets()
        for o, out in enumerate(self.outs):
            self.datastore.hdf5.create_group(out)
            for l, loss_type in enumerate(loss_types):
                cb = self.riskmodel.curve_builders[l]
                C = len(cb.ratios)  # curve resolution
                for r, rlz in enumerate(self.rlzs_assoc.realizations):
                    key = '/%s/%s' % (loss_type, rlz.uid)
                    if o == AGGLOSS:  # loss tables
                        dset = self.datastore.create_dset(out + key, elt_dt)
                    elif o == AVGLOSS:  # average losses
                        dset = self.datastore.create_dset(
                            out + key, numpy.float32, (N, 2))
                    elif o == SPECLOSS:  # specific losses
                        dset = self.datastore.create_dset(out + key, ela_dt)
                    else:  # risk curves
                        if not C:
                            continue
                        dset = self.datastore.create_dset(
                            out + key, cb.lr_dt, N)
                    self.datasets[o, l, r] = dset
                if o == RC and C:
                    grp = self.datastore['%s/%s' % (out, loss_type)]
                    grp.attrs['loss_ratios'] = cb.ratios

    def execute(self):
        """
        Run the event_based_risk calculator and aggregate the results
        """
        return apply_reduce(
            self.core_func.__func__,
            (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.agg,
            acc=cube(self.monitor.num_outputs, self.L, self.R, list),
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('col_id'))

    def agg(self, acc, result):
        """
        Aggregate lists of arrays into longer lists.

        :param acc: accumulator array of shape (O, L, R)
        :param result: a numpy array of shape (O, L, R)
        """
        for idx, arrays in numpy.ndenumerate(result):
            # TODO: special case for avg_losses, they can be summed directly
            if idx[0] == AVGLOSS:  # arrays has only 1 element
                acc[idx] = [sum(acc[idx] + arrays)]
            else:
                acc[idx].extend(arrays)
        return acc

    def post_execute(self, result):
        """
        Save the event loss table in the datastore.

        :param result:
            a numpy array of shape (O, L, R) containing lists of arrays
        """
        ses_ratio = self.oqparam.ses_ratio
        saved = {out: 0 for out in self.outs}
        N = len(self.assetcol)
        with self.monitor('saving loss table', autoflush=True,
                          measuremem=True):
            for (o, l, r), data in numpy.ndenumerate(result):
                if not data:  # empty list
                    continue
                cb = self.riskmodel.curve_builders[l]
                if o in (AGGLOSS, SPECLOSS):  # data is a list of arrays
                    losses = numpy.concatenate(data)
                    self.datasets[o, l, r].extend(losses)
                    saved[self.outs[o]] += losses.nbytes
                elif o == AVGLOSS:  # average losses
                    lt = self.riskmodel.loss_types[l]
                    [avgloss] = data
                    avglosses = numpy.array([
                        avgloss[i] * asset[lt]
                        for i, asset in enumerate(self.assetcol)
                    ], numpy.float32)
                    self.datasets[o, l, r].dset[:] = avglosses
                    saved[self.outs[o]] += avglosses.nbytes
                elif cb.user_provided:  # risk curves
                    # data is a list of dicts asset idx -> counts
                    poes = cb.build_poes(N, data, ses_ratio)
                    self.datasets[o, l, r] = poes
                    saved[self.outs[o]] += poes.nbytes
                self.datastore.hdf5.flush()

        for out in self.outs:
            nbytes = saved[out]
            if nbytes:
                self.datastore[out].attrs['nbytes'] = nbytes
                logging.info('Saved %s in %s', humansize(nbytes), out)
            else:  # remove empty outputs
                del self.datastore[out]

        if self.oqparam.specific_assets:
            self.build_specific_loss_curves(
                self.datastore['specific-losses-rlzs'])

        rlzs = self.rlzs_assoc.realizations
        if len(rlzs) > 1:
            self.compute_store_stats(rlzs, '')  # generic
            self.compute_store_stats(rlzs, '_specific')

        # The following is commented on purpose:
        # if (self.oqparam.conditional_loss_poes and
        #         'rcurves-rlzs' in self.datastore):
        #     self.build_loss_maps()

    def clean_up(self):
        """
        Final checks and cleanup
        """
        if (self.oqparam.ground_motion_fields
                and 'gmf_by_trt_gsim' not in self.datastore):
            logging.warn(
                'Even though the flag `ground_motion_fields` was set, the '
                'GMFs were not saved.\nYou should use the event_based hazard '
                'calculator to do that, not the risk one')
        super(EventBasedRiskCalculator, self).clean_up()

    def build_specific_loss_curves(self, group, kind='loss'):
        ses_ratio = self.oqparam.ses_ratio
        assetcol = self.assetcol[self.spec_indices]
        for loss_type, builder in zip(group, self.riskmodel.curve_builders):
            for rlz, dset in group[loss_type].items():
                losses_by_aid = collections.defaultdict(list)
                for ela in dset.value:
                    losses_by_aid[ela['ass_id']].append(ela[kind])
                curves = builder.build_loss_curves(assetcol, losses_by_aid,
                                                   ses_ratio)
                key = 'specific-loss_curves-rlzs/%s/%s' % (loss_type, rlz)
                self.datastore[key] = curves

    def build_loss_maps(self):
        """
        Build loss maps from the loss curves
        """
        oq = self.oqparam
        for loss_type, group in self.datastore['rcurves-rlzs'].items():
            asset_values = self.assetcol[loss_type]
            ratios = group.attrs['loss_ratios']
            for rlz, poe_matrix in group.items():
                maps = scientific.calc_loss_maps(oq.conditional_loss_poes,
                                                 asset_values, ratios,
                                                 poe_matrix)
                key = 'lmaps-rlzs/%s/%s' % (loss_type, rlz)
                self.datastore[key] = maps

    # ################### methods to compute statistics  #################### #

    def build_stats(self, builder):
        """
        Compute all statistics for all assets starting from the
        stored loss curves. Return a statistical output object for each
        loss type.
        """
        if 'rcurves-rlzs' not in self.datastore:
            return []
        stats = []
        # NB: should we encounter memory issues in the future, the easy
        # solution is to split the assets into blocks and perform
        # the computation one block at a time
        assets = self.assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        for loss_type in self.riskmodel.loss_types:
            group = self.datastore['rcurves-rlzs/%s' % loss_type]
            asset_values = self.assetcol[loss_type]
            data = []
            for rlz, dataset in zip(rlzs, group.values()):
                dkey = 'avg_losses-rlzs/%s/%s' % (loss_type, rlz.uid)
                average_losses = self.datastore[dkey].value
                ratios = group.attrs['loss_ratios']
                lcs = []
                for avalue, poes in zip(asset_values, dataset['poes']):
                    lcs.append((avalue * ratios, poes))
                losses_poes = numpy.array(lcs)  # -> shape (N, 2, C)
                out = scientific.Output(
                    assets,
                    loss_type,
                    rlz.ordinal,
                    rlz.weight,
                    loss_curves=losses_poes,
                    insured_curves=None,
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            stats.append(builder.build(data))
        return stats

    # TODO: add a direct test
    def build_specific_stats(self, builder):
        """
        Compute all statistics for the specified assets starting from the
        stored loss curves. Return a statistical output object for each
        loss type.
        """
        if not self.oqparam.specific_assets:
            return []

        specific_assets = set(self.oqparam.specific_assets)
        assetcol = self.assetcol
        specific_ids = []
        for i, a in enumerate(self.assetcol):
            if a['asset_ref'] in specific_assets:
                specific_ids.append(i)

        assets = assetcol['asset_ref']
        rlzs = self.rlzs_assoc.realizations
        stats = []
        for loss_type in self.riskmodel.loss_types:
            group = self.datastore['/specific-loss_curves-rlzs/%s' % loss_type]
            data = []
            for rlz, dataset in zip(rlzs, group.values()):
                dkey = 'avg_losses-rlzs/%s/%s' % (loss_type, rlz.uid)
                average_losses = self.datastore[dkey][specific_ids]
                lcs = dataset.value
                losses_poes = numpy.array(  # -> shape (N, 2, C)
                    [lcs['losses'], lcs['poes']]).transpose(1, 0, 2)
                out = scientific.Output(
                    assets,
                    loss_type,
                    rlz.ordinal,
                    rlz.weight,
                    loss_curves=losses_poes,
                    insured_curves=None,
                    average_losses=average_losses[:, 0],
                    average_insured_losses=average_losses[:, 1])
                data.append(out)
            stats.append(builder.build(data, prefix='specific-'))
        return stats

    def compute_store_stats(self, rlzs, kind):
        """
        Compute and store the statistical outputs
        """
        oq = self.oqparam
        N = (len(self.oqparam.specific_assets)
             if kind == '_specific' else len(self.assetcol))
        Q = 1 + len(oq.quantile_loss_curves)
        C = oq.loss_curve_resolution  # TODO: could be loss_type-dependent

        loss_curve_dt = numpy.dtype([('losses', (float, C)),
                                     ('poes', (float, C)), ('avg', float)])

        if oq.conditional_loss_poes:
            lm_names = _loss_map_names(oq.conditional_loss_poes)
            loss_map_dt = numpy.dtype([(f, float) for f in lm_names])

        loss_curve_stats = numpy.zeros((Q, N), loss_curve_dt)
        ins_curve_stats = numpy.zeros((Q, N), loss_curve_dt)
        if oq.conditional_loss_poes:
            loss_map_stats = numpy.zeros((Q, N), loss_map_dt)

        builder = scientific.StatsBuilder(oq.quantile_loss_curves,
                                          oq.conditional_loss_poes, [],
                                          scientific.normalize_curves_eb)

        build_stats = getattr(self, 'build%s_stats' % kind)
        all_stats = build_stats(builder)
        for stat in all_stats:
            # there is one stat for each loss_type
            curves, ins_curves, maps = scientific.get_stat_curves(stat)
            loss_curve_stats[:] = curves
            if oq.insured_losses:
                ins_curve_stats[:] = ins_curves
            if oq.conditional_loss_poes:
                loss_map_stats[:] = maps

            for i, path in enumerate(stat.paths):
                self._store(path % 'loss_curves', loss_curve_stats[i])
                self._store(path % 'ins_curves', ins_curve_stats[i])
                if oq.conditional_loss_poes:
                    self._store(path % 'loss_maps', loss_map_stats[i])

        stats = scientific.SimpleStats(rlzs, oq.quantile_loss_curves)
        stats.compute_and_store('avg_losses', self.datastore)

    def _store(self, path, curves):
        if curves.view(float).sum():
            # there are some nonzero values
            self.datastore[path] = curves
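
The composite dtype built in compute_store_stats is the central data structure here: each record holds the loss curve ('losses', 'poes') plus the average loss for one asset, and the statistics are stored as a (Q, N) array of such records. The following minimal sketch uses dummy values, not the engine's API, just to show how such a dtype is declared and filled.

import numpy

C = 4  # loss_curve_resolution
loss_curve_dt = numpy.dtype([('losses', (float, C)),
                             ('poes', (float, C)),
                             ('avg', float)])

Q, N = 2, 3  # one mean plus one quantile, three assets
loss_curve_stats = numpy.zeros((Q, N), loss_curve_dt)
loss_curve_stats[0, 0] = ([10., 20., 30., 40.],   # losses
                          [0.9, 0.5, 0.1, 0.01],  # poes
                          12.5)                   # average loss
print(loss_curve_stats['avg'])  # a (Q, N) array of average losses
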
Example #14
0
class UCERFEventBasedRuptureCalculator(
        event_based.EventBasedRuptureCalculator):
    """
    Event based PSHA calculator generating the ruptures only
    """
    core_task = compute_ruptures
    etags = datastore.persistent_attribute('etags')
    is_stochastic = True

    def pre_execute(self):
        """
        Parse the logic tree and the source model input
        """
        self.sitecol = readinput.get_site_collection(self.oqparam)
        self.save_mesh()
        self.gsim_lt = readinput.get_gsim_lt(self.oqparam, [DEFAULT_TRT])
        self.smlt = readinput.get_source_model_lt(self.oqparam)
        parser = source.SourceModelParser(
            UCERFSourceConverter(self.oqparam.investigation_time,
                                 self.oqparam.rupture_mesh_spacing))
        [self.source] = parser.parse_sources(
            self.oqparam.inputs["source_model"])
        branches = sorted(self.smlt.branches.items())
        min_mag, max_mag = self.source.min_mag, None
        source_models = []
        num_gsim_paths = self.gsim_lt.get_num_paths()
        for ordinal, (name, branch) in enumerate(branches):
            tm = source.TrtModel(DEFAULT_TRT, [],
                                 min_mag,
                                 max_mag,
                                 ordinal,
                                 eff_ruptures=-1)
            sm = source.SourceModel(name, branch.weight, [name], [tm],
                                    num_gsim_paths, ordinal, 1)
            source_models.append(sm)
        self.csm = source.CompositeSourceModel(self.gsim_lt,
                                               self.smlt,
                                               source_models,
                                               set_weight=False)
        self.rup_data = {}
        self.infos = []

    def execute(self):
        """
        Run the ucerf rupture calculation
        """
        id_set = [(key, self.smlt.branches[key].value,
                   self.smlt.branches[key].weight)
                  for key in self.smlt.branches]
        ruptures_by_trt_id = parallel.apply_reduce(
            compute_ruptures,
            (id_set, self.source, self.sitecol, self.oqparam, self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.agg)
        self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
            functools.partial(self.count_eff_ruptures, ruptures_by_trt_id))
        self.datastore['csm_info'] = self.csm.info
        self.datastore['source_info'] = numpy.array(self.infos,
                                                    source.source_info_dt)
        return ruptures_by_trt_id

    def agg(self, acc, val):
        """
        Aggregated the ruptures and the calculation times
        """
        for trt_id in val:
            ltbrid, dt = val.calc_times[trt_id]
            info = source.SourceInfo(
                trt_id,
                ltbrid,
                source_class=UCERFSESControl.__name__,
                weight=1,
                sources=1,
                filter_time=0,
                split_time=0,
                calc_time=dt)
            self.infos.append(info)
        return acc + val
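
The agg callback above merges the partial results while recording per-branch calculation times in self.infos as a side effect. Below is an illustrative, self-contained sketch of the same aggregation pattern; the Result class and the data are toys, not the engine's objects.

import functools

class Result(dict):
    """A partial result: trt_id -> list of ruptures, plus calc_times."""
    def __init__(self, data, calc_times):
        super().__init__(data)
        self.calc_times = calc_times  # trt_id -> seconds spent

def agg(acc, val, infos):
    for trt_id, dt in val.calc_times.items():
        infos.append((trt_id, dt))  # side effect: collect the timings
    for trt_id, rups in val.items():
        acc.setdefault(trt_id, []).extend(rups)  # merge the ruptures
    return acc

infos = []
partials = [Result({0: ['rup-1']}, {0: 0.10}),
            Result({0: ['rup-2'], 1: ['rup-3']}, {0: 0.20, 1: 0.05})]
total = functools.reduce(lambda acc, val: agg(acc, val, infos), partials, {})
print(total)  # {0: ['rup-1', 'rup-2'], 1: ['rup-3']}
print(infos)  # [(0, 0.1), (0, 0.2), (1, 0.05)]
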
Example #15
0
class RiskCalculator(HazardCalculator):
    """
    Base class for all risk calculators. A risk calculator must set the
    attributes .riskmodel, .sitecol, .assets_by_site, .exposure and
    .riskinputs in the pre_execute phase.
    """

    riskmodel = datastore.persistent_attribute('riskmodel')
    specific_assets = datastore.persistent_attribute('specific_assets')

    def make_eps_dict(self, num_ruptures):
        """
        :param num_ruptures: the size of the epsilon array for each asset
        """
        oq = self.oqparam
        with self.monitor('building epsilons', autoflush=True):
            eps = riskinput.make_eps_dict(self.assets_by_site, num_ruptures,
                                          oq.master_seed, oq.asset_correlation)
            return eps

    def build_riskinputs(self, hazards_by_key, eps_dict=None):
        """
        :param hazards_by_key:
            a dictionary key -> IMT -> array of length num_sites
        :returns:
            a list of RiskInputs objects, sorted by IMT.
        """
        imtls = self.oqparam.imtls
        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = []
            idx_weight_pairs = [
                (i, len(assets))
                for i, assets in enumerate(self.assets_by_site)]
            blocks = general.split_in_blocks(
                idx_weight_pairs, self.oqparam.concurrent_tasks or 1,
                weight=operator.itemgetter(1))
            for block in blocks:
                indices = numpy.array([idx for idx, _weight in block])
                reduced_assets = self.assets_by_site[indices]
                reduced_eps = {}  # for the assets belonging to the indices
                if eps_dict:
                    for assets in reduced_assets:
                        for asset in assets:
                            reduced_eps[asset.id] = eps_dict[asset.id]

                # collect the hazards by key into hazards by imt
                hdata = collections.defaultdict(lambda: [{} for _ in indices])
                for key, hazards_by_imt in hazards_by_key.items():
                    for imt in imtls:
                        hazards_by_site = hazards_by_imt[imt]
                        for i, haz in enumerate(hazards_by_site[indices]):
                            hdata[imt][i][key] = haz
                # build the riskinputs
                for imt in hdata:
                    ri = self.riskmodel.build_input(imt, hdata[imt],
                                                    reduced_assets,
                                                    reduced_eps)
                    if ri.weight > 0:
                        riskinputs.append(ri)
            logging.info('Built %d risk inputs', len(riskinputs))
            return sorted(riskinputs, key=self.riskinput_key)

    def riskinput_key(self, ri):
        """
        :param ri: riskinput object
        :returns: the IMT associated to it
        """
        return ri.imt

    def pre_execute(self):
        """
        Set the attributes .riskmodel, .sitecol, .assets_by_site
        """
        HazardCalculator.pre_execute(self)
        self.riskmodel = readinput.get_risk_model(self.oqparam)
        if hasattr(self, 'exposure'):
            missing = self.exposure.taxonomies - set(
                self.riskmodel.get_taxonomies())
            if missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' % missing)

    def execute(self):
        """
        Parallelize on the riskinputs and return a dictionary of results.
        Requires a `.core_func` to be defined with signature
        (riskinputs, riskmodel, rlzs_assoc, monitor).
        """
        # add fatalities as side effect
        riskinput.build_asset_collection(self.assets_by_site,
                                         self.oqparam.time_event)
        self.monitor.oqparam = self.oqparam
        if self.pre_calculator == 'event_based_rupture':
            self.monitor.assets_by_site = self.assets_by_site
            self.monitor.num_assets = self.count_assets()
        res = apply_reduce(
            self.core_func.__func__,
            (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=get_weight,
            key=self.riskinput_key)
        return res
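
build_riskinputs distributes the work by pairing each site index with the number of assets on it and splitting those pairs into blocks of roughly equal total weight. The helper below is a simplified stand-in for general.split_in_blocks, written only to illustrate that weighted blocking; the real function may differ in detail.

def split_in_blocks(items, hint, weight=lambda item: 1):
    """Yield blocks of items whose total weight is roughly total/hint."""
    items = list(items)
    max_weight = sum(weight(item) for item in items) / (hint or 1)
    block, acc = [], 0
    for item in items:
        block.append(item)
        acc += weight(item)
        if acc >= max_weight:  # close the current block
            yield block
            block, acc = [], 0
    if block:
        yield block

pairs = [(0, 5), (1, 1), (2, 7), (3, 2)]  # (site index, number of assets)
for blk in split_in_blocks(pairs, hint=2, weight=lambda pair: pair[1]):
    print(blk)
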
Example #16
0
class BaseCalculator(with_metaclass(abc.ABCMeta)):
    """
    Abstract base class for all calculators.

    :param oqparam: OqParam object
    :param monitor: monitor object
    :param calc_id: numeric calculation ID
    """
    sitemesh = datastore.persistent_attribute('sitemesh')
    sitecol = datastore.persistent_attribute('sitecol')
    rlzs_assoc = datastore.persistent_attribute('rlzs_assoc')
    realizations = datastore.persistent_attribute('realizations')
    assets_by_site = datastore.persistent_attribute('assets_by_site')
    assetcol = datastore.persistent_attribute('assetcol')
    cost_types = datastore.persistent_attribute('cost_types')
    taxonomies = datastore.persistent_attribute('taxonomies')
    job_info = datastore.persistent_attribute('job_info')
    source_chunks = datastore.persistent_attribute('source_chunks')
    performance = datastore.persistent_attribute('performance')
    csm = datastore.persistent_attribute('composite_source_model')
    pre_calculator = None  # to be overridden
    is_stochastic = False  # True for scenario and event based calculators

    def __init__(self, oqparam, monitor=DummyMonitor(), calc_id=None):
        self.monitor = monitor
        self.datastore = datastore.DataStore(calc_id)
        self.monitor.hdf5path = self.datastore.hdf5path
        self.datastore.export_dir = oqparam.export_dir
        self.oqparam = oqparam

    def save_params(self, **kw):
        """
        Update the current calculation parameters
        """
        vars(self.oqparam).update(kw)
        for name, val in self.oqparam.to_params():
            self.datastore.attrs[name] = val
        self.datastore.attrs['oqlite_version'] = repr(__version__)
        self.datastore.hdf5.flush()

    def set_log_format(self):
        """Set the format of the root logger"""
        fmt = '[%(asctime)s #{} %(levelname)s] %(message)s'.format(
            self.datastore.calc_id)
        for handler in logging.root.handlers:
            handler.setFormatter(logging.Formatter(fmt))

    def run(self, pre_execute=True, concurrent_tasks=None, **kw):
        """
        Run the calculation and return the exported outputs.
        """
        self.set_log_format()
        if (concurrent_tasks is not None
                and concurrent_tasks != OqParam.concurrent_tasks.default):
            self.oqparam.concurrent_tasks = concurrent_tasks
        self.save_params(**kw)
        exported = {}
        try:
            if pre_execute:
                self.pre_execute()
            result = self.execute()
            self.post_execute(result)
            exported = self.export(kw.get('exports', ''))
        except KeyboardInterrupt:
            pids = ' '.join(str(p.pid) for p in executor._processes)
            sys.stderr.write(
                'You can manually kill the workers with kill %s\n' % pids)
            raise
        except:
            if kw.get('pdb'):  # post-mortem debug
                tb = sys.exc_info()[2]
                traceback.print_exc(tb)
                pdb.post_mortem(tb)
            else:
                logging.critical('', exc_info=True)
                raise
        self.clean_up()
        return exported

    def core_func(*args):
        """
        Core routine running on the workers.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def pre_execute(self):
        """
        Initialization phase.
        """

    @abc.abstractmethod
    def execute(self):
        """
        Execution phase. It usually runs the core function in parallel
        and returns a dictionary with the results.
        """

    @abc.abstractmethod
    def post_execute(self, result):
        """
        Post-processing phase of the aggregated output. It must be
        overridden with the export code. It will return a dictionary
        of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.

        :returns: dictionary output_key -> sorted list of exported paths
        """
        # avoid circular imports
        from openquake.commonlib.export import export as exp
        exported = {}
        individual_curves = self.oqparam.individual_curves
        if exports and isinstance(exports, tuple):
            fmts = exports
        elif exports:  # is a string
            fmts = exports.split(',')
        else:  # use the exports from the job configuration
            fmts = self.oqparam.exports
        for fmt in fmts:
            if not fmt:
                continue
            for key in self.datastore:  # top level keys
                if 'rlzs' in key and not individual_curves:
                    continue  # skip individual curves
                ekey = (key, fmt)
                if ekey not in exp:  # non-exportable output
                    continue
                exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', key, exported[ekey])
        return exported

    def clean_up(self):
        """
        Collect the realizations and the monitoring information,
        then close the datastore.
        """
        if 'hcurves' in self.datastore:
            _set_nbytes('hcurves', self.datastore)
        if 'hmaps' in self.datastore:
            _set_nbytes('hmaps', self.datastore)
        if 'rlzs_assoc' in self.datastore:
            rlzs = self.rlzs_assoc.realizations
            self.realizations = numpy.array([(r.uid, r.weight) for r in rlzs],
                                            rlz_dt)
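
The persistent attributes declared at class level (sitecol, assetcol, performance, ...) are descriptors that transparently read from and write to the datastore. Below is a hedged sketch of how such a descriptor could work, backed by a plain dict instead of the real HDF5-based DataStore; it is an assumption about the mechanism, not the actual openquake implementation.

class persistent_attribute:
    """Descriptor writing through to the calculator's datastore."""
    def __init__(self, key):
        self.key = key

    def __get__(self, calc, calctype):
        return calc.datastore[self.key]   # read back from the datastore

    def __set__(self, calc, value):
        calc.datastore[self.key] = value  # persist on assignment

class ToyCalculator:
    sitemesh = persistent_attribute('sitemesh')
    def __init__(self):
        self.datastore = {}  # stand-in for datastore.DataStore(calc_id)

calc = ToyCalculator()
calc.sitemesh = [(10.0, 45.0), (10.1, 45.1)]
print(calc.datastore['sitemesh'])  # the attribute went into the datastore
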
Example #17
0
class ClassicalCalculator(base.HazardCalculator):
    """
    Classical PSHA calculator
    """
    core_func = classical
    source_info = datastore.persistent_attribute('source_info')

    def execute(self):
        """
        Run `core_func(sources, sitecol, monitor)` in parallel, splitting
        the sources according to their weight and tectonic region type.
        """
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = self.oqparam
        sources = self.csm.get_sources()
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        zerodict['calc_times'] = []
        gsims_assoc = self.rlzs_assoc.gsims_by_trt_id
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_func.__func__,
            (sources, self.sitecol, gsims_assoc, monitor),
            agg=agg_dicts, acc=zerodict,
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('trt_model_id'))
        if self.persistent:
            store_source_chunks(self.datastore)
        return curves_by_trt_gsim

    def post_execute(self, curves_by_trt_gsim):
        """
        Collect the hazard curves by realization and export them.

        :param curves_by_trt_gsim:
            a dictionary (trt_id, gsim) -> hazard curves
        """
        # save calculation time per source
        try:
            calc_times = curves_by_trt_gsim.pop('calc_times')
        except KeyError:
            pass
        else:
            sources = self.csm.get_sources()
            info = []
            for i, dt in calc_times:
                src = sources[i]
                info.append((src.trt_model_id, src.source_id, dt))
            info.sort(key=operator.itemgetter(2), reverse=True)
            self.source_info = numpy.array(info, source_info_dt)

        # save curves_by_trt_gsim
        for sm in self.rlzs_assoc.csm_info.source_models:
            group = self.datastore.hdf5.create_group(
                'curves_by_sm/' + '_'.join(sm.path))
            group.attrs['source_model'] = sm.name
            for tm in sm.trt_models:
                for gsim in tm.gsims:
                    try:
                        curves = curves_by_trt_gsim[tm.id, gsim]
                    except KeyError:  # no data for the trt_model
                        pass
                    else:
                        ts = '%03d-%s' % (tm.id, gsim)
                        group[ts] = curves
                        group[ts].attrs['trt'] = tm.trt
        oq = self.oqparam
        zc = zero_curves(len(self.sitecol.complete), oq.imtls)
        curves_by_rlz = self.rlzs_assoc.combine_curves(
            curves_by_trt_gsim, agg_curves, zc)
        rlzs = self.rlzs_assoc.realizations
        nsites = len(self.sitecol)
        if oq.individual_curves:
            for rlz, curves in curves_by_rlz.items():
                self.store_curves('rlz-%03d' % rlz.ordinal, curves, rlz)

        if len(rlzs) == 1:  # cannot compute statistics
            [self.mean_curves] = curves_by_rlz.values()
            return

        weights = (None if oq.number_of_logic_tree_samples
                   else [rlz.weight for rlz in rlzs])
        mean = oq.mean_hazard_curves
        if mean:
            self.mean_curves = numpy.array(zc)
            for imt in oq.imtls:
                self.mean_curves[imt] = scientific.mean_curve(
                    [curves_by_rlz[rlz][imt] for rlz in rlzs], weights)

        self.quantile = {}
        for q in oq.quantile_hazard_curves:
            self.quantile[q] = qc = numpy.array(zc)
            for imt in oq.imtls:
                curves = [curves_by_rlz[rlz][imt] for rlz in rlzs]
                qc[imt] = scientific.quantile_curve(
                    curves, q, weights).reshape((nsites, -1))

        if mean:
            self.store_curves('mean', self.mean_curves)
        for q in self.quantile:
            self.store_curves('quantile-%s' % q, self.quantile[q])

    def hazard_maps(self, curves):
        """
        Compute the hazard maps associated with the curves
        """
        n, p = len(self.sitecol), len(self.oqparam.poes)
        maps = zero_maps((n, p), self.oqparam.imtls)
        for imt in curves.dtype.fields:
            maps[imt] = calc.compute_hazard_maps(
                curves[imt], self.oqparam.imtls[imt], self.oqparam.poes)
        return maps

    def store_curves(self, kind, curves, rlz=None):
        """
        Store all kinds of curves, optionally computing maps and uhs curves.

        :param kind: the kind of curves to store
        :param curves: an array of N curves to store
        :param rlz: hazard realization, if any
        """
        if not self.persistent:  # do nothing
            return
        oq = self.oqparam
        self._store('hcurves/' + kind, curves, rlz)
        if oq.hazard_maps or oq.uniform_hazard_spectra:
            # hmaps is a composite array of shape (N, P)
            hmaps = self.hazard_maps(curves)
            if oq.hazard_maps:
                self._store('hmaps/' + kind, hmaps, rlz, poes=oq.poes)
            if oq.uniform_hazard_spectra:
                # uhs is an array of shape (N, I, P)
                self._store('uhs/' + kind, calc.make_uhs(hmaps), rlz,
                            poes=oq.poes)

    def _store(self, name, curves, rlz, **kw):
        self.datastore.hdf5[name] = curves
        dset = self.datastore.hdf5[name]
        if rlz is not None:
            dset.attrs['uid'] = rlz.uid
        for k, v in kw.items():
            dset.attrs[k] = v
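
post_execute combines the curves of the individual realizations into mean and quantile curves, weighting by the logic-tree weights unless sampling is used. The sketch below shows the idea with plain numpy; scientific.mean_curve and scientific.quantile_curve are assumed to perform the weighted versions of these operations.

import numpy

# 3 realizations x 4 intensity levels of PoEs for a single site and IMT
curves_by_rlz = numpy.array([[0.90, 0.50, 0.10, 0.010],
                             [0.80, 0.40, 0.05, 0.005],
                             [0.95, 0.60, 0.20, 0.020]])
weights = [0.5, 0.3, 0.2]  # logic-tree weights (None when sampling)

mean_curve = numpy.average(curves_by_rlz, weights=weights, axis=0)
q85_curve = numpy.percentile(curves_by_rlz, 85, axis=0)  # unweighted quantile
print(mean_curve)
print(q85_curve)
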
Example #18
0
class ScenarioCalculator(base.HazardCalculator):
    """
    Scenario hazard calculator
    """
    core_func = calc_gmfs
    tags = datastore.persistent_attribute('tags')
    sescollection = datastore.persistent_attribute('sescollection')
    is_stochastic = True

    def pre_execute(self):
        """
        Read the site collection and initialize GmfComputer, tags and seeds
        """
        super(ScenarioCalculator, self).pre_execute()
        trunc_level = self.oqparam.truncation_level
        correl_model = readinput.get_correl_model(self.oqparam)
        n_gmfs = self.oqparam.number_of_ground_motion_fields
        rupture = readinput.get_rupture(self.oqparam)
        self.gsims = readinput.get_gsims(self.oqparam)
        self.rlzs_assoc = readinput.get_rlzs_assoc(self.oqparam)

        # filter the sites
        self.sitecol = filters.filter_sites_by_distance_to_rupture(
            rupture, self.oqparam.maximum_distance, self.sitecol)
        if self.sitecol is None:
            raise RuntimeError('All sites were filtered out! '
                               'maximum_distance=%s km' %
                               self.oqparam.maximum_distance)
        self.tags = numpy.array(
            sorted(['scenario-%010d' % i for i in range(n_gmfs)]),
            (bytes, 100))
        self.computer = GmfComputer(rupture, self.sitecol, self.oqparam.imtls,
                                    self.gsims, trunc_level, correl_model)
        rnd = random.Random(self.oqparam.random_seed)
        self.tag_seed_pairs = [(tag, rnd.randint(0, calc.MAX_INT))
                               for tag in self.tags]
        self.sescollection = [{
            tag: Rupture(tag, seed, rupture)
            for tag, seed in self.tag_seed_pairs
        }]

    def execute(self):
        """
        Compute the GMFs in parallel and return a dictionary gmf_by_tag
        """
        logging.info('Computing the GMFs')
        args = (self.tag_seed_pairs, self.computer, self.monitor('calc_gmfs'))
        gmf_by_tag = parallel.apply_reduce(
            self.core_func.__func__,
            args,
            concurrent_tasks=self.oqparam.concurrent_tasks)
        return gmf_by_tag

    def post_execute(self, gmf_by_tag):
        """
        :param gmf_by_tag: a dictionary tag -> gmf
        """
        data = []
        for ordinal, tag in enumerate(sorted(gmf_by_tag)):
            gmf = gmf_by_tag[tag]
            gmf['idx'] = ordinal
            data.append(gmf)
        gmfa = numpy.concatenate(data)
        self.datastore['gmfs/col00'] = gmfa
        self.datastore['gmfs'].attrs['nbytes'] = gmfa.nbytes
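
pre_execute seeds a single random number generator with random_seed and draws one seed per ground motion field, so the scenario is reproducible across runs and workers. A minimal sketch of that pairing follows; MAX_INT is an assumed stand-in for calc.MAX_INT.

import random

MAX_INT = 2 ** 31 - 1  # assumption: a large positive bound like calc.MAX_INT
n_gmfs, random_seed = 3, 42
tags = ['scenario-%010d' % i for i in range(n_gmfs)]
rnd = random.Random(random_seed)
tag_seed_pairs = [(tag, rnd.randint(0, MAX_INT)) for tag in tags]
print(tag_seed_pairs)  # identical on every run with the same random_seed
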
Example #19
0
class EventBasedRuptureCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ruptures only
    """
    core_task = compute_ruptures
    etags = datastore.persistent_attribute('etags')
    is_stochastic = True

    def init(self):
        """
        Set the random seed passed to the SourceManager and the
        minimum_intensity dictionary.
        """
        oq = self.oqparam
        self.random_seed = oq.random_seed
        self.rlzs_assoc = self.datastore['csm_info'].get_rlzs_assoc()
        self.min_iml = fix_minimum_intensity(oq.minimum_intensity, oq.imtls)
        self.rup_data = {}

    def count_eff_ruptures(self, ruptures_by_trt_id, trt_model):
        """
        Returns the number of ruptures sampled in the given trt_model.

        :param ruptures_by_trt_id: a dictionary with key trt_id
        :param trt_model: a TrtModel instance
        """
        return sum(
            len(ruptures) for trt_id, ruptures in ruptures_by_trt_id.items()
            if trt_model.id == trt_id)

    def zerodict(self):
        """
        Initial accumulator, a dictionary (trt_id, gsim) -> curves
        """
        zd = AccumDict()
        zd.calc_times = []
        zd.eff_ruptures = AccumDict()
        return zd

    def agg_dicts(self, acc, ruptures_by_trt_id):
        """
        Aggregate the ruptures by updating the accumulator.

        :param acc: accumulator dictionary
        :param ruptures_by_trt_id: a dictionary trt_id -> ruptures
        """
        with self.monitor('aggregate curves', autoflush=True):
            if hasattr(ruptures_by_trt_id, 'calc_times'):
                acc.calc_times.extend(ruptures_by_trt_id.calc_times)
            if hasattr(ruptures_by_trt_id, 'eff_ruptures'):
                acc.eff_ruptures += ruptures_by_trt_id.eff_ruptures
            acc += ruptures_by_trt_id
            if len(ruptures_by_trt_id):
                trt = ruptures_by_trt_id.trt
                try:
                    dset = self.rup_data[trt]
                except KeyError:
                    dset = self.rup_data[trt] = self.datastore.create_dset(
                        'rup_data/' + trt, ruptures_by_trt_id.rup_data.dtype)
                dset.extend(ruptures_by_trt_id.rup_data)
        self.datastore.flush()
        return acc

    def post_execute(self, result):
        """
        Save the SES collection
        """
        with self.monitor('saving ruptures', autoflush=True):
            # ordering ruptures
            sescollection = []
            for trt_id in result:
                for ebr in result[trt_id]:
                    sescollection.append(ebr)
            sescollection.sort(key=operator.attrgetter('serial'))
            etags = numpy.concatenate([ebr.etags for ebr in sescollection])
            self.etags = numpy.array(etags, (bytes, 100))
            nr = len(sescollection)
            logging.info('Saving SES collection with %d ruptures, %d events',
                         nr, len(etags))
            eid = 0
            for ebr in sescollection:
                eids = []
                for event in ebr.events:
                    event['eid'] = eid
                    eids.append(eid)
                    eid += 1
                self.datastore['sescollection/%s' % ebr.serial] = ebr
            self.datastore.set_nbytes('sescollection')

        for dset in self.rup_data.values():
            if len(dset.dset):
                numsites = dset.dset['numsites']
                multiplicity = dset.dset['multiplicity']
                spr = numpy.average(numsites, weights=multiplicity)
                mul = numpy.average(multiplicity, weights=numsites)
                self.datastore.set_attrs(dset.name,
                                         sites_per_rupture=spr,
                                         multiplicity=mul)
        if self.rup_data:
            self.datastore.set_nbytes('rup_data')
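
In post_execute the ruptures are sorted by their serial number and their events receive increasing global event ids, so the numbering is deterministic. The toy classes below only illustrate that id assignment; they are not the engine's EBRupture objects.

class Event(dict):
    pass

class EBRupture:
    def __init__(self, serial, n_events):
        self.serial = serial
        self.events = [Event(eid=None) for _ in range(n_events)]

ruptures = [EBRupture(serial=7, n_events=2), EBRupture(serial=3, n_events=1)]
ruptures.sort(key=lambda ebr: ebr.serial)  # deterministic ordering by serial
eid = 0
for ebr in ruptures:
    for event in ebr.events:
        event['eid'] = eid  # global, increasing event id
        eid += 1
print([(ebr.serial, [e['eid'] for e in ebr.events]) for ebr in ruptures])
# -> [(3, [0]), (7, [1, 2])]
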
Example #20
0
class ClassicalCalculator(base.HazardCalculator):
    """
    Classical PSHA calculator
    """
    core_func = classical
    source_info = datastore.persistent_attribute('source_info')

    def agg_dicts(self, acc, val):
        """
        Aggregate dictionaries of hazard curves by updating the accumulator.

        :param acc: accumulator dictionary
        :param val: a dictionary of hazard curves, keyed by (trt_id, gsim)
        """
        with self.monitor('aggregate curves', autoflush=True):
            if hasattr(val, 'calc_times'):
                acc.calc_times.extend(val.calc_times)
            for bb in getattr(val, 'bbs', []):
                acc.bb_dict[bb.lt_model_id, bb.site_id].update_bb(bb)
            if hasattr(acc, 'n'):  # tiling calculator
                for key in val:
                    acc[key] = agg_curves(
                        acc[key], expand(val[key], acc.n, val.siteslice))
            else:  # classical, event_based
                for key in val:
                    acc[key] = agg_curves(acc[key], val[key])
        return acc

    def execute(self):
        """
        Run `core_func(sources, sitecol, monitor)` in parallel, splitting
        the sources according to their weight and tectonic region type.
        """
        monitor = self.monitor.new(self.core_func.__name__)
        monitor.oqparam = self.oqparam
        sources = self.csm.get_sources()
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        zerodict.calc_times = []
        zerodict.bb_dict = {
            (smodel.ordinal, site.id): BoundingBox(smodel.ordinal, site.id)
            for site in self.sitecol
            for smodel in self.csm.source_models
        } if self.oqparam.poes_disagg else {}
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_func.__func__,
            (sources, self.sitecol, 0, self.rlzs_assoc, monitor),
            agg=self.agg_dicts, acc=zerodict,
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('trt_model_id'))
        store_source_chunks(self.datastore)
        return curves_by_trt_gsim

    def post_execute(self, curves_by_trt_gsim):
        """
        Collect the hazard curves by realization and export them.

        :param curves_by_trt_gsim:
            a dictionary (trt_id, gsim) -> hazard curves
        """
        # save calculation time per source
        calc_times = getattr(curves_by_trt_gsim, 'calc_times', [])
        sources = self.csm.get_sources()
        infodict = collections.defaultdict(float)
        weight = {}
        for src_idx, dt in calc_times:
            src = sources[src_idx]
            weight[src.trt_model_id, src.source_id] = src.weight
            infodict[src.trt_model_id, src.source_id] += dt
        infolist = [key + (dt, weight[key]) for key, dt in infodict.items()]
        infolist.sort(key=operator.itemgetter(1), reverse=True)
        if infolist:
            self.source_info = numpy.array(infolist, source_info_dt)

        with self.monitor('save curves_by_trt_gsim', autoflush=True):
            for sm in self.rlzs_assoc.csm_info.source_models:
                group = self.datastore.hdf5.create_group(
                    'curves_by_sm/' + '_'.join(sm.path))
                group.attrs['source_model'] = sm.name
                for tm in sm.trt_models:
                    for i, gsim in enumerate(tm.gsims):
                        try:
                            curves = curves_by_trt_gsim[tm.id, gsim]
                        except KeyError:  # no data for the trt_model
                            pass
                        else:
                            ts = '%03d-%d' % (tm.id, i)
                            if nonzero(curves):
                                group[ts] = curves
                                group[ts].attrs['trt'] = tm.trt
                                group[ts].attrs['nbytes'] = curves.nbytes
                                group[ts].attrs['gsim'] = str(gsim)
                self.datastore.set_nbytes(group.name)
            self.datastore.set_nbytes('curves_by_sm')

        oq = self.oqparam
        with self.monitor('combine and save curves_by_rlz', autoflush=True):
            zc = zero_curves(len(self.sitecol.complete), oq.imtls)
            curves_by_rlz = self.rlzs_assoc.combine_curves(
                curves_by_trt_gsim, agg_curves, zc)
            rlzs = self.rlzs_assoc.realizations
            nsites = len(self.sitecol)
            if oq.individual_curves:
                for rlz, curves in curves_by_rlz.items():
                    self.store_curves('rlz-%03d' % rlz.ordinal, curves, rlz)

            if len(rlzs) == 1:  # cannot compute statistics
                [self.mean_curves] = curves_by_rlz.values()
                return

        with self.monitor('compute and save statistics', autoflush=True):
            weights = (None if oq.number_of_logic_tree_samples
                       else [rlz.weight for rlz in rlzs])
            mean = oq.mean_hazard_curves
            if mean:
                self.mean_curves = numpy.array(zc)
                for imt in oq.imtls:
                    self.mean_curves[imt] = scientific.mean_curve(
                        [curves_by_rlz[rlz][imt] for rlz in rlzs], weights)

            self.quantile = {}
            for q in oq.quantile_hazard_curves:
                self.quantile[q] = qc = numpy.array(zc)
                for imt in oq.imtls:
                    curves = [curves_by_rlz[rlz][imt] for rlz in rlzs]
                    qc[imt] = scientific.quantile_curve(
                        curves, q, weights).reshape((nsites, -1))

            if mean:
                self.store_curves('mean', self.mean_curves)
            for q in self.quantile:
                self.store_curves('quantile-%s' % q, self.quantile[q])

    def hazard_maps(self, curves):
        """
        Compute the hazard maps associated with the curves
        """
        maps = zero_maps(
            len(self.sitecol), self.oqparam.imtls, self.oqparam.poes)
        for imt in curves.dtype.fields:
            # build a matrix of size (N, P)
            data = calc.compute_hazard_maps(
                curves[imt], self.oqparam.imtls[imt], self.oqparam.poes)
            for poe, hmap in zip(self.oqparam.poes, data.T):
                maps['%s~%s' % (imt, poe)] = hmap
        return maps

    def store_curves(self, kind, curves, rlz=None):
        """
        Store all kinds of curves, optionally computing maps and uhs curves.

        :param kind: the kind of curves to store
        :param curves: an array of N curves to store
        :param rlz: hazard realization, if any
        """
        oq = self.oqparam
        self._store('hcurves/' + kind, curves, rlz, nbytes=curves.nbytes)
        if oq.hazard_maps or oq.uniform_hazard_spectra:
            # hmaps is a composite array of shape (N, P)
            hmaps = self.hazard_maps(curves)
            if oq.hazard_maps:
                self._store('hmaps/' + kind, hmaps, rlz,
                            poes=oq.poes, nbytes=hmaps.nbytes)

    def _store(self, name, curves, rlz, **kw):
        self.datastore.hdf5[name] = curves
        dset = self.datastore.hdf5[name]
        if rlz is not None:
            dset.attrs['uid'] = rlz.uid
        for k, v in kw.items():
            dset.attrs[k] = v
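
hazard_maps turns each hazard curve into map values by finding, for every target PoE, the corresponding intensity level on the curve. The sketch below uses plain linear interpolation as an illustration; the actual calc.compute_hazard_maps may interpolate differently (for instance in log space).

import numpy

imls = numpy.array([0.01, 0.1, 0.2, 0.4, 0.8])          # intensity levels
poes_curve = numpy.array([0.99, 0.6, 0.3, 0.1, 0.01])   # PoEs, decreasing
target_poes = [0.1, 0.02]

# numpy.interp needs increasing x, so interpolate on the reversed arrays
hmap = numpy.interp(target_poes, poes_curve[::-1], imls[::-1])
print(dict(zip(target_poes, hmap)))  # PoE -> intensity measure level
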
Example #21
0
class HazardCalculator(BaseCalculator):
    """
    Base class for hazard calculators based on source models
    """
    riskmodel = datastore.persistent_attribute('riskmodel')

    mean_curves = None  # to be overridden
    SourceProcessor = source.SourceFilterSplitter

    def assoc_assets_sites(self, sitecol):
        """
        :param sitecol: a sequence of sites
        :returns: a pair (filtered_sites, assets_by_site)

        The new site collection is different from the original one
        if some assets were discarded or if there were missing assets
        for some sites.
        """
        maximum_distance = self.oqparam.asset_hazard_distance
        siteobjects = geodetic.GeographicObjects(
            Site(sid, lon, lat)
            for sid, lon, lat in zip(sitecol.sids, sitecol.lons, sitecol.lats))
        assets_by_sid = general.AccumDict()
        for assets in self.assets_by_site:
            if len(assets):
                lon, lat = assets[0].location
                site, _ = siteobjects.get_closest(lon, lat, maximum_distance)
                if site:
                    assets_by_sid += {site.sid: list(assets)}
        if not assets_by_sid:
            raise AssetSiteAssociationError(
                'Could not associate any site to any assets within the '
                'maximum distance of %s km' % maximum_distance)
        mask = numpy.array([sid in assets_by_sid for sid in sitecol.sids])
        assets_by_site = [assets_by_sid.get(sid, []) for sid in sitecol.sids]
        return sitecol.filter(mask), numpy.array(assets_by_site)

    def count_assets(self):
        """
        Count how many assets are taken into consideration by the calculator
        """
        return sum(len(assets) for assets in self.assets_by_site)

    def pre_execute(self):
        """
        Check if there is a pre_calculator or a previous calculation ID.
        If yes, read the inputs by invoking the precalculator or by retrieving
        the previous calculation; if not, read the inputs directly.
        """
        if self.pre_calculator is not None:
            # the parameter hazard_calculation_id is only meaningful if
            # there is a precalculator
            precalc_id = self.oqparam.hazard_calculation_id
            if precalc_id is None:  # recompute everything
                precalc = calculators[self.pre_calculator](
                    self.oqparam, self.monitor('precalculator'),
                    self.datastore.calc_id)
                precalc.run()
                if 'scenario' not in self.oqparam.calculation_mode:
                    self.csm = precalc.csm
            else:  # read previously computed data
                parent = datastore.DataStore(precalc_id)
                self.datastore.set_parent(parent)
                # update oqparam with the attributes saved in the datastore
                self.oqparam = OqParam.from_(self.datastore.attrs)
                self.read_risk_data()

        else:  # we are in a basic calculator
            self.read_risk_data()
            self.read_sources()
        self.datastore.hdf5.flush()

    def read_exposure(self):
        """
        Read the exposure and update the attributes .exposure, .sitecol,
        .assets_by_site, .cost_types and .taxonomies.
        """
        logging.info('Reading the exposure')
        with self.monitor('reading exposure', autoflush=True):
            self.exposure = readinput.get_exposure(self.oqparam)
            self.sitecol, self.assets_by_site = (readinput.get_sitecol_assets(
                self.oqparam, self.exposure))
            if len(self.exposure.cost_types):
                self.cost_types = self.exposure.cost_types
            self.taxonomies = numpy.array(sorted(self.exposure.taxonomies),
                                          '|S100')
            self.datastore['time_events'] = sorted(self.exposure.time_events)

    def load_riskmodel(self):
        """
        Read the risk model and set the attribute .riskmodel.
        The riskmodel can be empty for hazard calculations.
        Save the loss ratios (if any) in the datastore.
        """
        rmdict = riskmodels.get_risk_models(self.oqparam)
        self.oqparam.set_risk_imtls(rmdict)
        # save risk_imtls in the datastore: this is crucial
        self.datastore.hdf5.attrs['risk_imtls'] = repr(self.oqparam.risk_imtls)
        self.riskmodel = rm = readinput.get_risk_model(self.oqparam, rmdict)
        if 'taxonomies' in self.datastore:
            # check that we are covering all the taxonomies in the exposure
            missing = set(self.taxonomies) - set(rm.taxonomies)
            if rm and missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' % missing)

        # save the loss ratios in the datastore
        pairs = [(cb.loss_type, (numpy.float64, len(cb.ratios)))
                 for cb in rm.curve_builders if cb.user_provided]
        if not pairs:
            return
        loss_ratios = numpy.zeros(len(rm), numpy.dtype(pairs))
        for cb in rm.curve_builders:
            if cb.user_provided:
                loss_ratios_lt = loss_ratios[cb.loss_type]
                for i, imt_taxo in enumerate(sorted(rm)):
                    loss_ratios_lt[i] = rm[imt_taxo].loss_ratios[cb.loss_type]
        self.datastore['loss_ratios'] = loss_ratios
        self.datastore['loss_ratios'].attrs['imt_taxos'] = sorted(rm)
        self.datastore['loss_ratios'].attrs['nbytes'] = loss_ratios.nbytes

    def read_risk_data(self):
        """
        Read the exposure (if any), the risk model (if any) and then the
        site collection, possibly extracted from the exposure.
        """
        logging.info('Reading the site collection')
        with self.monitor('reading site collection', autoflush=True):
            haz_sitecol = readinput.get_site_collection(self.oqparam)
        inputs = self.oqparam.inputs
        if 'exposure' in inputs:
            self.read_exposure()
            self.load_riskmodel()  # must be called *after* read_exposure
            num_assets = self.count_assets()
            if self.datastore.parent:
                haz_sitecol = self.datastore.parent['sitecol']
            if haz_sitecol is not None and haz_sitecol != self.sitecol:
                with self.monitor('assoc_assets_sites'):
                    self.sitecol, self.assets_by_site = \
                        self.assoc_assets_sites(haz_sitecol.complete)
                ok_assets = self.count_assets()
                num_sites = len(self.sitecol)
                logging.warn('Associated %d assets to %d sites, %d discarded',
                             ok_assets, num_sites, num_assets - ok_assets)
        elif (self.datastore.parent and 'exposure' in OqParam.from_(
                self.datastore.parent.attrs).inputs):
            logging.info('Re-using the already imported exposure')
            self.load_riskmodel()
        else:  # no exposure
            self.load_riskmodel()
            self.sitecol = haz_sitecol

        # save mesh and asset collection
        self.save_mesh()
        if hasattr(self, 'assets_by_site'):
            self.assetcol = riskinput.build_asset_collection(
                self.assets_by_site, self.oqparam.time_event)
            spec = set(self.oqparam.specific_assets)
            unknown = spec - set(self.assetcol['asset_ref'])
            if unknown:
                raise ValueError('The specific asset(s) %s are not in the '
                                 'exposure' % ', '.join(unknown))

    def save_mesh(self):
        """
        Save the mesh associated to the complete sitecol in the HDF5 file
        """
        if ('sitemesh' not in self.datastore
                and 'sitemesh' not in self.datastore.parent):
            col = self.sitecol.complete
            mesh_dt = numpy.dtype([('lon', F32), ('lat', F32)])
            self.sitemesh = numpy.array(list(zip(col.lons, col.lats)), mesh_dt)

    def read_sources(self):
        """
        Read the composite source model (if any).
        This method must be called after read_risk_data, to be able
        to filter the sources according to the site collection.
        """
        if 'source' in self.oqparam.inputs:
            logging.info('Reading the composite source model')
            with self.monitor('reading composite source model',
                              autoflush=True):
                self.csm = readinput.get_composite_source_model(
                    self.oqparam,
                    self.sitecol,
                    self.SourceProcessor,
                    self.monitor,
                    dstore=self.datastore)
                # we could manage limits here
                self.job_info = readinput.get_job_info(self.oqparam, self.csm,
                                                       self.sitecol)
                self.rlzs_assoc = self.csm.get_rlzs_assoc()

                logging.info('Total weight of the sources=%s',
                             self.job_info['input_weight'])
                logging.info('Expected output size=%s',
                             self.job_info['output_weight'])

    def post_process(self):
        """For compatibility with the engine"""
Example #22
0
class PSHACalculator(base.HazardCalculator):
    """
    Classical PSHA calculator
    """
    core_task = classical
    source_info = datastore.persistent_attribute('source_info')

    def agg_dicts(self, acc, pmap):
        """
        Aggregate dictionaries of hazard curves by updating the accumulator.

        :param acc: accumulator dictionary
        :param pmap: a ProbabilityMap
        """
        with self.monitor('aggregate curves', autoflush=True):
            for src_id, nsites, calc_time in pmap.calc_times:
                src_id = src_id.split(':', 1)[0]
                info = self.csm.infos[pmap.grp_id, src_id]
                info.calc_time += calc_time
                info.num_sites = max(info.num_sites, nsites)
                info.num_split += 1
            acc.eff_ruptures += pmap.eff_ruptures
            for bb in getattr(pmap, 'bbs', []):  # for disaggregation
                acc.bb_dict[bb.lt_model_id, bb.site_id].update_bb(bb)
            acc[pmap.grp_id] |= pmap
        self.datastore.flush()
        return acc

    def count_eff_ruptures(self, result_dict, src_group):
        """
        Returns the number of ruptures in the src_group (after filtering)
        or 0 if the src_group has been filtered away.

        :param result_dict: a dictionary with keys (grp_id, gsim)
        :param src_group: a SourceGroup instance
        """
        return result_dict.eff_ruptures.get(src_group.id, 0)

    def zerodict(self):
        """
        Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)
        """
        zd = AccumDict()
        num_levels = len(self.oqparam.imtls.array)
        for grp in self.csm.src_groups:
            num_gsims = len(self.rlzs_assoc.gsims_by_grp_id[grp.id])
            zd[grp.id] = ProbabilityMap(num_levels, num_gsims)
        zd.calc_times = []
        zd.eff_ruptures = AccumDict()  # grp_id -> eff_ruptures
        zd.bb_dict = BBdict()
        if self.oqparam.poes_disagg or self.oqparam.iml_disagg:
            for sid in self.sitecol.sids:
                for smodel in self.csm.source_models:
                    zd.bb_dict[smodel.ordinal,
                               sid] = BoundingBox(smodel.ordinal, sid)
        return zd

    def execute(self):
        """
        Run `core_task(sources, sitecol, monitor)` in parallel, splitting
        the sources according to their weight and tectonic region type.
        """
        oq = self.oqparam
        monitor = self.monitor(self.core_task.__name__,
                               truncation_level=oq.truncation_level,
                               imtls=oq.imtls,
                               maximum_distance=oq.maximum_distance,
                               disagg=oq.poes_disagg or oq.iml_disagg)
        with self.monitor('managing sources', autoflush=True):
            allargs = self.gen_args(self.csm, monitor)
            iterargs = saving_sources_by_task(allargs, self.datastore)
            if isinstance(allargs, list):
                # there is a trick here: if the arguments are known in
                # advance (a list, not an iterator), keep them as a list;
                # then the Starmap will recognize the case of a single
                # argument tuple and will run the task in core
                iterargs = list(iterargs)
            ires = parallel.Starmap(self.core_task.__func__,
                                    iterargs).submit_all()
        acc = ires.reduce(self.agg_dicts, self.zerodict())
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(self.csm.infos, acc)
        return acc

    def gen_args(self, csm, monitor):
        """
        Used in the case of large source model logic trees.

        :param csm: a CompositeSourceModel instance
        :param monitor: a :class:`openquake.baselib.performance.Monitor`
        :yields: (sources, sites, gsims, monitor) tuples
        """
        oq = self.oqparam
        maxweight = self.csm.get_maxweight(oq.concurrent_tasks)
        logging.info('Using a maxweight of %d', maxweight)
        ngroups = sum(len(sm.src_groups) for sm in csm.source_models)
        for sm in csm.source_models:
            for sg in sm.src_groups:
                logging.info('Sending source group #%d of %d (%s, %d sources)',
                             sg.id + 1, ngroups, sg.trt, len(sg.sources))
                gsims = self.rlzs_assoc.gsims_by_grp_id[sg.id]
                if oq.poes_disagg or oq.iml_disagg:  # only for disaggregation
                    monitor.sm_id = self.rlzs_assoc.sm_ids[sg.id]
                param = dict(
                    samples=sm.samples,
                    seed=oq.ses_seed,
                    ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
                if sg.src_interdep == 'mutex':  # do not split the group
                    self.csm.add_infos(sg.sources)
                    yield sg, self.src_filter, gsims, param, monitor
                else:
                    for block in self.csm.split_sources(
                            sg.sources, self.src_filter, maxweight):
                        yield block, self.src_filter, gsims, param, monitor

    def store_source_info(self, infos, acc):
        # save the calculation times for each source
        if infos:
            rows = sorted(infos.values(),
                          key=operator.attrgetter('calc_time'),
                          reverse=True)
            array = numpy.zeros(len(rows), source.SourceInfo.dt)
            for i, row in enumerate(rows):
                for name in array.dtype.names:
                    array[i][name] = getattr(row, name)
            self.source_info = array
            infos.clear()
        self.rlzs_assoc = self.csm.info.get_rlzs_assoc(
            partial(self.count_eff_ruptures, acc))
        self.datastore['csm_info'] = self.csm.info
        self.datastore['csm_info/assoc_by_grp'] = array = (
            self.rlzs_assoc.get_assoc_by_grp())
        # computing properly the length in bytes of a variable length array
        nbytes = array.nbytes + sum(rec['rlzis'].nbytes for rec in array)
        self.datastore.set_attrs('csm_info/assoc_by_grp', nbytes=nbytes)
        self.datastore.flush()

    def post_execute(self, pmap_by_grp_id):
        """
        Save the probability maps in the datastore.

        :param pmap_by_grp_id:
            a dictionary grp_id -> ProbabilityMap
        """
        if pmap_by_grp_id.bb_dict:
            self.datastore['bb_dict'] = pmap_by_grp_id.bb_dict
        grp_trt = self.csm.info.grp_trt()
        with self.monitor('saving probability maps', autoflush=True):
            for grp_id, pmap in pmap_by_grp_id.items():
                if pmap:  # pmap can be missing if the group is filtered away
                    key = 'poes/grp-%02d' % grp_id
                    self.datastore[key] = pmap
                    self.datastore.set_attrs(key, trt=grp_trt[grp_id])
            if 'poes' in self.datastore:
                self.datastore.set_nbytes('poes')
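
In agg_dicts the partial probability maps coming from the tasks are merged with `acc[pmap.grp_id] |= pmap`. Assuming the maps combine probabilities of exceedance under independence, i.e. poe = 1 - (1 - a)(1 - b), the arithmetic looks like the sketch below; this is an illustration of the idea, not the ProbabilityMap class itself.

import numpy

poes_a = numpy.array([0.10, 0.05, 0.01])  # PoEs from one batch of sources
poes_b = numpy.array([0.20, 0.02, 0.00])  # PoEs from another batch

combined = 1. - (1. - poes_a) * (1. - poes_b)
print(combined)  # e.g. 0.1 and 0.2 combine to 0.28
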
Example #23
0
class RiskCalculator(HazardCalculator):
    """
    Base class for all risk calculators. A risk calculator must set the
    attributes .riskmodel, .sitecol, .assets_by_site, .exposure and
    .riskinputs in the pre_execute phase.
    """
    specific_assets = datastore.persistent_attribute('specific_assets')
    extra_args = ()  # to be overridden in subclasses

    def make_eps(self, num_ruptures):
        """
        :param num_ruptures: the size of the epsilon array for each asset
        """
        oq = self.oqparam
        with self.monitor('building epsilons', autoflush=True):
            return riskinput.make_eps(self.assets_by_site, num_ruptures,
                                      oq.master_seed, oq.asset_correlation)

    def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)):
        """
        :param hazards_by_key:
            a dictionary key -> IMT -> array of length num_sites
        :param eps:
            a matrix of epsilons (possibly empty)
        :returns:
            a list of RiskInputs objects, sorted by IMT.
        """
        # add asset.idx as side effect
        riskinput.build_asset_collection(self.assets_by_site,
                                         self.oqparam.time_event)
        imtls = self.oqparam.imtls
        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = []
            idx_weight_pairs = [
                (i, len(assets))
                for i, assets in enumerate(self.assets_by_site)]
            blocks = general.split_in_blocks(
                idx_weight_pairs, self.oqparam.concurrent_tasks or 1,
                weight=operator.itemgetter(1))
            for block in blocks:
                indices = numpy.array([idx for idx, _weight in block])
                reduced_assets = self.assets_by_site[indices]
                reduced_eps = {}  # for the assets belonging to the indices
                if len(eps):
                    for assets in reduced_assets:
                        for asset in assets:
                            reduced_eps[asset.idx] = eps[asset.idx]

                # collect the hazards by key into hazards by imt
                hdata = collections.defaultdict(lambda: [{} for _ in indices])
                for key, hazards_by_imt in hazards_by_key.items():
                    for imt in imtls:
                        hazards_by_site = hazards_by_imt[imt]
                        for i, haz in enumerate(hazards_by_site[indices]):
                            hdata[imt][i][key] = haz
                # build the riskinputs
                for imt in hdata:
                    ri = self.riskmodel.build_input(imt, hdata[imt],
                                                    reduced_assets,
                                                    reduced_eps)
                    if ri.weight > 0:
                        riskinputs.append(ri)
            logging.info('Built %d risk inputs', len(riskinputs))
            return sorted(riskinputs, key=self.riskinput_key)

    def riskinput_key(self, ri):
        """
        :param ri: riskinput object
        :returns: the IMT associated to it
        """
        return ri.imt

    def execute(self):
        """
        Parallelize on the riskinputs and return a dictionary of results.
        Requires a `.core_func` to be defined with signature
        (riskinputs, riskmodel, rlzs_assoc, monitor).
        """
        # add fatalities as side effect
        riskinput.build_asset_collection(self.assets_by_site,
                                         self.oqparam.time_event)
        self.monitor.oqparam = self.oqparam
        if self.pre_calculator == 'event_based_rupture':
            self.monitor.assets_by_site = self.assets_by_site
            self.monitor.num_assets = self.count_assets()
        all_args = ((self.riskinputs, self.riskmodel, self.rlzs_assoc) +
                    self.extra_args + (self.monitor, ))
        res = apply_reduce(self.core_func.__func__,
                           all_args,
                           concurrent_tasks=self.oqparam.concurrent_tasks,
                           weight=get_weight,
                           key=self.riskinput_key)
        return res
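
execute hands the riskinputs to apply_reduce, which groups them by key (the IMT), weights them, runs core_func on each block in parallel and merges the partial dictionaries. The serial stand-in below sketches that flow with toy inputs; it ignores weights and parallelism and is not the openquake apply_reduce.

import collections
import operator

def apply_reduce(func, inputs, key=lambda item: None, agg=None):
    """Group the inputs by key, apply func per block, merge the results."""
    blocks = collections.defaultdict(list)
    for item in inputs:
        blocks[key(item)].append(item)
    acc = {}
    for block in blocks.values():
        partial = func(block)  # in the engine each block runs in parallel
        acc = agg(acc, partial) if agg else {**acc, **partial}
    return acc

def core_func(riskinputs):  # toy core function: count riskinputs per IMT
    return {riskinputs[0]['imt']: len(riskinputs)}

inputs = [{'imt': 'PGA'}, {'imt': 'PGA'}, {'imt': 'SA(0.1)'}]
print(apply_reduce(core_func, inputs, key=operator.itemgetter('imt')))
# -> {'PGA': 2, 'SA(0.1)': 1}
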
Example #24
0
class BaseCalculator(with_metaclass(abc.ABCMeta)):
    """
    Abstract base class for all calculators.

    :param oqparam: OqParam object
    :param monitor: monitor object
    :param calc_id: numeric calculation ID
    """

    oqparam = datastore.persistent_attribute('oqparam')
    sitemesh = datastore.persistent_attribute('sitemesh')
    sitecol = datastore.persistent_attribute('sitecol')
    rlzs_assoc = datastore.persistent_attribute('rlzs_assoc')
    realizations = datastore.persistent_attribute('realizations')
    assets_by_site = datastore.persistent_attribute('assets_by_site')
    assetcol = datastore.persistent_attribute('assetcol')
    cost_types = datastore.persistent_attribute('cost_types')
    taxonomies = datastore.persistent_attribute('taxonomies')
    job_info = datastore.persistent_attribute('job_info')
    source_chunks = datastore.persistent_attribute('source_chunks')
    source_pre_info = datastore.persistent_attribute('source_pre_info')
    performance = datastore.persistent_attribute('performance')
    csm = datastore.persistent_attribute('composite_source_model')
    pre_calculator = None  # to be overridden
    is_stochastic = False  # True for scenario and event based calculators

    def __init__(self,
                 oqparam,
                 monitor=DummyMonitor(),
                 calc_id=None,
                 persistent=True):
        self.monitor = monitor
        if persistent:
            self.datastore = datastore.DataStore(calc_id)
        else:
            self.datastore = general.AccumDict()
            self.datastore.hdf5 = {}
        self.datastore.export_dir = oqparam.export_dir
        if 'oqparam' not in self.datastore:  # new datastore
            self.oqparam = oqparam
        # else we are doing a precalculation; oqparam has already been stored
        self.persistent = persistent

    def run(self,
            pre_execute=True,
            clean_up=True,
            concurrent_tasks=None,
            **kw):
        """
        Run the calculation and return the exported outputs.
        """
        if concurrent_tasks is not None:
            self.oqparam.concurrent_tasks = concurrent_tasks
        vars(self.oqparam).update(kw)
        exported = {}
        try:
            if pre_execute:
                with self.monitor('pre_execute', autoflush=True):
                    self.pre_execute()
            with self.monitor('execute', autoflush=True):
                result = self.execute()
            with self.monitor('post_execute', autoflush=True):
                self.post_execute(result)
            with self.monitor('export', autoflush=True):
                exported = self.export()
        finally:
            etype = sys.exc_info()[0]
            if etype:
                logging.critical('', exc_info=True)
            if clean_up:
                try:
                    self.clean_up()
                except:
                    logging.error('Cleanup error', exc_info=True)
            return exported

    def core_func(*args):
        """
        Core routine running on the workers.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def pre_execute(self):
        """
        Initialization phase.
        """

    @abc.abstractmethod
    def execute(self):
        """
        Execution phase. Usually runs the core function in parallel
        and returns a dictionary with the results.
        """

    @abc.abstractmethod
    def post_execute(self, result):
        """
        Post-processing phase of the aggregated output. It must be
        overridden with the export code. It will return a dictionary
        of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.

        :returns: dictionary output_key -> sorted list of exported paths
        """
        exported = {}
        individual_curves = self.oqparam.individual_curves
        fmts = exports.split(',') if exports else self.oqparam.exports
        for fmt in fmts:
            if not fmt:
                continue
            for key in self.datastore:
                if 'rlzs' in key and not individual_curves:
                    continue  # skip individual curves
                ekey = (key, fmt)
                try:
                    exported[ekey] = sorted(export.export(
                        ekey, self.datastore))
                    logging.info('exported %s: %s', key, exported[ekey])
                except KeyError:
                    logging.info('%s is not exportable in %s', key, fmt)
        return exported

    def clean_up(self):
        """
        Collect the realizations and the monitoring information,
        then close the datastore.
        """
        self.realizations = numpy.array(
            [(r.uid, r.weight) for r in self.rlzs_assoc.realizations], rlz_dt)
        performance = self.monitor.collect_performance()
        if performance is not None:
            self.performance = performance
        self.datastore.close()
        self.datastore.symlink(os.path.dirname(self.oqparam.inputs['job_ini']))
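
For context, a concrete calculator only has to implement the three abstract phases; `run` then wires them together and returns the exported outputs. A minimal hypothetical sketch, reusing the names defined in the example above (`oq` is assumed to be an OqParam-like object with an `export_dir` attribute):

class NullCalculator(BaseCalculator):
    """Hypothetical do-nothing calculator showing the three phases."""
    def pre_execute(self):
        self.data = list(range(10))       # pretend initialization

    def execute(self):
        return {'total': sum(self.data)}  # pretend computation

    def post_execute(self, result):
        logging.info('total=%d', result['total'])

# calc = NullCalculator(oq, persistent=False)
# exported = calc.run()  # pre_execute -> execute -> post_execute -> export
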
Example #25
class BaseCalculator(with_metaclass(abc.ABCMeta)):
    """
    Abstract base class for all calculators.

    :param oqparam: OqParam object
    :param monitor: monitor object
    :param calc_id: numeric calculation ID
    """
    sitemesh = datastore.persistent_attribute('sitemesh')
    sitecol = datastore.persistent_attribute('sitecol')
    etags = datastore.persistent_attribute('etags')
    assetcol = datastore.persistent_attribute('assetcol')
    cost_types = datastore.persistent_attribute('cost_types')
    job_info = datastore.persistent_attribute('job_info')
    performance = datastore.persistent_attribute('performance')
    csm = datastore.persistent_attribute('composite_source_model')
    pre_calculator = None  # to be overridden
    is_stochastic = False  # True for scenario and event based calculators

    @property
    def taxonomies(self):
        return self.datastore['assetcol/taxonomies'].value

    def __init__(self, oqparam, monitor=Monitor(), calc_id=None):
        self.monitor = monitor
        self.datastore = datastore.DataStore(calc_id)
        self.monitor.calc_id = self.datastore.calc_id
        self.monitor.hdf5path = self.datastore.hdf5path
        self.datastore.export_dir = oqparam.export_dir
        self.oqparam = oqparam

    def save_params(self, **kw):
        """
        Update the current calculation parameters and save engine_version
        """
        vars(self.oqparam).update(engine_version=__version__, **kw)
        self.datastore['oqparam'] = self.oqparam  # save the updated oqparam
        self.datastore.flush()

    def set_log_format(self):
        """Set the format of the root logger"""
        fmt = '[%(asctime)s #{} %(levelname)s] %(message)s'.format(
            self.datastore.calc_id)
        for handler in logging.root.handlers:
            handler.setFormatter(logging.Formatter(fmt))

    def run(self, pre_execute=True, concurrent_tasks=None, close=True, **kw):
        """
        Run the calculation and return the exported outputs.
        """
        self.close = close
        self.set_log_format()
        if logversion:  # make sure this is logged only once
            logging.info('Using engine version %s', __version__)
            logversion.pop()
        if (concurrent_tasks is not None
                and concurrent_tasks != OqParam.concurrent_tasks.default):
            self.oqparam.concurrent_tasks = concurrent_tasks
        self.save_params(**kw)
        exported = {}
        try:
            if pre_execute:
                self.pre_execute()
            result = self.execute()
            self.post_execute(result)
            exported = self.export(kw.get('exports', ''))
        except KeyboardInterrupt:
            pids = ' '.join(str(p.pid) for p in executor._processes)
            sys.stderr.write(
                'You can manually kill the workers with kill %s\n' % pids)
            raise
        except:
            if kw.get('pdb'):  # post-mortem debug
                tb = sys.exc_info()[2]
                traceback.print_tb(tb)
                pdb.post_mortem(tb)
            else:
                logging.critical('', exc_info=True)
                raise
        self.clean_up()
        return exported

    def core_task(*args):
        """
        Core routine running on the workers.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def pre_execute(self):
        """
        Initialization phase.
        """

    @abc.abstractmethod
    def execute(self):
        """
        Execution phase. Usually runs the core function in parallel
        and returns a dictionary with the results.
        """

    @abc.abstractmethod
    def post_execute(self, result):
        """
        Post-processing phase of the aggregated output. It must be
        overridden with the export code. It will return a dictionary
        of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.

        :returns: dictionary output_key -> sorted list of exported paths
        """
        # avoid circular imports
        from openquake.commonlib.export import export as exp
        exported = {}
        individual_curves = self.oqparam.individual_curves
        if exports and isinstance(exports, tuple):
            fmts = exports
        elif exports:  # is a string
            fmts = exports.split(',')
        else:  # fall back to the exports configured in oqparam
            fmts = self.oqparam.exports
        for fmt in fmts:
            if not fmt:
                continue
            keys = set(self.datastore)
            if (self.oqparam.uniform_hazard_spectra
                    and not self.oqparam.hazard_maps):
                # do not export the hazard maps, even if they are there
                keys.remove('hmaps')
            for key in sorted(keys):  # top level keys
                if 'rlzs' in key and not individual_curves:
                    continue  # skip individual curves
                ekey = (key, fmt)
                if ekey not in exp:  # non-exportable output
                    continue
                with self.monitor('export'):
                    exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', key, exported[ekey])
            # special case for uhs which is a view
            if (self.oqparam.uniform_hazard_spectra
                    and 'hmaps' in self.datastore):
                ekey = ('uhs', fmt)
                exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', ekey[0], exported[ekey])
        return exported

    def clean_up(self):
        """
        Set the `nbytes` attributes on the hazard outputs, flush the
        datastore and close it (unless the engine will close it later).
        """
        if 'hcurves' in self.datastore:
            self.datastore.set_nbytes('hcurves')
        if 'hmaps' in self.datastore:
            self.datastore.set_nbytes('hmaps')
        self.datastore.flush()
        if self.close:  # in the engine we close later
            try:
                self.datastore.close()
            except RuntimeError:  # there could be a mysterious HDF5 error
                logging.warn('', exc_info=True)
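
The `export` method above accepts the formats either as a tuple or as a comma-separated string, falling back to the formats configured in the job. That normalization is easy to exercise in isolation; the helper below is a standalone sketch, not engine code:

def normalize_exports(exports, default=('csv',)):
    # tuple: taken as-is; string: split on commas; empty: use the default
    if exports and isinstance(exports, tuple):
        return exports
    elif exports:  # a comma-separated string such as 'csv,xml'
        return tuple(exports.split(','))
    return tuple(default)

assert normalize_exports(('csv', 'xml')) == ('csv', 'xml')
assert normalize_exports('csv,xml') == ('csv', 'xml')
assert normalize_exports('') == ('csv',)
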
Example #26
class EventBasedRiskCalculator(base.RiskCalculator):
    """
    Event based PSHA calculator generating the event loss table and
    fixed ratios loss curves.
    """
    pre_calculator = 'event_based_rupture'
    core_func = ebr

    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some datasets in the datastore.
        """
        super(EventBasedRiskCalculator, self).pre_execute()
        if not self.riskmodel:  # there is no riskmodel, exit early
            self.execute = lambda: None
            self.post_execute = lambda result: None
            return
        oq = self.oqparam
        epsilon_sampling = oq.epsilon_sampling
        correl_model = readinput.get_correl_model(oq)
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        assets_by_site = self.assets_by_site
        # the following is needed to set the asset idx attribute
        self.assetcol = riskinput.build_asset_collection(
            assets_by_site, oq.time_event)

        logging.info('Populating the risk inputs')
        rup_by_tag = sum(self.datastore['sescollection'], AccumDict())
        all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)]
        num_samples = min(len(all_ruptures), epsilon_sampling)
        eps_dict = riskinput.make_eps_dict(assets_by_site, num_samples,
                                           oq.master_seed,
                                           oq.asset_correlation)
        logging.info('Generated %d epsilons', num_samples * len(eps_dict))
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = list(
            self.riskmodel.build_inputs_from_ruptures(
                self.sitecol.complete, all_ruptures, gsims_by_col,
                oq.truncation_level, correl_model, eps_dict,
                oq.concurrent_tasks or 1))
        logging.info('Built %d risk inputs', len(self.riskinputs))

        # preparing empty datasets
        loss_types = self.riskmodel.loss_types
        self.L = len(loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.outs = OUTPUTS
        self.datasets = {}
        self.monitor.oqparam = self.oqparam
        # ugly: attaching an attribute needed in the task function
        self.monitor.num_outputs = len(self.outs)
        # attaching two other attributes used in riskinput.gen_outputs
        self.monitor.assets_by_site = self.assets_by_site
        self.monitor.num_assets = N = self.count_assets()
        for o, out in enumerate(self.outs):
            self.datastore.hdf5.create_group(out)
            for l, loss_type in enumerate(loss_types):
                cb = self.riskmodel.curve_builders[l]
                build_curves = len(cb.ratios)
                for r, rlz in enumerate(self.rlzs_assoc.realizations):
                    key = '/%s/rlz-%03d' % (loss_type, rlz.ordinal)
                    if o in (ELT, ILT):  # loss tables
                        dset = self.datastore.create_dset(out + key, elt_dt)
                    else:  # risk curves
                        if not build_curves:
                            continue
                        dset = self.datastore.create_dset(
                            out + key, cb.poes_dt, N)
                    self.datasets[o, l, r] = dset
                if o in (FRC, IRC) and build_curves:
                    grp = self.datastore['%s/%s' % (out, loss_type)]
                    grp.attrs['loss_ratios'] = cb.ratios

    def execute(self):
        """
        Run the ebr calculator in parallel and aggregate the results
        """
        return apply_reduce(
            self.core_func.__func__,
            (self.riskinputs, self.riskmodel, self.rlzs_assoc, self.monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.agg,
            acc=cube(self.monitor.num_outputs, self.L, self.R, list),
            weight=operator.attrgetter('weight'),
            key=operator.attrgetter('col_id'))

    def agg(self, acc, result):
        """
        Aggregate list of arrays in longer lists.

        :param acc: accumulator array of shape (O, L, R)
        :param result: a numpy array of shape (O, L, R)
        """
        for idx, arrays in numpy.ndenumerate(result):
            acc[idx].extend(arrays)
        return acc

    def post_execute(self, result):
        """
        Save the event loss table in the datastore.

        :param result:
            a numpy array of shape (O, L, R) containing lists of arrays
        """
        nses = self.oqparam.ses_per_logic_tree_path
        saved = {out: 0 for out in self.outs}
        N = len(self.assetcol)
        with self.monitor('saving loss table', autoflush=True,
                          measuremem=True):
            for (o, l, r), data in numpy.ndenumerate(result):
                if not data:  # empty list
                    continue
                if o in (ELT, ILT):  # loss tables, data is a list of arrays
                    losses = numpy.concatenate(data)
                    self.datasets[o, l, r].extend(losses)
                    saved[self.outs[o]] += losses.nbytes
                else:  # risk curves, data is a list of counts dictionaries
                    cb = self.riskmodel.curve_builders[l]
                    counts_matrix = cb.get_counts(N, data)
                    curves = cb.build_rcurves(counts_matrix, nses,
                                              self.assetcol)
                    self.datasets[o, l, r].dset[:] = curves
                    saved[self.outs[o]] += curves.nbytes
                self.datastore.hdf5.flush()

        for out in self.outs:
            nbytes = saved[out]
            if nbytes:
                self.datastore[out].attrs['nbytes'] = nbytes
                logging.info('Saved %s in %s', humansize(nbytes), out)
            else:  # remove empty outputs
                del self.datastore[out]
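
The accumulator built by `cube(...)` and grown in `agg` is essentially a numpy object array of shape (O, L, R) whose cells are lists. The following standalone numpy sketch reproduces that pattern (`cube` here is a stand-in for the helper used above, not its actual implementation):

import numpy

def cube(*shape_and_factory):
    # stand-in: an object array whose cells are initialized by the factory
    *shape, factory = shape_and_factory
    arr = numpy.empty(shape, dtype=object)
    for idx in numpy.ndindex(*shape):
        arr[idx] = factory()
    return arr

acc = cube(2, 3, 4, list)                # (O, L, R) accumulator
result = cube(2, 3, 4, list)             # pretend partial result
result[0, 0, 0].append(numpy.zeros(5))

# same aggregation pattern as EventBasedRiskCalculator.agg
for idx, arrays in numpy.ndenumerate(result):
    acc[idx].extend(arrays)
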
Example #27
class EventBasedRiskCalculator(base.RiskCalculator):
    """
    Event based risk calculator generating event loss tables and loss
    curves per asset
    """
    pre_calculator = 'event_based_rupture'
    core_func = event_based_risk

    epsilon_matrix = datastore.persistent_attribute('epsilon_matrix')
    event_loss_asset = datastore.persistent_attribute('event_loss_asset')
    event_loss = datastore.persistent_attribute('event_loss')
    is_stochastic = True

    def riskinput_key(self, ri):
        """
        :param ri: riskinput object
        :returns: the SESCollection idx associated to it
        """
        return ri.col_id

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). If there were pre-existing files, they will be erased.
        """
        super(EventBasedRiskCalculator, self).pre_execute()

        oq = self.oqparam
        epsilon_sampling = getattr(oq, 'epsilon_sampling', 1000)

        correl_model = readinput.get_correl_model(oq)
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        assets_by_site = self.assets_by_site
        logging.info('Building the epsilons')

        logging.info('Populating the risk inputs')
        rup_by_tag = sum(self.datastore['sescollection'], AccumDict())
        all_ruptures = [rup_by_tag[tag] for tag in sorted(rup_by_tag)]
        num_samples = min(len(all_ruptures), epsilon_sampling)
        eps_dict = riskinput.make_eps_dict(
            assets_by_site, num_samples, oq.master_seed, oq.asset_correlation)
        logging.info('Generated %d epsilons', num_samples * len(eps_dict))
        self.epsilon_matrix = numpy.array(
            [eps_dict[a['asset_ref']] for a in self.assetcol])
        self.riskinputs = list(self.riskmodel.build_inputs_from_ruptures(
            self.sitecol.complete, all_ruptures, gsims_by_col,
            oq.truncation_level, correl_model, eps_dict,
            oq.concurrent_tasks or 1))
        logging.info('Built %d risk inputs', len(self.riskinputs))

    def zeros(self, shape, dtype):
        """
        Build a composite dtype from the given loss_types and dtype and
        return a zero array of the given shape.
        """
        loss_types = self.riskmodel.get_loss_types()
        dt = numpy.dtype([(lt, dtype) for lt in loss_types])
        return numpy.zeros(shape, dt)

    def post_execute(self, result):
        """
        Extract from the result dictionary
        rlz.ordinal -> (loss_type, tag) -> [(asset.id, loss), ...]
        several interesting outputs.
        """
        oq = self.oqparam
        # take the cached self.rlzs_assoc and write it on the datastore
        self.rlzs_assoc = self.rlzs_assoc
        rlzs = self.rlzs_assoc.realizations
        loss_types = self.riskmodel.get_loss_types()

        C = oq.loss_curve_resolution
        self.loss_curve_dt = numpy.dtype(
            [('losses', (float, C)), ('poes', (float, C)), ('avg', float)])

        if oq.conditional_loss_poes:
            lm_names = _loss_map_names(oq.conditional_loss_poes)
            self.loss_map_dt = numpy.dtype([(f, float) for f in lm_names])

        self.assets = assets = riskinput.sorted_assets(self.assets_by_site)

        self.specific_assets = specific_assets = [
            a for a in assets if a.id in self.oqparam.specific_assets]
        specific_asset_refs = set(self.oqparam.specific_assets)

        N = len(assets)

        event_loss_asset = [{} for rlz in rlzs]
        event_loss = [{} for rlz in rlzs]

        loss_curves = self.zeros(N, self.loss_curve_dt)
        ins_curves = self.zeros(N, self.loss_curve_dt)
        if oq.conditional_loss_poes:
            loss_maps = self.zeros(N, self.loss_map_dt)
        agg_loss_curve = self.zeros(1, self.loss_curve_dt)

        for i in sorted(result):
            rlz = rlzs[i]

            data_by_lt_tag = result[i]
            # (loss_type, asset_id) -> [(tag, loss, ins_loss), ...]
            elass = {(loss_type, asset.id): [] for asset in assets
                     for loss_type in loss_types}
            elagg = []  # aggregate event loss
            nonzero = total = 0
            for loss_type, tag in data_by_lt_tag:
                d = data_by_lt_tag[loss_type, tag]
                if tag == 'counts_matrix':
                    assets, counts = d.keys(), d.values()
                    indices = numpy.array([asset.idx for asset in assets])
                    asset_values = workflows.get_values(loss_type, assets)
                    poes = scientific.build_poes(
                        counts, oq.ses_per_logic_tree_path)
                    cb = scientific.CurveBuilder(
                        loss_type, numpy.linspace(0, 1, C))
                    lcurves = cb.build_loss_curves(
                        poes, asset_values, indices, N)
                    self.store('lcurves/' + loss_type, rlz, lcurves)
                    continue

                for aid, loss, ins_loss in d['data']:
                    elass[loss_type, aid].append((tag, loss, ins_loss))

                # aggregates
                elagg.append((loss_type, tag, d['loss'], d['ins_loss']))
                nonzero += d['nonzero']
                total += d['total']
            logging.info('rlz=%d: %d/%d nonzero losses', i, nonzero, total)

            if elass:
                data_by_lt = collections.defaultdict(list)
                for (loss_type, asset_id), rows in elass.items():
                    for tag, loss, ins_loss in rows:
                        data_by_lt[loss_type].append(
                            (tag, asset_id, loss, ins_loss))
                for loss_type, data in data_by_lt.items():
                    event_loss_asset[i][loss_type] = sorted(
                        # data contains rows (tag, asset, loss, ins_loss)
                        (t, a, l, i) for t, a, l, i in data
                        if a in specific_asset_refs)

                    # build the loss curves per asset
                    lc = self.build_loss_curves(elass, loss_type, 1)
                    loss_curves[loss_type] = lc

                    if oq.insured_losses:
                        # build the insured loss curves per asset
                        ic = self.build_loss_curves(elass, loss_type, 2)
                        ins_curves[loss_type] = ic

                    if oq.conditional_loss_poes:
                        # build the loss maps per asset, array of shape (N, P)
                        losses_poes = numpy.array(  # shape (N, 2, C)
                            [lc['losses'], lc['poes']]).transpose(1, 0, 2)
                        lmaps = scientific.loss_map_matrix(
                            oq.conditional_loss_poes, losses_poes)  # (P, N)
                        for lm, lmap in zip(lm_names, lmaps):
                            loss_maps[loss_type][lm] = lmap

            self.store('loss_curves', rlz, loss_curves)
            if oq.insured_losses:
                self.store('ins_curves', rlz, ins_curves)
            if oq.conditional_loss_poes:
                self.store('loss_maps', rlz, loss_maps)

            if elagg:
                for loss_type, rows in groupby(
                        elagg, operator.itemgetter(0)).items():
                    event_loss[i][loss_type] = [row[1:] for row in rows]
                    # aggregate loss curve for all tags
                    losses, poes, avg, _ = self.build_agg_loss_curve_and_map(
                        [loss for _lt, _tag, loss, _ins_loss in rows])
                    # NB: there is no aggregate insured loss curve
                    agg_loss_curve[loss_type][0] = (losses, poes, avg)
                    # NB: the aggregated loss_map is not stored
                self.store('agg_loss_curve', rlz, agg_loss_curve)

        if specific_assets:
            self.event_loss_asset = event_loss_asset
        self.event_loss = event_loss

        # store statistics (i.e. mean and quantiles) for curves and maps
        if len(self.rlzs_assoc.realizations) > 1:
            self.compute_store_stats('loss_curves')
            self.compute_store_stats('agg_loss_curve')

    def clean_up(self):
        """
        Final checks and cleanup
        """
        if (self.oqparam.ground_motion_fields and
                'gmf_by_trt_gsim' not in self.datastore):
            logging.warn(
                'Even if the flag `ground_motion_fields` was set the GMFs '
                'were not saved.\nYou should use the event_based hazard '
                'calculator to do that, not the risk one')
        super(EventBasedRiskCalculator, self).clean_up()

    def build_agg_loss_curve_and_map(self, losses):
        """
        Build a loss curve from a set of losses with length given by
        the parameter loss_curve_resolution.

        :param losses: a sequence of losses
        :returns: a quartet (losses, poes, avg, loss_map)
        """
        oq = self.oqparam
        clp = oq.conditional_loss_poes
        losses_poes = scientific.event_based(
            losses, tses=oq.tses, time_span=oq.risk_investigation_time or
            oq.investigation_time, curve_resolution=oq.loss_curve_resolution)
        loss_map = scientific.loss_map_matrix(
            clp, [losses_poes]).reshape(len(clp)) if clp else None
        return (losses_poes[0], losses_poes[1],
                scientific.average_loss(losses_poes), loss_map)

    def build_loss_curves(self, elass, loss_type, i):
        """
        Build loss curves per asset from a set of losses with length given by
        the parameter loss_curve_resolution.

        :param elass: a dict (loss_type, asset_id) -> [(tag, loss, ins_loss), ...]
        :param loss_type: the loss_type
        :param i: 1 for loss curves or 2 for insured losses
        :returns: an array of loss curves, one for each asset
        """
        oq = self.oqparam
        C = oq.loss_curve_resolution
        lcs = []
        for asset in self.assets:
            all_losses = [loss[i] for loss in elass[loss_type, asset.id]]
            if all_losses:
                losses, poes = scientific.event_based(
                    all_losses, tses=oq.tses,
                    time_span=oq.risk_investigation_time or
                    oq.investigation_time, curve_resolution=C)
                avg = scientific.average_loss((losses, poes))
            else:
                losses, poes = numpy.zeros(C), numpy.zeros(C)
                avg = 0
            lcs.append((losses, poes, avg))
        return numpy.array(lcs, self.loss_curve_dt)

    def store(self, name, dset, curves):
        """
        Store loss curves, maps and aggregates

        :param name: the base name of the datastore key (e.g. loss_curves)
        :param dset: a realization (for kind 'rlzs') or a statistics label
                     (for kind 'stats')
        :param curves: an array of curves to store
        """
        if hasattr(dset, 'uid'):
            dset = dset.uid
            kind = 'rlzs'
        else:
            kind = 'stats'
        self.datastore['%s-%s/%s' % (name, kind, dset)] = curves

    # ################### methods to compute statistics  #################### #

    def build_stats(self, loss_curve_key):
        """
        Compute all statistics for the specified assets starting from the
        stored loss curves. Yield a statistical output object for each
        loss type.
        """
        oq = self.oqparam
        rlzs = self.rlzs_assoc.realizations
        stats = scientific.StatsBuilder(
            oq.quantile_loss_curves, oq.conditional_loss_poes, [],
            scientific.normalize_curves_eb)
        # NB: should we encounter memory issues in the future, the easy
        # solution is to split the specific assets in blocks and perform
        # the computation one block at a time
        for loss_type in self.riskmodel.get_loss_types():
            outputs = []
            for rlz in rlzs:
                key = '%s-rlzs/%s' % (loss_curve_key, rlz.uid)
                lcs = self.datastore[key][loss_type]
                assets = [None] if key.startswith('agg') else self.assets
                losses_poes = numpy.array(  # -> shape (N, 2, C)
                    [lcs['losses'], lcs['poes']]).transpose(1, 0, 2)
                out = scientific.Output(
                    assets, loss_type, rlz.ordinal, rlz.weight,
                    loss_curves=losses_poes, insured_curves=None)
                outputs.append(out)
            yield stats.build(outputs)

    def compute_store_stats(self, loss_curve_key):
        """
        Compute and store the statistical outputs
        """
        oq = self.oqparam
        N = 1 if loss_curve_key.startswith('agg_') else len(self.assets)
        Q = 1 + len(oq.quantile_loss_curves)
        loss_curve_stats = self.zeros((Q, N), self.loss_curve_dt)
        ins_curve_stats = self.zeros((Q, N), self.loss_curve_dt)
        if oq.conditional_loss_poes:
            loss_map_stats = self.zeros((Q, N), self.loss_map_dt)

        for stat in self.build_stats(loss_curve_key):
            # there is one stat for each loss_type
            curves, ins_curves, maps = scientific.get_stat_curves(stat)
            loss_curve_stats[:][stat.loss_type] = curves
            if oq.insured_losses:
                ins_curve_stats[:][stat.loss_type] = ins_curves
            if oq.conditional_loss_poes:
                loss_map_stats[:][stat.loss_type] = maps

        for i, stats in enumerate(_mean_quantiles(oq.quantile_loss_curves)):
            self.store(loss_curve_key, stats, loss_curve_stats[i])
            if oq.insured_losses:
                self.store(loss_curve_key + '_ins', stats, ins_curve_stats[i])
            if oq.conditional_loss_poes:
                self.store(loss_curve_key + '_maps', stats, loss_map_stats[i])
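
The `zeros` helper and `loss_curve_dt` above rely on numpy composite dtypes: one named field per loss type, each field itself a (losses, poes, avg) record. A standalone numpy sketch of the same layout (the loss type names and the curve resolution below are made up):

import numpy

C = 20  # hypothetical loss_curve_resolution
loss_curve_dt = numpy.dtype(
    [('losses', (float, C)), ('poes', (float, C)), ('avg', float)])

loss_types = ['structural', 'nonstructural']     # made-up loss types
dt = numpy.dtype([(lt, loss_curve_dt) for lt in loss_types])

curves = numpy.zeros(3, dt)                      # 3 assets
curves['structural'][0] = (numpy.linspace(0, 1e6, C),
                           numpy.linspace(1, 0, C), 4.2e5)
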
Example #28
class BaseCalculator(with_metaclass(abc.ABCMeta)):
    """
    Abstract base class for all calculators.

    :param oqparam: OqParam object
    :param monitor: monitor object
    :param calc_id: numeric calculation ID
    """
    sitemesh = datastore.persistent_attribute('sitemesh')
    sitecol = datastore.persistent_attribute('sitecol')
    rlzs_assoc = datastore.persistent_attribute('rlzs_assoc')
    realizations = datastore.persistent_attribute('realizations')
    assets_by_site = datastore.persistent_attribute('assets_by_site')
    assetcol = datastore.persistent_attribute('assetcol')
    cost_types = datastore.persistent_attribute('cost_types')
    taxonomies = datastore.persistent_attribute('taxonomies')
    job_info = datastore.persistent_attribute('job_info')
    source_chunks = datastore.persistent_attribute('source_chunks')
    source_pre_info = datastore.persistent_attribute('source_pre_info')
    performance = datastore.persistent_attribute('performance')
    csm = datastore.persistent_attribute('composite_source_model')
    pre_calculator = None  # to be overridden
    is_stochastic = False  # True for scenario and event based calculators

    def __init__(self, oqparam, monitor=DummyMonitor(), calc_id=None,
                 persistent=True):
        self.monitor = monitor
        if persistent:
            self.datastore = datastore.DataStore(calc_id)
        else:
            self.datastore = general.AccumDict()
            self.datastore.hdf5 = {}
            self.datastore.attrs = {}
        self.datastore.export_dir = oqparam.export_dir
        self.oqparam = oqparam
        self.persistent = persistent

    def save_params(self, **kw):
        """
        Update the current calculation parameters
        """
        vars(self.oqparam).update(kw)
        for name, val in self.oqparam.to_params():
            self.datastore.attrs[name] = val
        self.datastore.attrs['oqlite_version'] = repr(__version__)
        self.datastore.hdf5.flush()

    def run(self, pre_execute=True, clean_up=True, concurrent_tasks=None,
            **kw):
        """
        Run the calculation and return the exported outputs.
        """
        if concurrent_tasks is not None:
            self.oqparam.concurrent_tasks = concurrent_tasks
        self.save_params(**kw)
        exported = {}
        try:
            if pre_execute:
                with self.monitor('pre_execute', autoflush=True):
                    self.pre_execute()
            with self.monitor('execute', autoflush=True):
                result = self.execute()
            with self.monitor('post_execute', autoflush=True):
                self.post_execute(result)
            with self.monitor('export', autoflush=True):
                exported = self.export()
        except:
            if kw.get('pdb'):  # post-mortem debug
                tb = sys.exc_info()[2]
                traceback.print_tb(tb)
                pdb.post_mortem(tb)
            else:
                logging.critical('', exc_info=True)
                raise
        # don't cleanup if there is a critical error, otherwise
        # there will likely be a cleanup error covering the real one
        if clean_up:
            self.clean_up()
        return exported

    def core_func(*args):
        """
        Core routine running on the workers.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def pre_execute(self):
        """
        Initialization phase.
        """

    @abc.abstractmethod
    def execute(self):
        """
        Execution phase. Usually runs the core function in parallel
        and returns a dictionary with the results.
        """

    @abc.abstractmethod
    def post_execute(self, result):
        """
        Post-processing phase of the aggregated output. It must be
        overridden with the export code. It will return a dictionary
        of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.

        :returns: dictionary output_key -> sorted list of exported paths
        """
        exported = {}
        individual_curves = self.oqparam.individual_curves
        fmts = exports.split(',') if exports else self.oqparam.exports
        for fmt in fmts:
            if not fmt:
                continue
            for key in self.datastore:  # top level keys
                if 'rlzs' in key and not individual_curves:
                    continue  # skip individual curves
                ekey = (key, fmt)
                if ekey not in export.export:  # non-exportable output
                    continue
                exported[ekey] = export.export(ekey, self.datastore)
                logging.info('exported %s: %s', key, exported[ekey])
        return exported

    def clean_up(self):
        """
        Collect the realizations and the monitoring information.
        """
        if 'rlzs_assoc' in self.datastore:
            self.realizations = numpy.array(
                [(r.uid, r.weight) for r in self.rlzs_assoc.realizations],
                rlz_dt)
        performance = self.monitor.collect_performance()
        if performance is not None:
            self.performance = performance
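
`with_metaclass` is a Python 2/3 compatibility shim (the same idiom provided by six); on Python 3 the declarations above amount to `metaclass=abc.ABCMeta`, so a subclass missing one of the abstract phases cannot be instantiated. A standalone sketch of that behaviour:

import abc

class Phased(metaclass=abc.ABCMeta):  # what with_metaclass gives on Python 3
    @abc.abstractmethod
    def pre_execute(self): ...

    @abc.abstractmethod
    def execute(self): ...

    @abc.abstractmethod
    def post_execute(self, result): ...

class Incomplete(Phased):
    def pre_execute(self): pass
    def execute(self): return {}
    # post_execute is intentionally missing

try:
    Incomplete()
except TypeError as exc:
    print(exc)  # Can't instantiate abstract class Incomplete ...
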
Example #29
class BaseCalculator(with_metaclass(abc.ABCMeta)):
    """
    Abstract base class for all calculators.

    :param oqparam: OqParam object
    :param monitor: monitor object
    :param calc_id: numeric calculation ID
    """
    from_engine = False  # set by engine.run_calc
    sitecol = datastore.persistent_attribute('sitecol')
    assetcol = datastore.persistent_attribute('assetcol')
    performance = datastore.persistent_attribute('performance')
    pre_calculator = None  # to be overridden
    is_stochastic = False  # True for scenario and event based calculators

    @property
    def taxonomies(self):
        return self.datastore['assetcol/taxonomies'].value

    def __init__(self, oqparam, monitor=Monitor(), calc_id=None):
        self._monitor = monitor
        self.datastore = datastore.DataStore(calc_id)
        self.oqparam = oqparam

    def monitor(self, operation, **kw):
        """
        Return a new Monitor instance
        """
        mon = self._monitor(operation, hdf5path=self.datastore.hdf5path)
        self._monitor.calc_id = mon.calc_id = self.datastore.calc_id
        vars(mon).update(kw)
        return mon

    def save_params(self, **kw):
        """
        Update the current calculation parameters and save engine_version
        """
        vars(self.oqparam).update(**kw)
        self.datastore['oqparam'] = self.oqparam  # save the updated oqparam
        attrs = self.datastore['/'].attrs
        attrs['engine_version'] = engine_version
        self.datastore.flush()

    def set_log_format(self):
        """Set the format of the root logger"""
        fmt = '[%(asctime)s #{} %(levelname)s] %(message)s'.format(
            self.datastore.calc_id)
        for handler in logging.root.handlers:
            handler.setFormatter(logging.Formatter(fmt))

    def run(self, pre_execute=True, concurrent_tasks=None, close=True, **kw):
        """
        Run the calculation and return the exported outputs.
        """
        global logversion
        self.close = close
        self.set_log_format()
        if logversion:  # make sure this is logged only once
            logging.info('Running %s', self.oqparam.inputs['job_ini'])
            logging.info('Using engine version %s', engine_version)
            logversion = False
        if concurrent_tasks is None:  # use the job.ini parameter
            ct = self.oqparam.concurrent_tasks
        else:  # use the parameter passed in the command line
            ct = concurrent_tasks
        if ct == 0:  # disable distribution temporarily
            oq_distribute = os.environ.get('OQ_DISTRIBUTE')
            os.environ['OQ_DISTRIBUTE'] = 'no'
        if ct != self.oqparam.concurrent_tasks:
            # save the used concurrent_tasks
            self.oqparam.concurrent_tasks = ct
        self.save_params(**kw)
        exported = {}
        try:
            if pre_execute:
                self.pre_execute()
            self.result = self.execute()
            if self.result is not None:
                self.post_execute(self.result)
            self.before_export()
            exported = self.export(kw.get('exports', ''))
        except KeyboardInterrupt:
            pids = ' '.join(str(p.pid) for p in executor._processes)
            sys.stderr.write(
                'You can manually kill the workers with kill %s\n' % pids)
            raise
        except:
            if kw.get('pdb'):  # post-mortem debug
                tb = sys.exc_info()[2]
                traceback.print_tb(tb)
                pdb.post_mortem(tb)
            else:
                logging.critical('', exc_info=True)
                raise
        finally:
            if ct == 0:  # restore OQ_DISTRIBUTE
                if oq_distribute is None:  # was not set
                    del os.environ['OQ_DISTRIBUTE']
                else:
                    os.environ['OQ_DISTRIBUTE'] = oq_distribute
        return exported

    def core_task(*args):
        """
        Core routine running on the workers.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def pre_execute(self):
        """
        Initialization phase.
        """

    @abc.abstractmethod
    def execute(self):
        """
        Execution phase. Usually runs the core function in parallel
        and returns a dictionary with the results.
        """

    @abc.abstractmethod
    def post_execute(self, result):
        """
        Post-processing phase of the aggregated output. It must be
        overridden with the export code. It will return a dictionary
        of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.
        Individual outputs are not exported if there are multiple realizations.

        :returns: dictionary output_key -> sorted list of exported paths
        """
        num_rlzs = len(self.datastore['realizations'])
        exported = {}
        if isinstance(exports, tuple):
            fmts = exports
        elif exports:  # is a string
            fmts = exports.split(',')
        elif isinstance(self.oqparam.exports, tuple):
            fmts = self.oqparam.exports
        else:  # is a string
            fmts = self.oqparam.exports.split(',')
        keys = set(self.datastore)
        has_hcurves = 'hcurves' in self.datastore or 'poes' in self.datastore
        if has_hcurves:
            keys.add('hcurves')
        for fmt in fmts:
            if not fmt:
                continue
            for key in sorted(keys):  # top level keys
                if 'rlzs' in key and num_rlzs > 1:
                    continue  # skip individual curves
                self._export((key, fmt), exported)
            if has_hcurves and self.oqparam.hazard_maps:
                self._export(('hmaps', fmt), exported)
            if has_hcurves and self.oqparam.uniform_hazard_spectra:
                self._export(('uhs', fmt), exported)

        if self.close:  # in the engine we close later
            self.result = None
            try:
                self.datastore.close()
            except (RuntimeError, ValueError):
                # sometimes produces errors but they are difficult to
                # reproduce
                logging.warn('', exc_info=True)
        return exported

    def _export(self, ekey, exported):
        if ekey in exp:
            with self.monitor('export'):
                exported[ekey] = exp(ekey, self.datastore)
                logging.info('exported %s: %s', ekey[0], exported[ekey])

    def before_export(self):
        """
        Collect the realizations and set the attributes nbytes
        """
        sm_by_rlz = self.datastore['csm_info'].get_sm_by_rlz(
            self.rlzs_assoc.realizations) or collections.defaultdict(
                lambda: 'NA')
        self.datastore['realizations'] = numpy.array(
            [(r.uid, sm_by_rlz[r], gsim_names(r), r.weight)
             for r in self.rlzs_assoc.realizations], rlz_dt)
        if 'hcurves' in set(self.datastore):
            self.datastore.set_nbytes('hcurves')
        self.datastore.flush()
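
The `ct == 0` branch of `run` above temporarily forces sequential execution by overriding the OQ_DISTRIBUTE environment variable and restoring it in the `finally` clause. The same save/override/restore pattern, extracted as a standalone sketch:

import os

def with_env_override(name, value, func):
    """Call func() with os.environ[name] temporarily set to value."""
    saved = os.environ.get(name)      # None means the variable was unset
    os.environ[name] = value
    try:
        return func()
    finally:
        if saved is None:
            del os.environ[name]      # was not set: remove it again
        else:
            os.environ[name] = saved  # restore the previous value

# mirrors the ct == 0 branch above, which sets OQ_DISTRIBUTE to 'no'
print(with_env_override('OQ_DISTRIBUTE', 'no',
                        lambda: os.environ['OQ_DISTRIBUTE']))
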
Example #30
class ClassicalRiskCalculator(base.RiskCalculator):
    """
    Classical Risk calculator
    """
    pre_calculator = 'classical'
    avg_losses = datastore.persistent_attribute('avg_losses-rlzs')
    core_task = classical_risk

    def pre_execute(self):
        """
        Associate the assets to the sites and build the riskinputs.
        """
        if 'hazard_curves' in self.oqparam.inputs:  # read hazard from file
            haz_sitecol, haz_curves = readinput.get_hcurves(self.oqparam)
            self.save_params()
            self.read_exposure()  # define .assets_by_site
            self.load_riskmodel()
            self.assetcol = riskinput.AssetCollection(
                self.assets_by_site, self.cost_calculator,
                self.oqparam.time_event)
            self.sitecol, self.assets_by_site = self.assoc_assets_sites(
                haz_sitecol)
            curves_by_trt_gsim = {(0, 'FromFile'): haz_curves}
            self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
            self.rlzs_assoc = fake.get_rlzs_assoc()
            self.save_mesh()
        else:  # compute hazard or read it from the datastore
            super(ClassicalRiskCalculator, self).pre_execute()
            logging.info('Preparing the risk input')
            curves_by_trt_gsim = {}
            for key in self.datastore['poes']:
                pmap = self.datastore['poes/' + key]
                trt_id = int(key)
                gsims = self.rlzs_assoc.gsims_by_trt_id[trt_id]
                for i, gsim in enumerate(gsims):
                    curves_by_trt_gsim[trt_id, gsim] = array_of_curves(
                        pmap, len(self.sitecol), self.oqparam.imtls, i)
        self.riskinputs = self.build_riskinputs(curves_by_trt_gsim)
        self.monitor.oqparam = self.oqparam

        self.N = sum(len(assets) for assets in self.assets_by_site)
        self.L = len(self.riskmodel.loss_types)
        self.R = len(self.rlzs_assoc.realizations)
        self.I = self.oqparam.insured_losses
        self.Q1 = len(self.oqparam.quantile_loss_curves) + 1

    def post_execute(self, result):
        """
        Save the losses in a compact form.
        """
        self.loss_curve_dt, self.loss_maps_dt = (
            self.riskmodel.build_loss_dtypes(
                self.oqparam.conditional_loss_poes, self.I))

        self.save_loss_curves(result)
        if self.oqparam.conditional_loss_poes:
            self.save_loss_maps(result)

    def save_loss_curves(self, result):
        """
        Saving loss curves in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_curves = numpy.zeros((self.N, self.R), self.loss_curve_dt)
        for l, r, aid, lcurve in result['loss_curves']:
            loss_curves_lt = loss_curves[ltypes[l]]
            for i, name in enumerate(loss_curves_lt.dtype.names):
                if name.startswith('avg'):
                    loss_curves_lt[name][aid, r] = lcurve[i]
                else:
                    base.set_array(loss_curves_lt[name][aid, r], lcurve[i])
        self.datastore['loss_curves-rlzs'] = loss_curves

        # loss curves stats
        if self.R > 1:
            stat_curves = numpy.zeros((self.N, self.Q1), self.loss_curve_dt)
            for l, aid, statcurve in result['stat_curves']:
                stat_curves_lt = stat_curves[ltypes[l]]
                for name in stat_curves_lt.dtype.names:
                    for s in range(self.Q1):
                        if name.startswith('avg'):
                            stat_curves_lt[name][aid, s] = statcurve[name][s]
                        else:
                            base.set_array(stat_curves_lt[name][aid, s],
                                           statcurve[name][s])
            self.datastore['loss_curves-stats'] = stat_curves

    def save_loss_maps(self, result):
        """
        Saving loss maps in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        ltypes = self.riskmodel.loss_types
        loss_maps = numpy.zeros((self.N, self.R), self.loss_maps_dt)
        for l, r, aid, lmaps in result['loss_maps']:
            loss_maps_lt = loss_maps[ltypes[l]]
            for i, name in enumerate(loss_maps_lt.dtype.names):
                loss_maps_lt[name][aid, r] = lmaps[i]
        self.datastore['loss_maps-rlzs'] = loss_maps

        # loss maps stats
        if self.R > 1:
            stat_maps = numpy.zeros((self.N, self.Q1), self.loss_maps_dt)
            for l, aid, statmaps in result['stat_maps']:
                statmaps_lt = stat_maps[ltypes[l]]
                for name in statmaps_lt.dtype.names:
                    for s in range(self.Q1):
                        statmaps_lt[name][aid, s] = statmaps[name][s]
            self.datastore['loss_maps-stats'] = stat_maps
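
`save_loss_curves` treats the scalar fields (the avg* columns) and the array-valued fields (losses, poes) differently: scalars are assigned directly, arrays are copied in place via `base.set_array`. A standalone numpy sketch of that fill pattern, with a local `set_array` standing in for the engine helper:

import numpy

def set_array(longarray, shortarray):
    # stand-in for base.set_array: copy shortarray into longarray in place
    longarray[:len(shortarray)] = shortarray

C = 10
lcurve_dt = numpy.dtype([('losses', (float, C)),
                         ('poes', (float, C)),
                         ('avg', float)])
loss_curves = numpy.zeros((2, 3), lcurve_dt)     # (assets, realizations)

lcurve = (numpy.linspace(0, 1000, C), numpy.linspace(1, 0, C), 500.0)
aid, r = 0, 1
for i, name in enumerate(loss_curves.dtype.names):
    if name.startswith('avg'):                   # scalar field
        loss_curves[name][aid, r] = lcurve[i]
    else:                                        # array field, copied in place
        set_array(loss_curves[name][aid, r], lcurve[i])
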