def test_trivial(self):
    # using the Heaviside function, i.e. `amplify_one` has contributions
    # only for soil_intensity < a * mid_intensity with a=1;
    # in this case the minimum mid_intensity is 0.0015, which is
    # smaller than the minimum soil intensity 0.0020, so some contribution
    # is lost: this is why the first poe is 0.985 instead of 0.989
    fname = gettemp(trivial_ampl_func)
    df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                  index='ampcode')
    a = Amplifier(self.imtls, df, self.soil_levels)
    a.check(self.vs30, 0)
    numpy.testing.assert_allclose(a.midlevels, [
        0.0015, 0.0035, 0.0075, 0.015, 0.035, 0.075, 0.15, 0.35, 0.75, 1.1])
    poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985, 0.98, 0.97, 0.94, 0.89, 0.79, 0.69], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985, 0.98, 0.97, 0.94, 0.89, 0.79, 0.69], atol=1E-6)
    poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
    numpy.testing.assert_allclose(
        poes, [0.985, 0.98, 0.97, 0.94, 0.89, 0.79, 0.69], atol=1E-6)
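
The midlevels asserted above are simply the arithmetic midpoints of
consecutive intensity levels. A minimal sketch, assuming `self.imtls` uses
the levels [0.001, 0.002, 0.005, ..., 1.0, 1.2] (an assumption: the fixture
is not shown in this snippet):

import numpy

# hypothetical intensity levels, consistent with the asserted midlevels
levels = numpy.array([.001, .002, .005, .01, .02, .05, .1, .2, .5, 1., 1.2])
midlevels = (levels[:-1] + levels[1:]) / 2
# -> [0.0015 0.0035 0.0075 0.015 0.035 0.075 0.15 0.35 0.75 1.1]
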
Example 2
    def test_simple(self):
        fname = gettemp(simple_ampl_func)
        aw = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64})
        a = Amplifier(self.imtls, aw, self.soil_levels)
        a.check(self.vs30, vs30_tolerance=1)
        poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
        numpy.testing.assert_allclose(
            poes,
            [0.985002, 0.979997, 0.970004, 0.940069, 0.889961, 0.79, 0.690037],
            atol=1E-6)

        poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
        numpy.testing.assert_allclose(
            poes,
            [0.985002, 0.979997, 0.970004, 0.940069, 0.889961, 0.79, 0.690037],
            atol=1E-6)

        poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
        numpy.testing.assert_allclose(
            poes,
            [0.985002, 0.979996, 0.969991, 0.940012, 0.889958, 0.79, 0.690037],
            atol=1E-6)

        # amplify GMFs with sigmas
        numpy.random.seed(42)
        gmvs = a._amplify_gmvs(b'A', numpy.array([.005, .010, .015]), 'PGA')
        numpy.testing.assert_allclose(gmvs, [0.005307, 0.010093, 0.016804],
                                      atol=1E-5)
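
The last assertion exercises amplification of ground motion values with
uncertainty. A minimal sketch of the idea, assuming a multiplicative
lognormal amplification factor (the median factor and sigma below are
made-up numbers, not the values in simple_ampl_func, and this is not the
exact implementation of `_amplify_gmvs`):

import numpy

numpy.random.seed(42)
gmvs = numpy.array([.005, .010, .015])
median_amp, sigma = 1.05, 0.03  # hypothetical amplification parameters
eps = numpy.random.normal(size=len(gmvs))
amplified = gmvs * median_amp * numpy.exp(eps * sigma)
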
Example 3
    def calc_stats(self):
        oq = self.oqparam
        hstats = oq.hazard_stats()
        # initialize datasets
        N = len(self.sitecol.complete)
        P = len(oq.poes)
        M = len(oq.imtls)
        if oq.soil_intensities is not None:
            L = M * len(oq.soil_intensities)
        else:
            L = len(oq.imtls.array)
        R = len(self.rlzs_assoc.realizations)
        S = len(hstats)
        if (R > 1 and oq.individual_curves) or not hstats:
            self.datastore.create_dset('hcurves-rlzs', F32, (N, R, L))
            if oq.poes:
                self.datastore.create_dset('hmaps-rlzs', F32, (N, R, M, P))
        if hstats:
            self.datastore.create_dset('hcurves-stats', F32, (N, S, L))
            if oq.poes:
                self.datastore.create_dset('hmaps-stats', F32, (N, S, M, P))
        ct = oq.concurrent_tasks
        logging.info('Building hazard statistics with %d concurrent_tasks', ct)
        weights = [rlz.weight for rlz in self.rlzs_assoc.realizations]
        if 'amplification' in oq.inputs:
            amplifier = Amplifier(oq.imtls, self.datastore['amplification'],
                                  oq.soil_intensities)
            amplifier.check(self.sitecol.vs30, oq.vs30_tolerance)
        else:
            amplifier = None
        allargs = [  # this list is very fast to generate
            (getters.PmapGetter(self.datastore, weights, t.sids, oq.poes), N,
             hstats, oq.individual_curves, oq.max_sites_disagg, amplifier)
            for t in self.sitecol.split_in_tiles(ct)]
        self.datastore.swmr_on()
        parallel.Starmap(build_hazard, allargs,
                         h5=self.datastore.hdf5).reduce(self.save_hazard)
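
The Starmap/reduce pattern above fans the sites out in tiles and merges the
partial results back into the preallocated datasets. A toy illustration of
that shape of computation (plain numpy, not the engine API):

import numpy

N, S, L = 10, 2, 5  # sites, stats, levels
hcurves = numpy.zeros((N, S, L), numpy.float32)

def build_tile(sids):  # stands in for build_hazard
    return sids, numpy.ones((len(sids), S, L), numpy.float32)

for sids in numpy.array_split(numpy.arange(N), 3):  # 3 "tasks"
    ids, arr = build_tile(sids)
    hcurves[ids] = arr  # the reduce step, analogous to save_hazard
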
Example 4
    def test_simple(self):
        #
        # MP: some of the poes computed considering uncertainty were
        # checked against hand calculations
        #
        fname = gettemp(simple_ampl_func)
        df = read_csv(fname, {'ampcode': ampcode_dt, None: numpy.float64},
                      index='ampcode')
        a = Amplifier(self.imtls, df, self.soil_levels)
        a.check(self.vs30, vs30_tolerance=1)
        poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
        numpy.testing.assert_allclose(
            poes, [0.985008, 0.980001, 0.970019, 0.94006, 0.890007,
                   0.790198, 0.690201], atol=1E-6)

        poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
        numpy.testing.assert_allclose(
            poes, [0.985008, 0.980001, 0.970019, 0.94006, 0.890007,
                   0.790198, 0.690201], atol=1E-6)

        poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
        numpy.testing.assert_allclose(
            poes, [0.985109, 0.980022, 0.970272, 0.940816, 0.890224,
                   0.792719, 0.692719], atol=1E-6)

        # Amplify GMFs with sigmas
        numpy.random.seed(42)
        gmvs = a._amplify_gmvs(b'A', numpy.array([.005, .010, .015]), 'PGA')
        numpy.testing.assert_allclose(gmvs, [0.005401, 0.010356, 0.016704],
                                      atol=1E-5)
Example 5
    def test_simple(self):
        fname = gettemp(simple_ampl_func)
        aw = read_csv(fname, {'ampcode': 'S2', 'level': numpy.uint8,
                              None: numpy.float64})
        a = Amplifier(self.imtls, aw, self.soil_levels)
        a.check(self.vs30, 1)
        poes = a.amplify_one(b'A', 'SA(0.1)', self.hcurve[1]).flatten()
        numpy.testing.assert_allclose(
            poes,
            [0.985002, 0.979997, 0.970004, 0.940069, 0.889961, 0.79, 0.690037],
            atol=1E-6)
        poes = a.amplify_one(b'A', 'SA(0.2)', self.hcurve[2]).flatten()
        numpy.testing.assert_allclose(
            poes,
            [0.985002, 0.979997, 0.970004, 0.940069, 0.889961, 0.79, 0.690037],
            atol=1E-6)
        poes = a.amplify_one(b'A', 'SA(0.5)', self.hcurve[3]).flatten()
        numpy.testing.assert_allclose(
            poes,
            [0.985002, 0.979996, 0.969991, 0.940012, 0.889958, 0.79, 0.690037],
            atol=1E-6)
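
Note that here the ampcode dtype is spelled literally as 'S2'. Assuming
ampcode_dt is defined as (numpy.string_, 2), as in openquake.hazardlib.site,
the two spellings denote the same numpy dtype:

import numpy

assert numpy.dtype('S2') == numpy.dtype((numpy.bytes_, 2))
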
Example 6
class HazardCalculator(BaseCalculator):
    """
    Base class for hazard calculators based on source models
    """
    def src_filter(self, filename=None):
        """
        :returns: a SourceFilter/UcerfFilter
        """
        oq = self.oqparam
        if getattr(self, 'sitecol', None):
            sitecol = self.sitecol.complete
        else:  # can happen to the ruptures-only calculator
            sitecol = None
            filename = None
        if 'ucerf' in oq.calculation_mode:
            return UcerfFilter(sitecol, oq.maximum_distance, filename)
        return SourceFilter(sitecol, oq.maximum_distance, filename)

    @property
    def E(self):
        """
        :returns: the number of stored events
        """
        try:
            return len(self.datastore['events'])
        except KeyError:
            return 0

    @property
    def N(self):
        """
        :returns: the total number of sites
        """
        return len(self.sitecol.complete) if self.sitecol else None

    def check_overflow(self):
        """Overridden in event based"""

    def check_floating_spinning(self):
        f, s = self.csm.get_floating_spinning_factors()
        if f != 1:
            logging.info('Rupture floating factor = %s', f)
        if s != 1:
            logging.info('Rupture spinning factor = %s', s)

    def read_inputs(self):
        """
        Read risk data and sources if any
        """
        oq = self.oqparam
        self._read_risk_data()
        self.check_overflow()  # check if self.sitecol is too large
        if getattr(self, 'sitecol', None):
            # can be None for the ruptures-only calculator
            with hdf5.File(self.datastore.tempname, 'w') as tmp:
                tmp['sitecol'] = self.sitecol
        if ('source_model_logic_tree' in oq.inputs and
                oq.hazard_calculation_id is None):
            with self.monitor('composite source model', measuremem=True):
                self.csm = csm = readinput.get_composite_source_model(
                    oq, self.datastore.hdf5)
                ns = len(csm.get_sources())
                if oq.disagg_by_src and ns > 1000:
                    j = oq.inputs['job_ini']
                    raise InvalidFile(
                        '%s: disagg_by_src can be set only if there are <=1000'
                        ' sources, but %d were found in the model' % (j, ns))
                self.csm_info = csm.info
                self.datastore['source_model_lt'] = csm.source_model_lt
                res = views.view('dupl_sources', self.datastore)
                logging.info(f'The composite source model has {res.val:_d} '
                             'ruptures')
            if res:
                logging.info(res)
        self.init()  # do this at the end of pre-execute

        if not oq.hazard_calculation_id:
            self.gzip_inputs()

    def save_multi_peril(self):
        """Defined in MultiRiskCalculator"""

    def pre_execute(self):
        """
        Check if there is a previous calculation ID.
        If yes, read the inputs by retrieving the previous calculation;
        if not, read the inputs directly.
        """
        oq = self.oqparam
        if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs:
            # read hazard from files
            assert not oq.hazard_calculation_id, (
                'You cannot use --hc together with gmfs_file')
            self.read_inputs()
            if 'gmfs' in oq.inputs:
                if not oq.inputs['gmfs'].endswith('.csv'):
                    raise NotImplementedError(
                        'Importer for %s' % oq.inputs['gmfs'])
                E = len(import_gmfs(self.datastore, oq.inputs['gmfs'],
                                    self.sitecol.complete.sids))
                if hasattr(oq, 'number_of_ground_motion_fields'):
                    if oq.number_of_ground_motion_fields != E:
                        raise RuntimeError(
                            'Expected %d ground motion fields, found %d' %
                            (oq.number_of_ground_motion_fields, E))
                else:  # set the number of GMFs from the file
                    oq.number_of_ground_motion_fields = E
            else:
                self.save_multi_peril()
            self.save_crmodel()
        elif 'hazard_curves' in oq.inputs:  # read hazard from file
            assert not oq.hazard_calculation_id, (
                'You cannot use --hc together with hazard_curves')
            haz_sitecol = readinput.get_site_collection(oq)
            self.load_crmodel()  # must be after get_site_collection
            self.read_exposure(haz_sitecol)  # define .assets_by_site
            self.datastore['poes/grp-00'] = fix_ones(readinput.pmap)
            self.datastore['sitecol'] = self.sitecol
            self.datastore['assetcol'] = self.assetcol
            self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
            self.rlzs_assoc = fake.get_rlzs_assoc()
            self.datastore['rlzs_by_grp'] = self.rlzs_assoc.by_grp()
            self.save_crmodel()
        elif oq.hazard_calculation_id:
            parent = util.read(oq.hazard_calculation_id)
            self.check_precalc(parent['oqparam'].calculation_mode)
            self.datastore.parent = parent
            # copy missing parameters from the parent
            if 'concurrent_tasks' not in vars(self.oqparam):
                self.oqparam.concurrent_tasks = (
                    self.oqparam.__class__.concurrent_tasks.default)
            params = {name: value for name, value in
                      vars(parent['oqparam']).items()
                      if name not in vars(self.oqparam)}
            self.save_params(**params)
            self.read_inputs()
            oqp = parent['oqparam']
            if oqp.investigation_time != oq.investigation_time:
                raise ValueError(
                    'The parent calculation was using investigation_time=%s'
                    ' != %s' % (oqp.investigation_time, oq.investigation_time))
            if not consistent(oqp.minimum_intensity, oq.minimum_intensity):
                raise ValueError(
                    'The parent calculation was using minimum_intensity=%s'
                    ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity))
            hstats, rstats = list(oqp.hazard_stats()), list(oq.hazard_stats())
            if hstats != rstats:
                raise ValueError(
                    'The parent calculation had stats %s != %s' %
                    (hstats, rstats))
            missing_imts = set(oq.risk_imtls) - set(oqp.imtls)
            if missing_imts:
                raise ValueError(
                    'The parent calculation is missing the IMT(s) %s' %
                    ', '.join(missing_imts))
            self.save_crmodel()
        elif self.__class__.precalc:
            calc = calculators[self.__class__.precalc](
                self.oqparam, self.datastore.calc_id)
            calc.run(remove=False)
            for name in ('csm param sitecol assetcol crmodel rlzs_assoc '
                         'policy_name policy_dict csm_info').split():
                if hasattr(calc, name):
                    setattr(self, name, getattr(calc, name))
        else:
            self.read_inputs()
            self.save_crmodel()
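
The branching above encodes a precedence among the possible input modes. A
compact restatement as a sketch (this helper is hypothetical, not part of
the engine; `oq` is as used above):

def input_mode(oq):
    # mirrors the if/elif chain of pre_execute
    if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs:
        return 'hazard read from GMF/multi-peril files'
    if 'hazard_curves' in oq.inputs:
        return 'hazard read from a hazard-curves file'
    if oq.hazard_calculation_id:
        return 'inputs taken from the parent calculation (--hc)'
    return 'precalculator, or direct read of the inputs'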

    def init(self):
        """
        To be overridden to initialize the datasets needed by the calculation
        """
        oq = self.oqparam
        if not oq.risk_imtls:
            if self.datastore.parent:
                oq.risk_imtls = (
                    self.datastore.parent['oqparam'].risk_imtls)
        if 'precalc' in vars(self):
            self.rlzs_assoc = self.precalc.rlzs_assoc
        elif 'csm_info' in self.datastore:
            csm_info = self.datastore['csm_info']
            if oq.hazard_calculation_id and 'gsim_logic_tree' in oq.inputs:
                # redefine the realizations by reading the weights from the
                # gsim_logic_tree_file that could be different from the parent
                csm_info.gsim_lt = logictree.GsimLogicTree(
                    oq.inputs['gsim_logic_tree'], set(csm_info.trts))
            self.rlzs_assoc = csm_info.get_rlzs_assoc()
        elif hasattr(self, 'csm'):
            self.check_floating_spinning()
            self.rlzs_assoc = self.csm.info.get_rlzs_assoc()
        else:  # build a fake; used by risk-from-file calculators
            self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
            self.rlzs_assoc = fake.get_rlzs_assoc()

    @general.cached_property
    def R(self):
        """
        :returns: the number of realizations
        """
        try:
            return self.csm.info.get_num_rlzs()
        except AttributeError:  # no self.csm
            return self.datastore['csm_info'].get_num_rlzs()

    def read_exposure(self, haz_sitecol):  # after load_risk_model
        """
        Read the exposure, the risk models and update the attributes
        .sitecol, .assetcol
        """
        oq = self.oqparam
        with self.monitor('reading exposure'):
            self.sitecol, self.assetcol, discarded = (
                readinput.get_sitecol_assetcol(
                    oq, haz_sitecol, self.crmodel.loss_types))
            if len(discarded):
                self.datastore['discarded'] = discarded
                if hasattr(self, 'rup'):
                    # this is normal for the case of scenario from rupture
                    logging.info('%d assets were discarded because too far '
                                 'from the rupture; use `oq show discarded` '
                                 'to show them and `oq plot_assets` to plot '
                                 'them' % len(discarded))
                elif not oq.discard_assets:  # raise an error
                    self.datastore['sitecol'] = self.sitecol
                    self.datastore['assetcol'] = self.assetcol
                    raise RuntimeError(
                        '%d assets were discarded; use `oq show discarded` to'
                        ' show them and `oq plot_assets` to plot them' %
                        len(discarded))
        self.policy_name = ''
        self.policy_dict = {}
        if oq.inputs.get('insurance'):
            k, v = zip(*oq.inputs['insurance'].items())
            self.load_insurance_data(k, v)
        return readinput.exposure

    def load_insurance_data(self, ins_types, ins_files):
        """
        Read the insurance files and populate the policy_dict
        """
        for loss_type, fname in zip(ins_types, ins_files):
            array = hdf5.read_csv(
                fname, {'insurance_limit': float, 'deductible': float,
                        None: object}).array
            policy_name = array.dtype.names[0]
            policy_idx = getattr(self.assetcol.tagcol, policy_name + '_idx')
            insurance = numpy.zeros((len(policy_idx), 2))
            for pol, ded, lim in array[
                    [policy_name, 'deductible', 'insurance_limit']]:
                insurance[policy_idx[pol]] = ded, lim
            self.policy_dict[loss_type] = insurance
            if self.policy_name and policy_name != self.policy_name:
                raise ValueError(
                    'The file %s contains %s as policy field, but we were '
                    'expecting %s' % (fname, policy_name, self.policy_name))
            else:
                self.policy_name = policy_name
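
A minimal sketch of the data structure built above, with hypothetical policy
names and values (policy_idx stands in for the tagcol attribute):

import numpy

policy_idx = {'?': 0, 'policy_A': 1, 'policy_B': 2}  # hypothetical index
insurance = numpy.zeros((len(policy_idx), 2))
for pol, ded, lim in [('policy_A', 100., 1000.), ('policy_B', 250., 2500.)]:
    insurance[policy_idx[pol]] = ded, lim
# insurance[i] is the (deductible, insurance_limit) pair of the i-th policy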

    def load_crmodel(self):
        # to be called before read_exposure
        # NB: this is called even if there is no risk model
        """
        Read the risk models and set the attribute .crmodel.
        The crmodel can be empty for hazard calculations.
        Save the loss ratios (if any) in the datastore.
        """
        logging.info('Reading the risk model if present')
        self.crmodel = readinput.get_crmodel(self.oqparam)
        if not self.crmodel:
            parent = self.datastore.parent
            if 'risk_model' in parent:
                self.crmodel = riskmodels.CompositeRiskModel.read(parent)
            return
        if self.oqparam.ground_motion_fields and not self.oqparam.imtls:
            raise InvalidFile('No intensity_measure_types specified in %s' %
                              self.oqparam.inputs['job_ini'])
        self.save_params()  # re-save oqparam

    def save_crmodel(self):
        """
        Save the risk models in the datastore
        """
        if len(self.crmodel):
            self.datastore['risk_model'] = rm = self.crmodel
            attrs = self.datastore.getitem('risk_model').attrs
            attrs['min_iml'] = hdf5.array_of_vstr(sorted(rm.min_iml.items()))

    def _read_risk_data(self):
        # read the exposure (if any), the risk model (if any) and then the
        # site collection, possibly extracted from the exposure.
        oq = self.oqparam
        self.load_crmodel()  # must be called first

        if oq.hazard_calculation_id:
            with util.read(oq.hazard_calculation_id) as dstore:
                haz_sitecol = dstore['sitecol'].complete
                if ('amplification' in oq.inputs and
                        'ampcode' not in haz_sitecol.array.dtype.names):
                    haz_sitecol.add_col('ampcode', site.ampcode_dt)
        else:
            haz_sitecol = readinput.get_site_collection(oq)
            if hasattr(self, 'rup'):
                # for scenario we reduce the site collection to the sites
                # within the maximum distance from the rupture
                haz_sitecol, _dctx = self.cmaker.filter(
                    haz_sitecol, self.rup)
                haz_sitecol.make_complete()

            if 'site_model' in oq.inputs:
                self.datastore['site_model'] = readinput.get_site_model(oq)

        oq_hazard = (self.datastore.parent['oqparam']
                     if self.datastore.parent else None)
        if 'exposure' in oq.inputs:
            exposure = self.read_exposure(haz_sitecol)
            self.datastore['assetcol'] = self.assetcol
            self.datastore['cost_calculator'] = exposure.cost_calculator
            if hasattr(readinput.exposure, 'exposures'):
                self.datastore['assetcol/exposures'] = (
                    numpy.array(exposure.exposures, hdf5.vstr))
        elif 'assetcol' in self.datastore.parent:
            assetcol = self.datastore.parent['assetcol']
            if oq.region:
                region = wkt.loads(oq.region)
                self.sitecol = haz_sitecol.within(region)
            if oq.shakemap_id or 'shakemap' in oq.inputs:
                self.sitecol, self.assetcol = self.read_shakemap(
                    haz_sitecol, assetcol)
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets',
                             len(self.assetcol), len(assetcol))
                nsites = len(self.sitecol)
                if (oq.spatial_correlation != 'no' and
                        nsites > MAXSITES):  # hard-coded, heuristic
                    raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
            elif hasattr(self, 'sitecol') and general.not_equal(
                    self.sitecol.sids, haz_sitecol.sids):
                self.assetcol = assetcol.reduce(self.sitecol)
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets',
                             len(self.assetcol), len(assetcol))
            else:
                self.assetcol = assetcol
        else:  # no exposure
            self.sitecol = haz_sitecol
            if self.sitecol:
                logging.info('Read N=%d hazard sites and L=%d hazard levels',
                             len(self.sitecol), len(oq.imtls.array))

        if oq_hazard:
            parent = self.datastore.parent
            if 'assetcol' in parent:
                check_time_event(oq, parent['assetcol'].occupancy_periods)
            elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
                raise ValueError('Missing exposure both in hazard and risk!')
            if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
                raise ValueError(
                    'The risk configuration file has time_event=%s but the '
                    'hazard was computed with time_event=%s' % (
                        oq.time_event, oq_hazard.time_event))

        if oq.job_type == 'risk':
            tmap_arr, tmap_lst = logictree.taxonomy_mapping(
                self.oqparam.inputs.get('taxonomy_mapping'),
                self.assetcol.tagcol.taxonomy)
            self.crmodel.tmap = tmap_lst
            if len(tmap_arr):
                self.datastore['taxonomy_mapping'] = tmap_arr
            taxonomies = set(taxo for items in self.crmodel.tmap
                             for taxo, weight in items if taxo != '?')
            # check that we are covering all the taxonomies in the exposure
            missing = taxonomies - set(self.crmodel.taxonomies)
            if self.crmodel and missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' % missing)
            if len(self.crmodel.taxonomies) > len(taxonomies):
                logging.info('Reducing risk model from %d to %d taxonomies',
                             len(self.crmodel.taxonomies), len(taxonomies))
                self.crmodel = self.crmodel.reduce(taxonomies)
                self.crmodel.tmap = tmap_lst
            self.crmodel.vectorize_cons_model(self.assetcol.tagcol)

        if hasattr(self, 'sitecol') and self.sitecol:
            if 'site_model' in oq.inputs:
                assoc_dist = (oq.region_grid_spacing * 1.414
                              if oq.region_grid_spacing else 5)  # Graeme's 5km
                sm = readinput.get_site_model(oq)
                self.sitecol.complete.assoc(sm, assoc_dist)
            self.datastore['sitecol'] = self.sitecol.complete

        # store amplification functions if any
        if 'amplification' in oq.inputs:
            logging.info('Reading %s', oq.inputs['amplification'])
            self.datastore['amplification'] = readinput.get_amplification(oq)
            check_amplification(self.datastore)
            self.amplifier = Amplifier(
                oq.imtls, self.datastore['amplification'], oq.soil_intensities)
            self.amplifier.check(self.sitecol.vs30, oq.vs30_tolerance)
        else:
            self.amplifier = None

        # used in the risk calculators
        self.param = dict(individual_curves=oq.individual_curves,
                          avg_losses=oq.avg_losses, amplifier=self.amplifier)

        # compute exposure stats
        if hasattr(self, 'assetcol'):
            save_exposed_values(
                self.datastore, self.assetcol, oq.loss_names, oq.aggregate_by)

    def store_rlz_info(self, eff_ruptures=None):
        """
        Save info about the composite source model inside the csm_info dataset
        """
        if hasattr(self, 'csm_info'):  # no scenario
            self.csm_info.update_eff_ruptures(eff_ruptures)
            self.rlzs_assoc = self.csm_info.get_rlzs_assoc(
                self.oqparam.sm_lt_path)
            if not self.rlzs_assoc.realizations:
                raise RuntimeError('Empty logic tree: too much filtering?')

            # sanity check that eff_ruptures have been set, i.e. are not -1
            for sm in self.csm_info.source_models:
                for sg in sm.src_groups:
                    assert sg.eff_ruptures != -1, sg
            self.datastore['csm_info'] = self.csm_info

        R = len(self.rlzs_assoc.realizations)
        logging.info('There are %d realization(s)', R)
        rlzs_by_grp = self.rlzs_assoc.by_grp()

        if self.oqparam.imtls:
            self.datastore['weights'] = arr = build_weights(
                self.rlzs_assoc.realizations, self.oqparam.imt_dt())
            self.datastore.set_attrs('weights', nbytes=arr.nbytes)

        if (('event_based' in self.oqparam.calculation_mode and R >= TWO16)
                or R >= TWO32):
            # rlzi is 16 bit integer in the GMFs and 32 bit in rlzs_by_grp
            raise ValueError(
                'The logic tree has too many realizations (%d), use sampling '
                'instead' % R)
        elif R > 10000:
            logging.warning(
                'The logic tree has %d realizations(!), please consider '
                'sampling it', R)

        # save vlen-arrays of rlz indices, one per group
        if rlzs_by_grp:
            self.datastore['rlzs_by_grp'] = rlzs_by_grp

    def store_source_info(self, calc_times):
        """
        Save (eff_ruptures, num_sites, calc_time) inside the source_info
        dataset
        """
        if calc_times:
            source_info = self.datastore['source_info']
            arr = numpy.zeros((len(source_info), 3), F32)
            # NB: the zip magic is needed for performance,
            # looping would be too slow
            ids, vals = zip(*calc_times.items())
            arr[numpy.array(ids)] = vals
            source_info['eff_ruptures'] += arr[:, 0]
            source_info['num_sites'] += arr[:, 1]
            source_info['calc_time'] += arr[:, 2]
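
A toy equivalent of the vectorized update above (hypothetical calc_times;
the triples are (eff_ruptures, num_sites, calc_time)):

import numpy

calc_times = {0: (3., 2., .1), 2: (5., 4., .7)}  # hypothetical input
arr = numpy.zeros((4, 3), numpy.float32)
ids, vals = zip(*calc_times.items())
arr[numpy.array(ids)] = vals
# the slow, loop-based equivalent would be:
# for i, val in calc_times.items():
#     arr[i] = val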

    def post_process(self):
        """For compatibility with the engine"""
Example 7
class HazardCalculator(BaseCalculator):
    """
    Base class for hazard calculators based on source models
    """
    def src_filter(self, filename=None):
        """
        :returns: a SourceFilter/UcerfFilter
        """
        oq = self.oqparam
        if getattr(self, 'sitecol', None):
            sitecol = self.sitecol.complete
        else:  # can happen to the ruptures-only calculator
            sitecol = None
            filename = None
        if oq.is_ucerf():
            return UcerfFilter(sitecol, oq.maximum_distance, filename)
        return SourceFilter(sitecol, oq.maximum_distance, filename)

    @property
    def E(self):
        """
        :returns: the number of stored events
        """
        try:
            return len(self.datastore['events'])
        except KeyError:
            return 0

    @property
    def N(self):
        """
        :returns: the total number of sites
        """
        return len(self.sitecol.complete) if self.sitecol else None

    @property
    def few_sites(self):
        """
        :returns: True if there are fewer than max_sites_disagg sites
        """
        return len(self.sitecol.complete) <= self.oqparam.max_sites_disagg

    def check_overflow(self):
        """Overridden in event based"""

    def check_floating_spinning(self):
        f, s = self.csm.get_floating_spinning_factors()
        if f != 1:
            logging.info('Rupture floating factor = %s', f)
        if s != 1:
            logging.info('Rupture spinning factor = %s', s)
        if (f * s >= 1.5 and self.oqparam.pointsource_distance is None
                and 'classical' in self.oqparam.calculation_mode):
            logging.info(
                'You are not using the pointsource_distance approximation:\n'
                'https://docs.openquake.org/oq-engine/advanced/common-mistakes.html#pointsource-distance'
            )

    def read_inputs(self):
        """
        Read risk data and sources if any
        """
        oq = self.oqparam
        self._read_risk_data()
        self.check_overflow()  # check if self.sitecol is too large

        if ('amplification' in oq.inputs
                and oq.amplification_method == 'kernel'):
            logging.info('Reading %s', oq.inputs['amplification'])
            df = readinput.get_amplification(oq)
            check_amplification(df, self.sitecol)
            self.af = AmplFunction.from_dframe(df)

        if getattr(self, 'sitecol', None):
            # can be None for the ruptures-only calculator
            with hdf5.File(self.datastore.tempname, 'w') as tmp:
                tmp['sitecol'] = self.sitecol
        elif (oq.calculation_mode == 'disaggregation'
              and oq.max_sites_disagg < len(self.sitecol)):
            raise ValueError('Please set max_sites_disagg=%d in %s' %
                             (len(self.sitecol), oq.inputs['job_ini']))
        elif oq.disagg_by_src and len(self.sitecol) > oq.max_sites_disagg:
            raise ValueError(
                'There are too many sites to use disagg_by_src=true')
        if ('source_model_logic_tree' in oq.inputs
                and oq.hazard_calculation_id is None):
            with self.monitor('composite source model', measuremem=True):
                self.csm = csm = readinput.get_composite_source_model(
                    oq, self.datastore.hdf5)
                srcs = [src for sg in csm.src_groups for src in sg]
                if not srcs:
                    raise RuntimeError('All sources were discarded!?')
                logging.info('Checking the sources bounding box')
                sids = self.src_filter().within_bbox(srcs)
                if len(sids) == 0:
                    raise RuntimeError('All sources were discarded!?')
                self.full_lt = csm.full_lt
        self.init()  # do this at the end of pre-execute

        if (not oq.hazard_calculation_id
                and oq.calculation_mode != 'preclassical'
                and not oq.save_disk_space):
            self.gzip_inputs()

    def save_multi_peril(self):
        """Defined in MultiRiskCalculator"""

    def pre_execute(self):
        """
        Check if there is a previous calculation ID.
        If yes, read the inputs by retrieving the previous calculation;
        if not, read the inputs directly.
        """
        oq = self.oqparam
        if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs:
            # read hazard from files
            assert not oq.hazard_calculation_id, (
                'You cannot use --hc together with gmfs_file')
            self.read_inputs()
            if 'gmfs' in oq.inputs:
                if not oq.inputs['gmfs'].endswith('.csv'):
                    raise NotImplementedError('Importer for %s' %
                                              oq.inputs['gmfs'])
                E = len(
                    import_gmfs(self.datastore, oq.inputs['gmfs'],
                                self.sitecol.complete.sids))
                if hasattr(oq, 'number_of_ground_motion_fields'):
                    if oq.number_of_ground_motion_fields != E:
                        raise RuntimeError(
                            'Expected %d ground motion fields, found %d' %
                            (oq.number_of_ground_motion_fields, E))
                else:  # set the number of GMFs from the file
                    oq.number_of_ground_motion_fields = E
            else:
                self.save_multi_peril()
            self.save_crmodel()
        elif 'hazard_curves' in oq.inputs:  # read hazard from file
            assert not oq.hazard_calculation_id, (
                'You cannot use --hc together with hazard_curves')
            haz_sitecol = readinput.get_site_collection(oq)
            self.load_crmodel()  # must be after get_site_collection
            self.read_exposure(haz_sitecol)  # define .assets_by_site
            self.datastore['poes/grp-00'] = fix_ones(readinput.pmap)
            self.datastore['sitecol'] = self.sitecol
            self.datastore['assetcol'] = self.assetcol
            self.datastore['full_lt'] = fake = logictree.FullLogicTree.fake()
            self.realizations = fake.get_realizations()
            self.save_crmodel()
        elif oq.hazard_calculation_id:
            parent = util.read(oq.hazard_calculation_id)
            self.check_precalc(parent['oqparam'].calculation_mode)
            self.datastore.parent = parent
            # copy missing parameters from the parent
            if 'concurrent_tasks' not in vars(self.oqparam):
                self.oqparam.concurrent_tasks = (
                    self.oqparam.__class__.concurrent_tasks.default)
            params = {
                name: value
                for name, value in vars(parent['oqparam']).items()
                if name not in vars(self.oqparam)
            }
            self.save_params(**params)
            self.read_inputs()
            oqp = parent['oqparam']
            if oqp.investigation_time != oq.investigation_time:
                raise ValueError(
                    'The parent calculation was using investigation_time=%s'
                    ' != %s' % (oqp.investigation_time, oq.investigation_time))
            if not consistent(oqp.minimum_intensity, oq.minimum_intensity):
                raise ValueError(
                    'The parent calculation was using minimum_intensity=%s'
                    ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity))
            hstats, rstats = list(oqp.hazard_stats()), list(oq.hazard_stats())
            if hstats != rstats:
                raise ValueError('The parent calculation had stats %s != %s' %
                                 (hstats, rstats))
            missing_imts = set(oq.risk_imtls) - set(oqp.imtls)
            if missing_imts:
                raise ValueError(
                    'The parent calculation is missing the IMT(s) %s' %
                    ', '.join(missing_imts))
            self.save_crmodel()
        elif self.__class__.precalc:
            calc = calculators[self.__class__.precalc](self.oqparam,
                                                       self.datastore.calc_id)
            calc.run(remove=False)
            for name in ('csm param sitecol assetcol crmodel realizations '
                         'policy_name policy_dict full_lt').split():
                if hasattr(calc, name):
                    setattr(self, name, getattr(calc, name))
        else:
            self.read_inputs()
            self.save_crmodel()

    def init(self):
        """
        To be overridden to initialize the datasets needed by the calculation
        """
        oq = self.oqparam
        if not oq.risk_imtls:
            if self.datastore.parent:
                oq.risk_imtls = (self.datastore.parent['oqparam'].risk_imtls)
        if 'precalc' in vars(self):
            self.realizations = self.precalc.realizations
        elif 'full_lt' in self.datastore:
            full_lt = self.datastore['full_lt']
            self.realizations = full_lt.get_realizations()
            if oq.hazard_calculation_id and 'gsim_logic_tree' in oq.inputs:
                # redefine the realizations by reading the weights from the
                # gsim_logic_tree_file that could be different from the parent
                full_lt.gsim_lt = logictree.GsimLogicTree(
                    oq.inputs['gsim_logic_tree'], set(full_lt.trts))
        elif hasattr(self, 'csm'):
            self.check_floating_spinning()
            self.realizations = self.csm.full_lt.get_realizations()
        else:  # build a fake; used by risk-from-file calculators
            self.datastore['full_lt'] = fake = logictree.FullLogicTree.fake()
            self.realizations = fake.get_realizations()

    @general.cached_property
    def R(self):
        """
        :returns: the number of realizations
        """
        try:
            return self.csm.full_lt.get_num_rlzs()
        except AttributeError:  # no self.csm
            return self.datastore['full_lt'].get_num_rlzs()

    def read_exposure(self, haz_sitecol):  # after load_risk_model
        """
        Read the exposure, the risk models and update the attributes
        .sitecol, .assetcol
        """
        oq = self.oqparam
        with self.monitor('reading exposure'):
            self.sitecol, self.assetcol, discarded = (
                readinput.get_sitecol_assetcol(oq, haz_sitecol,
                                               self.crmodel.loss_types))
            if len(discarded):
                self.datastore['discarded'] = discarded
                if 'scenario' in oq.calculation_mode:
                    # this is normal for the case of scenario from rupture
                    logging.info('%d assets were discarded because too far '
                                 'from the rupture; use `oq show discarded` '
                                 'to show them and `oq plot_assets` to plot '
                                 'them' % len(discarded))
                elif not oq.discard_assets:  # raise an error
                    self.datastore['sitecol'] = self.sitecol
                    self.datastore['assetcol'] = self.assetcol
                    raise RuntimeError(
                        '%d assets were discarded; use `oq show discarded` to'
                        ' show them and `oq plot_assets` to plot them' %
                        len(discarded))
        self.policy_name = ''
        self.policy_dict = {}
        if oq.inputs.get('insurance'):
            k, v = zip(*oq.inputs['insurance'].items())
            self.load_insurance_data(k, v)
        return readinput.exposure

    def load_insurance_data(self, ins_types, ins_files):
        """
        Read the insurance files and populate the policy_dict
        """
        for loss_type, fname in zip(ins_types, ins_files):
            array = hdf5.read_csv(
                fname, {'insurance_limit': float, 'deductible': float,
                        None: object}).array
            policy_name = array.dtype.names[0]
            policy_idx = getattr(self.assetcol.tagcol, policy_name + '_idx')
            insurance = numpy.zeros((len(policy_idx), 2))
            for pol, ded, lim in array[
                    [policy_name, 'deductible', 'insurance_limit']]:
                insurance[policy_idx[pol]] = ded, lim
            self.policy_dict[loss_type] = insurance
            if self.policy_name and policy_name != self.policy_name:
                raise ValueError(
                    'The file %s contains %s as policy field, but we were '
                    'expecting %s' % (fname, policy_name, self.policy_name))
            else:
                self.policy_name = policy_name

    def load_crmodel(self):
        # to be called before read_exposure
        # NB: this is called even if there is no risk model
        """
        Read the risk models and set the attribute .crmodel.
        The crmodel can be empty for hazard calculations.
        Save the loss ratios (if any) in the datastore.
        """
        logging.info('Reading the risk model if present')
        self.crmodel = readinput.get_crmodel(self.oqparam)
        if not self.crmodel:
            parent = self.datastore.parent
            if 'risk_model' in parent:
                self.crmodel = riskmodels.CompositeRiskModel.read(parent)
            return
        if self.oqparam.ground_motion_fields and not self.oqparam.imtls:
            raise InvalidFile('No intensity_measure_types specified in %s' %
                              self.oqparam.inputs['job_ini'])
        self.save_params()  # re-save oqparam

    def save_crmodel(self):
        """
        Save the risk models in the datastore
        """
        if len(self.crmodel):
            self.datastore['risk_model'] = rm = self.crmodel
            attrs = self.datastore.getitem('risk_model').attrs
            attrs['min_iml'] = hdf5.array_of_vstr(sorted(rm.min_iml.items()))

    def _read_risk_data(self):
        # read the exposure (if any), the risk model (if any) and then the
        # site collection, possibly extracted from the exposure.
        oq = self.oqparam
        self.load_crmodel()  # must be called first

        if oq.hazard_calculation_id:
            with util.read(oq.hazard_calculation_id) as dstore:
                haz_sitecol = dstore['sitecol'].complete
                if ('amplification' in oq.inputs
                        and 'ampcode' not in haz_sitecol.array.dtype.names):
                    haz_sitecol.add_col('ampcode', site.ampcode_dt)
        else:
            haz_sitecol = readinput.get_site_collection(oq, self.datastore)
            if hasattr(self, 'rup'):
                # for scenario we reduce the site collection to the sites
                # within the maximum distance from the rupture
                haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
                haz_sitecol.make_complete()

            if 'site_model' in oq.inputs:
                self.datastore['site_model'] = readinput.get_site_model(oq)

        oq_hazard = (self.datastore.parent['oqparam']
                     if self.datastore.parent else None)
        if 'exposure' in oq.inputs:
            exposure = self.read_exposure(haz_sitecol)
            self.datastore['assetcol'] = self.assetcol
            self.datastore['cost_calculator'] = exposure.cost_calculator
            if hasattr(readinput.exposure, 'exposures'):
                self.datastore['assetcol/exposures'] = (numpy.array(
                    exposure.exposures, hdf5.vstr))
        elif 'assetcol' in self.datastore.parent:
            assetcol = self.datastore.parent['assetcol']
            if oq.region:
                region = wkt.loads(oq.region)
                self.sitecol = haz_sitecol.within(region)
            if oq.shakemap_id or 'shakemap' in oq.inputs:
                self.sitecol, self.assetcol = self.read_shakemap(
                    haz_sitecol, assetcol)
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets', len(self.assetcol),
                             len(assetcol))
                nsites = len(self.sitecol)
                if (oq.spatial_correlation != 'no'
                        and nsites > MAXSITES):  # hard-coded, heuristic
                    raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
            elif hasattr(self, 'sitecol') and general.not_equal(
                    self.sitecol.sids, haz_sitecol.sids):
                self.assetcol = assetcol.reduce(self.sitecol)
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets', len(self.assetcol),
                             len(assetcol))
            else:
                self.assetcol = assetcol
        else:  # no exposure
            self.sitecol = haz_sitecol
            if self.sitecol:
                logging.info('Read N=%d hazard sites and L=%d hazard levels',
                             len(self.sitecol), len(oq.imtls.array))

        if oq_hazard:
            parent = self.datastore.parent
            if 'assetcol' in parent:
                check_time_event(oq, parent['assetcol'].occupancy_periods)
            elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
                raise ValueError('Missing exposure both in hazard and risk!')
            if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
                raise ValueError(
                    'The risk configuration file has time_event=%s but the '
                    'hazard was computed with time_event=%s' %
                    (oq.time_event, oq_hazard.time_event))

        if oq.job_type == 'risk':
            tmap_arr, tmap_lst = logictree.taxonomy_mapping(
                self.oqparam.inputs.get('taxonomy_mapping'),
                self.assetcol.tagcol.taxonomy)
            self.crmodel.tmap = tmap_lst
            if len(tmap_arr):
                self.datastore['taxonomy_mapping'] = tmap_arr
            taxonomies = set(taxo for items in self.crmodel.tmap
                             for taxo, weight in items if taxo != '?')
            # check that we are covering all the taxonomies in the exposure
            missing = taxonomies - set(self.crmodel.taxonomies)
            if self.crmodel and missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' % missing)
            if len(self.crmodel.taxonomies) > len(taxonomies):
                logging.info('Reducing risk model from %d to %d taxonomies',
                             len(self.crmodel.taxonomies), len(taxonomies))
                self.crmodel = self.crmodel.reduce(taxonomies)
                self.crmodel.tmap = tmap_lst
            self.crmodel.vectorize_cons_model(self.assetcol.tagcol)

        if hasattr(self, 'sitecol') and self.sitecol:
            if 'site_model' in oq.inputs:
                assoc_dist = (oq.region_grid_spacing * 1.414
                              if oq.region_grid_spacing else 5)  # Graeme's 5km
                sm = readinput.get_site_model(oq)
                self.sitecol.complete.assoc(sm, assoc_dist)
            self.datastore['sitecol'] = self.sitecol.complete

        # store amplification functions if any
        self.af = None
        if 'amplification' in oq.inputs:
            logging.info('Reading %s', oq.inputs['amplification'])
            df = readinput.get_amplification(oq)
            check_amplification(df, self.sitecol)
            self.amplifier = Amplifier(oq.imtls, df, oq.soil_intensities)
            self.amplifier.check(self.sitecol.vs30, oq.vs30_tolerance)
            if oq.amplification_method == 'kernel':
                # TODO: need to add additional checks on the main calculation
                # methodology since the kernel method is currently tested only
                # for classical PSHA
                self.af = AmplFunction.from_dframe(df)
                self.amplifier = None
        else:
            self.amplifier = None

        # used in the risk calculators
        self.param = dict(individual_curves=oq.individual_curves,
                          collapse_level=oq.collapse_level,
                          avg_losses=oq.avg_losses,
                          amplifier=self.amplifier)

        # compute exposure stats
        if hasattr(self, 'assetcol'):
            save_exposed_values(self.datastore, self.assetcol, oq.loss_names,
                                oq.aggregate_by)
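
The amplification setup above picks between two mutually exclusive paths. A
compact restatement of the selection logic (a sketch, with `oq` as above;
this helper is not part of the engine):

def amplification_mode(oq):
    # mirrors the if/else at the end of _read_risk_data
    if 'amplification' not in oq.inputs:
        return None  # no amplification at all
    if oq.amplification_method == 'kernel':
        return 'kernel (AmplFunction, amplifier set to None)'
    return 'convolution (Amplifier)'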

    def store_rlz_info(self, eff_ruptures):
        """
        Save info about the composite source model inside the full_lt dataset
        """
        oq = self.oqparam
        if hasattr(self, 'full_lt'):  # no scenario
            self.realizations = self.full_lt.get_realizations()
            if not self.realizations:
                raise RuntimeError('Empty logic tree: too much filtering?')
            self.datastore['full_lt'] = self.full_lt
        else:  # scenario
            self.full_lt = self.datastore['full_lt']

        R = self.R
        logging.info('There are %d realization(s)', R)

        if oq.imtls:
            self.datastore['weights'] = arr = build_weights(
                self.realizations, oq.imt_dt())
            self.datastore.set_attrs('weights', nbytes=arr.nbytes)

        if (('event_based' in oq.calculation_mode and R >= TWO16)
                or R >= TWO32):
            raise ValueError(
                'The logic tree has too many realizations (%d), use sampling '
                'instead' % R)
        elif R > 10000:
            logging.warning(
                'The logic tree has %d realizations(!), please consider '
                'sampling it', R)

        # check for gsim logic tree reduction
        discard_trts = []
        for trt in self.full_lt.gsim_lt.values:
            if eff_ruptures.get(trt, 0) == 0:
                discard_trts.append(trt)
        if (discard_trts and 'scenario' not in oq.calculation_mode
                and not oq.is_ucerf()):
            msg = ('No sources for some TRTs: you should set\n'
                   'discard_trts = %s\nin %s') % (', '.join(discard_trts),
                                                  oq.inputs['job_ini'])
            logging.warning(msg)
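
A toy run of the TRT-reduction check above, with made-up tectonic region
types and rupture counts:

eff_ruptures = {'Active Shallow Crust': 42, 'Stable Shallow Crust': 0}
discard_trts = [trt for trt, n in eff_ruptures.items() if n == 0]
# -> ['Stable Shallow Crust']: the warning would suggest setting
# discard_trts = Stable Shallow Crust in the job.ini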

    def store_source_info(self, calc_times):
        """
        Save (eff_ruptures, num_sites, calc_time) inside the source_info
        dataset
        """
        for src_id, arr in calc_times.items():
            src_id = re.sub(r':\d+$', '', src_id)
            row = self.csm.source_info[src_id]
            row[EFF_RUPTURES] += arr[0]
            row[NUM_SITES] += arr[1]
            row[CALC_TIME] += arr[2]
        rows = self.csm.source_info.values()
        recs = [tuple(row) for row in rows]
        hdf5.extend(self.datastore['source_info'],
                    numpy.array(recs, readinput.source_info_dt))
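
The re.sub above strips the split suffix from source ids, so that the calc
times of split sources accumulate on the parent id. For instance, with
hypothetical ids:

import re

for src_id in ['SFLT01:0', 'SFLT01:1', 'SFLT02']:
    print(re.sub(r':\d+$', '', src_id))  # SFLT01, SFLT01, SFLT02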

    def post_process(self):
        """For compatibility with the engine"""