Example 1
def get_risk_functions(oqparam, kind='vulnerability fragility consequence '
                       'vulnerability_retrofitted'):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        a space-separated string with the kinds of risk models to read
    :returns:
        a list of risk functions
    """
    kinds = kind.split()
    rmodels = AccumDict()
    for kind in kinds:
        for key in sorted(oqparam.inputs):
            mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
            if mo:
                loss_type = mo.group(1)  # the cost_type in the key
                # can be occupants, structural, nonstructural, ...
                rmodel = nrml.to_python(oqparam.inputs[key])
                if len(rmodel) == 0:
                    raise InvalidFile('%s is empty!' % oqparam.inputs[key])
                rmodels[loss_type, kind] = rmodel
                if rmodel.lossCategory is None:  # NRML 0.4
                    continue
                cost_type = str(rmodel.lossCategory)
                rmodel_kind = rmodel.__class__.__name__
                kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
                if not rmodel_kind.lower().startswith(kind_):
                    raise ValueError(
                        'Error in the file "%s_file=%s": is '
                        'of kind %s, expected %s' % (
                            key, oqparam.inputs[key], rmodel_kind,
                            kind.capitalize() + 'Model'))
                if cost_type != loss_type:
                    raise ValueError(
                        'Error in the file "%s_file=%s": lossCategory is of '
                        'type "%s", expected "%s"' %
                        (key, oqparam.inputs[key],
                         rmodel.lossCategory, loss_type))
    cl_risk = oqparam.calculation_mode in ('classical', 'classical_risk')
    rlist = RiskFuncList()
    rlist.limit_states = []
    for (loss_type, kind), rm in sorted(rmodels.items()):
        if kind == 'fragility':
            for (imt, riskid), ffl in sorted(rm.items()):
                if not rlist.limit_states:
                    rlist.limit_states.extend(rm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert rlist.limit_states == rm.limitStates, (
                    rlist.limit_states, rm.limitStates)
                ffl.loss_type = loss_type
                ffl.kind = kind
                rlist.append(ffl)
        elif kind == 'consequence':
            for riskid, cf in sorted(rm.items()):
                rf = hdf5.ArrayWrapper(
                    cf, dict(id=riskid, loss_type=loss_type, kind=kind))
                rlist.append(rf)
        else:  # vulnerability, vulnerability_retrofitted
            # only for classical_risk reduce the loss_ratios
            # to make sure they are strictly increasing
            for (imt, riskid), rf in sorted(rm.items()):
                rf = rf.strictly_increasing() if cl_risk else rf
                rf.loss_type = loss_type
                rf.kind = kind
                rlist.append(rf)
    return rlist
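A minimal usage sketch for the function above, assuming it is importable from openquake.risklib.riskmodels (an assumed location; the engine has moved such helpers between modules) and a hypothetical job_risk.ini declaring, say, a structural_vulnerability_file:

# Sketch only: the import path and job file below are assumptions.
from openquake.commonlib import readinput
from openquake.risklib.riskmodels import get_risk_functions

oqparam = readinput.get_oqparam('job_risk.ini')  # hypothetical job file
for rf in get_risk_functions(oqparam, kind='vulnerability'):
    print(rf.loss_type, rf.kind)  # e.g. structural vulnerability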
Example 2
    def _read_scenario_ruptures(self):
        oq = self.oqparam
        gsim_lt = readinput.get_gsim_lt(self.oqparam)
        G = gsim_lt.get_num_paths()
        if oq.calculation_mode.startswith('scenario'):
            ngmfs = oq.number_of_ground_motion_fields
        if oq.inputs['rupture_model'].endswith('.xml'):
            # check the number of branchsets
            bsets = len(gsim_lt._ltnode)
            if bsets > 1:
                raise InvalidFile(
                    '%s for a scenario calculation must contain a single '
                    'branchset, found %d!' % (oq.inputs['job_ini'], bsets))
            [(trt, rlzs_by_gsim)] = gsim_lt.get_rlzs_by_gsim_trt().items()
            self.cmaker = ContextMaker(trt, rlzs_by_gsim, oq)
            rup = readinput.get_rupture(oq)
            if self.N > oq.max_sites_disagg:  # many sites, split rupture
                ebrs = [
                    EBRupture(copyobj(rup, rup_id=rup.rup_id + i),
                              'NA',
                              0,
                              G,
                              e0=i * G,
                              scenario=True) for i in range(ngmfs)
                ]
            else:  # keep a single rupture with a big occupation number
                ebrs = [
                    EBRupture(rup,
                              'NA',
                              0,
                              G * ngmfs,
                              rup.rup_id,
                              scenario=True)
                ]
            srcfilter = SourceFilter(self.sitecol, oq.maximum_distance(trt))
            aw = get_rup_array(ebrs, srcfilter)
            if len(aw) == 0:
                raise RuntimeError(
                    'The rupture is too far from the sites! Please check the '
                    'maximum_distance and the position of the rupture')
        elif oq.inputs['rupture_model'].endswith('.csv'):
            aw = get_ruptures(oq.inputs['rupture_model'])
            if len(gsim_lt.values) == 1:  # fix for scenario_damage/case_12
                aw['trt_smr'] = 0  # a single TRT
            if oq.calculation_mode.startswith('scenario'):
                # rescale n_occ by ngmfs and nrlzs
                aw['n_occ'] *= ngmfs * gsim_lt.get_num_paths()
        else:
            raise InvalidFile("Something wrong in %s" % oq.inputs['job_ini'])
        rup_array = aw.array
        hdf5.extend(self.datastore['rupgeoms'], aw.geom)

        if len(rup_array) == 0:
            raise RuntimeError(
                'There are no sites within the maximum_distance'
                ' of %s km from the rupture' %
                oq.maximum_distance(rup.tectonic_region_type)(rup.mag))

        fake = logictree.FullLogicTree.fake(gsim_lt)
        self.realizations = fake.get_realizations()
        self.datastore['full_lt'] = fake
        self.store_rlz_info({})  # store weights
        self.save_params()
        imp = calc.RuptureImporter(self.datastore)
        imp.import_rups_events(rup_array, get_rupture_getters)
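The split branch above relies on simple event bookkeeping: with G GSIM paths and ngmfs copies of the rupture, copy i is built with e0=i * G, so each copy owns a disjoint block of G event ids and the grand total matches the G * ngmfs occurrences used by the unsplit branch. A self-contained illustration of that arithmetic:

# Pure-Python illustration of the e0 offsets used above; no engine needed.
G, ngmfs = 3, 4                       # GSIM paths, requested GMFs
offsets = [i * G for i in range(ngmfs)]
print(offsets)                        # [0, 3, 6, 9]: disjoint blocks of G ids
assert offsets[-1] + G == G * ngmfs   # same total as the unsplit branch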
Example 3
def get_sitecol_assetcol(oqparam, haz_sitecol=None, cost_types=()):
    """
    :param oqparam: calculation parameters
    :param haz_sitecol: the hazard site collection
    :param cost_types: the expected cost types
    :returns: (site collection, asset collection) instances
    """
    global exposure
    if exposure is None:
        # haz_sitecol not extracted from the exposure
        exposure = get_exposure(oqparam)
    if haz_sitecol is None:
        haz_sitecol = get_site_collection(oqparam)
    missing = set(cost_types) - set(exposure.cost_types['name']) - set(
        ['occupants'])  # TODO: remove occupants and fragility special cases
    if missing and not oqparam.calculation_mode.endswith('damage'):
        expo = oqparam.inputs.get('exposure', '')
        raise InvalidFile(
            'Expected cost types %s but the exposure %r contains %s' % (
                cost_types, expo, exposure.cost_types['name']))
    if oqparam.region_grid_spacing and not oqparam.region:
        # extract the hazard grid from the exposure
        poly = exposure.mesh.get_convex_hull()
        mesh = poly.dilate(oqparam.region_grid_spacing).discretize(
            oqparam.region_grid_spacing)
        if len(mesh) > len(haz_sitecol):
            raise LargeExposureGrid(mesh, haz_sitecol.mesh,
                                    oqparam.region_grid_spacing)
        haz_sitecol = get_site_collection(oqparam, mesh)  # redefine
        haz_distance = oqparam.region_grid_spacing
        if haz_distance != oqparam.asset_hazard_distance:
            logging.info('Using asset_hazard_distance=%d km instead of %d km',
                         haz_distance, oqparam.asset_hazard_distance)
    else:
        haz_distance = oqparam.asset_hazard_distance

    if haz_sitecol.mesh != exposure.mesh:
        # associate the assets to the hazard sites
        tot_assets = sum(len(assets) for assets in exposure.assets_by_site)
        mode = 'strict' if oqparam.region_grid_spacing else 'filter'
        sitecol, assets_by = geo.utils.assoc(
            exposure.assets_by_site, haz_sitecol, haz_distance, mode)
        assets_by_site = [[] for _ in sitecol.complete.sids]
        num_assets = 0
        for sid, assets in zip(sitecol.sids, assets_by):
            assets_by_site[sid] = assets
            num_assets += len(assets)
        logging.info(
            'Associated %d assets to %d sites', num_assets, len(sitecol))
        if num_assets < tot_assets:
            logging.warning('Discarded %d assets outside the '
                            'asset_hazard_distance of %d km',
                            tot_assets - num_assets, haz_distance)
    else:
        # asset sites and hazard sites are the same
        sitecol = haz_sitecol
        assets_by_site = exposure.assets_by_site

    asset_refs = numpy.array(
        [exposure.asset_refs[asset.ordinal]
         for assets in assets_by_site for asset in assets])
    assetcol = asset.AssetCollection(
        asset_refs, assets_by_site, exposure.tagcol, exposure.cost_calculator,
        oqparam.time_event, exposure.occupancy_periods)

    return sitecol, assetcol
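A sketch of driving the function above from a job file; get_sitecol_assetcol belongs to openquake.commonlib.readinput (as the helpers called here suggest), while the job file and cost types below are assumptions:

# Sketch only: job file and cost types are hypothetical.
from openquake.commonlib import readinput

oq = readinput.get_oqparam('job_risk.ini')
sitecol, assetcol = readinput.get_sitecol_assetcol(
    oq, cost_types=['structural'])
print(len(sitecol), 'sites,', len(assetcol), 'assets')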
Example 4
    def _read_risk_data(self):
        # read the risk model (if any), the exposure (if any) and then the
        # site collection, possibly extracted from the exposure.
        oq = self.oqparam
        self.load_crmodel()  # must be called first
        if (not oq.imtls and 'shakemap' not in oq.inputs
                and oq.ground_motion_fields):
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        if oq.hazard_calculation_id:
            with util.read(oq.hazard_calculation_id) as dstore:
                haz_sitecol = dstore['sitecol'].complete
                if ('amplification' in oq.inputs
                        and 'ampcode' not in haz_sitecol.array.dtype.names):
                    haz_sitecol.add_col('ampcode', site.ampcode_dt)
        else:
            haz_sitecol = readinput.get_site_collection(oq, self.datastore)
            if hasattr(self, 'rup'):
                # for scenario we reduce the site collection to the sites
                # within the maximum distance from the rupture
                haz_sitecol, _dctx = self.cmaker.filter(haz_sitecol, self.rup)
                haz_sitecol.make_complete()

            if 'site_model' in oq.inputs:
                self.datastore['site_model'] = readinput.get_site_model(oq)

        oq_hazard = (self.datastore.parent['oqparam']
                     if self.datastore.parent else None)
        if 'exposure' in oq.inputs:
            exposure = self.read_exposure(haz_sitecol)
            self.datastore['assetcol'] = self.assetcol
            self.datastore['cost_calculator'] = exposure.cost_calculator
            if hasattr(readinput.exposure, 'exposures'):
                self.datastore['assetcol/exposures'] = (numpy.array(
                    exposure.exposures, hdf5.vstr))
        elif 'assetcol' in self.datastore.parent:
            assetcol = self.datastore.parent['assetcol']
            if oq.region:
                region = wkt.loads(oq.region)
                self.sitecol = haz_sitecol.within(region)
            if oq.shakemap_id or 'shakemap' in oq.inputs:
                self.sitecol, self.assetcol = self.read_shakemap(
                    haz_sitecol, assetcol)
                self.datastore['sitecol'] = self.sitecol
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets', len(self.assetcol),
                             len(assetcol))
                nsites = len(self.sitecol)
                if (oq.spatial_correlation != 'no'
                        and nsites > MAXSITES):  # hard-coded, heuristic
                    raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
            elif hasattr(self, 'sitecol') and general.not_equal(
                    self.sitecol.sids, haz_sitecol.sids):
                self.assetcol = assetcol.reduce(self.sitecol)
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets', len(self.assetcol),
                             len(assetcol))
            else:
                self.assetcol = assetcol
        else:  # no exposure
            self.sitecol = haz_sitecol
            if self.sitecol and oq.imtls:
                logging.info('Read N=%d hazard sites and L=%d hazard levels',
                             len(self.sitecol), oq.imtls.size)

        if oq_hazard:
            parent = self.datastore.parent
            if 'assetcol' in parent:
                check_time_event(oq, parent['assetcol'].occupancy_periods)
            elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
                raise ValueError('Missing exposure both in hazard and risk!')
            if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
                raise ValueError(
                    'The risk configuration file has time_event=%s but the '
                    'hazard was computed with time_event=%s' %
                    (oq.time_event, oq_hazard.time_event))

        if oq.job_type == 'risk':
            tmap_arr, tmap_lst = logictree.taxonomy_mapping(
                self.oqparam.inputs.get('taxonomy_mapping'),
                self.assetcol.tagcol.taxonomy)
            self.crmodel.tmap = tmap_lst
            if len(tmap_arr):
                self.datastore['taxonomy_mapping'] = tmap_arr
            taxonomies = set(taxo for items in self.crmodel.tmap
                             for taxo, weight in items if taxo != '?')
            # check that we are covering all the taxonomies in the exposure
            missing = taxonomies - set(self.crmodel.taxonomies)
            if self.crmodel and missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' % missing)
            if len(self.crmodel.taxonomies) > len(taxonomies):
                logging.info('Reducing risk model from %d to %d taxonomies',
                             len(self.crmodel.taxonomies), len(taxonomies))
                self.crmodel = self.crmodel.reduce(taxonomies)
                self.crmodel.tmap = tmap_lst
            self.crmodel.reduce_cons_model(self.assetcol.tagcol)

        if hasattr(self, 'sitecol') and self.sitecol:
            if 'site_model' in oq.inputs:
                assoc_dist = (oq.region_grid_spacing *
                              1.414 if oq.region_grid_spacing else 5
                              )  # Graeme's 5km
                sm = readinput.get_site_model(oq)
                self.sitecol.complete.assoc(sm, assoc_dist)
                self.datastore['sitecol'] = self.sitecol

        # store amplification functions if any
        self.af = None
        if 'amplification' in oq.inputs:
            logging.info('Reading %s', oq.inputs['amplification'])
            df = readinput.get_amplification(oq)
            check_amplification(df, self.sitecol)
            self.amplifier = Amplifier(oq.imtls, df, oq.soil_intensities)
            if oq.amplification_method == 'kernel':
                # TODO: need to add additional checks on the main calculation
                # methodology since the kernel method is currently tested only
                # for classical PSHA
                self.af = AmplFunction.from_dframe(df)
                self.amplifier = None
        else:
            self.amplifier = None

        # manage secondary perils
        sec_perils = oq.get_sec_perils()
        for sp in sec_perils:
            sp.prepare(self.sitecol)  # add columns as needed

        mal = {
            lt: getdefault(oq.minimum_asset_loss, lt)
            for lt in oq.loss_names
        }
        if mal:
            logging.info('minimum_asset_loss=%s', mal)
        self.param = dict(individual_curves=oq.individual_curves,
                          ps_grid_spacing=oq.ps_grid_spacing,
                          collapse_level=oq.collapse_level,
                          split_sources=oq.split_sources,
                          avg_losses=oq.avg_losses,
                          amplifier=self.amplifier,
                          sec_perils=sec_perils,
                          ses_seed=oq.ses_seed,
                          minimum_asset_loss=mal)

        # compute exposure stats
        if hasattr(self, 'assetcol'):
            save_agg_values(self.datastore, self.assetcol, oq.loss_names,
                            oq.aggregate_by)
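Near the end, minimum_asset_loss is normalized into a per-loss-type dictionary via getdefault. A plain-Python sketch of that normalization, assuming getdefault behaves like a lookup with a 'default' fallback:

# Illustration of the `mal` dict built above; `getdefault` is assumed
# to be equivalent to this per-key lookup with a 'default' fallback.
minimum_asset_loss = {'default': 0, 'structural': 100}
loss_names = ['structural', 'nonstructural']
mal = {lt: minimum_asset_loss.get(lt, minimum_asset_loss['default'])
       for lt in loss_names}
print(mal)  # {'structural': 100, 'nonstructural': 0}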
Example 5
def get_site_model(oqparam):
    """
    Convert the NRML file into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    if 'amplification' in oqparam.inputs:
        req_site_params.add('ampcode')
    arrays = []
    dtypedic = {
        None: float,
        'vs30measured': numpy.uint8,
        'ampcode': site.ampcode_dt
    }
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(fname, dtypedic).array
            sm['lon'] = numpy.round(sm['lon'], 5)
            sm['lat'] = numpy.round(sm['lat'], 5)
            dupl = get_duplicates(sm, 'lon', 'lat')
            if dupl:
                raise InvalidFile('Found duplicate sites %s in %s' %
                                  (dupl, fname))
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead of '
                                  'a site_model.csv file!' % fname)
            z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
            for name in z.dtype.names:  # reorder the fields
                z[name] = sm[name]
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if 'ampcode' in missing:  # use a default of b''
            missing -= {'ampcode'}
            for param in params:
                param['ampcode'] = b''
        if missing:
            raise InvalidFile(
                '%s: missing parameter %s' %
                (oqparam.inputs['site_model'], ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there is
        # a heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([
            tuple(param[name] for name in site_model_dt.names)
            for param in params
        ], site_model_dt)
        dupl = "\n".join('%s %s' % loc
                         for loc, n in countby(sm, 'lon', 'lat').items()
                         if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)
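Usage sketch for get_site_model, assuming a hypothetical job file whose site_model_file points to a CSV or NRML site model:

# Sketch only: the job file is hypothetical.
from openquake.commonlib import readinput

oq = readinput.get_oqparam('job_hazard.ini')  # with site_model_file set
sm = readinput.get_site_model(oq)             # structured numpy array
print(sm.dtype.names)                         # ('lon', 'lat', 'vs30', ...)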
Example 6
    def __init__(self, **names_vals):
        super(OqParam, self).__init__(**names_vals)
        job_ini = self.inputs['job_ini']
        if 'calculation_mode' not in names_vals:
            raise InvalidFile('Missing calculation_mode in %s' % job_ini)
        self.risk_investigation_time = (self.risk_investigation_time
                                        or self.investigation_time)
        if ('intensity_measure_types_and_levels' in names_vals
                and 'intensity_measure_types' in names_vals):
            logging.warning('Ignoring intensity_measure_types since '
                            'intensity_measure_types_and_levels is set')
        if 'iml_disagg' in names_vals:
            self.hazard_imtls = self.iml_disagg
            if 'intensity_measure_types_and_levels' in names_vals:
                raise InvalidFile(
                    'Please remove the intensity_measure_types_and_levels '
                    'from %s: they will be inferred from the iml_disagg '
                    'dictionary' % job_ini)
        elif 'intensity_measure_types_and_levels' in names_vals:
            self.hazard_imtls = self.intensity_measure_types_and_levels
            delattr(self, 'intensity_measure_types_and_levels')
        elif 'intensity_measure_types' in names_vals:
            self.hazard_imtls = dict.fromkeys(self.intensity_measure_types)
            delattr(self, 'intensity_measure_types')
        self._file_type, self._risk_files = get_risk_files(self.inputs)

        self.check_source_model()
        if self.hazard_precomputed():
            self.check_missing('site_model', 'error')
            self.check_missing('gsim_logic_tree', 'error')
            self.check_missing('source_model_logic_tree', 'error')

        # check the gsim_logic_tree
        if self.inputs.get('gsim_logic_tree'):
            if self.gsim:
                raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there'
                                  ' must be no `gsim` key' % job_ini)
            path = os.path.join(self.base_path, self.inputs['gsim_logic_tree'])
            gsim_lt = logictree.GsimLogicTree(path, ['*'])

            # check the number of branchsets
            branchsets = len(gsim_lt._ltnode)
            if 'scenario' in self.calculation_mode and branchsets > 1:
                raise InvalidFile(
                    '%s: %s for a scenario calculation must contain a single '
                    'branchset, found %d!' % (job_ini, path, branchsets))

            # check the IMTs vs the GSIMs
            self._gsims_by_trt = gsim_lt.values
            for gsims in self._gsims_by_trt.values():
                self.check_gsims(gsims)
        elif self.gsim is not None:
            self.check_gsims([self.gsim])

        # checks for disaggregation
        if self.calculation_mode == 'disaggregation':
            if not self.poes_disagg and not self.iml_disagg:
                raise InvalidFile('poes_disagg or iml_disagg must be set '
                                  'in %(job_ini)s' % self.inputs)
            elif self.poes_disagg and self.iml_disagg:
                raise InvalidFile(
                    '%s: iml_disagg and poes_disagg cannot be set '
                    'at the same time' % job_ini)
            for k in ('mag_bin_width', 'distance_bin_width',
                      'coordinate_bin_width', 'num_epsilon_bins'):
                if k not in vars(self):
                    raise InvalidFile('%s must be set in %s' % (k, job_ini))

        # checks for classical_damage
        if self.calculation_mode == 'classical_damage':
            if self.conditional_loss_poes:
                raise InvalidFile('%s: conditional_loss_poes are not defined '
                                  'for classical_damage calculations' %
                                  job_ini)

        # checks for event_based_risk
        if (self.calculation_mode == 'event_based_risk'
                and self.asset_correlation not in (0, 1)):
            raise ValueError('asset_correlation != {0, 1} is no longer'
                             ' supported')
        elif (self.calculation_mode == 'event_based_risk'
              and self.conditional_loss_poes and not self.asset_loss_table):
            raise InvalidFile(
                '%s: asset_loss_table is not set, probably you want to remove'
                ' conditional_loss_poes' % job_ini)

        # check for GMFs from file
        if (self.inputs.get('gmfs', '').endswith('.csv') and not self.sites
                and 'sites' not in self.inputs):
            raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini)

        # checks for ucerf
        if 'ucerf' in self.calculation_mode:
            if self.ses_per_logic_tree_path >= TWO16:
                raise ValueError('ses_per_logic_tree_path too big: %d' %
                                 self.ses_per_logic_tree_path)
            if self.number_of_logic_tree_samples >= TWO16:
                raise ValueError('number_of_logic_tree_samples too big: %d' %
                                 self.number_of_logic_tree_samples)
Example 7
def get_risk_models(oqparam,
                    kind='vulnerability fragility consequence '
                    'vulnerability_retrofitted'):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        a space-separated string with the kinds of risk models to read
    :returns:
        a dictionary riskid -> loss_type, kind -> function
    """
    kinds = kind.split()
    rmodels = AccumDict()
    for kind in kinds:
        for key in sorted(oqparam.inputs):
            mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
            if mo:
                loss_type = mo.group(1)  # the cost_type in the key
                # can be occupants, structural, nonstructural, ...
                rmodel = nrml.to_python(oqparam.inputs[key])
                if len(rmodel) == 0:
                    raise InvalidFile('%s is empty!' % oqparam.inputs[key])
                rmodels[loss_type, kind] = rmodel
                if rmodel.lossCategory is None:  # NRML 0.4
                    continue
                cost_type = str(rmodel.lossCategory)
                rmodel_kind = rmodel.__class__.__name__
                kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
                if not rmodel_kind.lower().startswith(kind_):
                    raise ValueError('Error in the file "%s_file=%s": is '
                                     'of kind %s, expected %s' %
                                     (key, oqparam.inputs[key], rmodel_kind,
                                      kind.capitalize() + 'Model'))
                if cost_type != loss_type:
                    raise ValueError(
                        'Error in the file "%s_file=%s": lossCategory is of '
                        'type "%s", expected "%s"' %
                        (key, oqparam.inputs[key], rmodel.lossCategory,
                         loss_type))
    rdict = AccumDict(accum={})
    rdict.limit_states = []
    for (loss_type, kind), rm in sorted(rmodels.items()):
        if kind == 'fragility':
            # build a copy of the FragilityModel with different IM levels
            newfm = rm.build(oqparam.continuous_fragility_discretization,
                             oqparam.steps_per_interval)
            for (imt, riskid), ffl in sorted(newfm.items()):
                if not rdict.limit_states:
                    rdict.limit_states.extend(rm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert rdict.limit_states == rm.limitStates, (
                    rdict.limit_states, rm.limitStates)
                rdict[riskid][loss_type, kind] = ffl
                # TODO: see if it is possible to remove the attribute
                # below, used in classical_damage
                ffl.steps_per_interval = oqparam.steps_per_interval
        elif kind == 'consequence':
            for riskid, cf in sorted(rm.items()):
                rdict[riskid][loss_type, kind] = cf
        else:  # vulnerability, vulnerability_retrofitted
            cl_risk = oqparam.calculation_mode in ('classical',
                                                   'classical_risk')
            # only for classical_risk reduce the loss_ratios
            # to make sure they are strictly increasing
            for (imt, riskid), rf in sorted(rm.items()):
                rdict[riskid][loss_type, kind] = (rf.strictly_increasing()
                                                  if cl_risk else rf)
    return rdict
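Unlike Example 1, which returns a flat list, this variant returns a dictionary riskid -> {(loss_type, kind): function}, plus a limit_states attribute. A minimal iteration sketch; the import path and job file are assumptions:

# Sketch only: import path and job file are assumptions.
from openquake.commonlib import readinput
from openquake.risklib.riskmodels import get_risk_models

oqparam = readinput.get_oqparam('job_risk.ini')
rdict = get_risk_models(oqparam, kind='vulnerability')
for riskid, by_key in sorted(rdict.items()):
    for (loss_type, kind), rf in sorted(by_key.items()):
        print(riskid, loss_type, kind)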
Example 8
    def __init__(self, **names_vals):
        # support legacy names
        for name in list(names_vals):
            if name == 'quantile_hazard_curves':
                names_vals['quantiles'] = names_vals.pop(name)
            elif name == 'mean_hazard_curves':
                names_vals['mean'] = names_vals.pop(name)
            elif name == 'max_hazard_curves':  # legacy alias (assumed)
                names_vals['max'] = names_vals.pop(name)
        super().__init__(**names_vals)
        job_ini = self.inputs['job_ini']
        if 'calculation_mode' not in names_vals:
            raise InvalidFile('Missing calculation_mode in %s' % job_ini)
        if 'region_constraint' in names_vals:
            if 'region' in names_vals:
                raise InvalidFile('You cannot have both region and '
                                  'region_constraint in %s' % job_ini)
            logging.warning(
                'region_constraint is obsolete, use region instead')
            self.region = valid.wkt_polygon(
                names_vals.pop('region_constraint'))
        self.risk_investigation_time = (self.risk_investigation_time
                                        or self.investigation_time)
        if ('intensity_measure_types_and_levels' in names_vals
                and 'intensity_measure_types' in names_vals):
            logging.warning('Ignoring intensity_measure_types since '
                            'intensity_measure_types_and_levels is set')
        if 'iml_disagg' in names_vals:
            self.iml_disagg.pop('default')
            # normalize things like SA(0.10) -> SA(0.1)
            self.iml_disagg = {
                str(from_string(imt)): val
                for imt, val in self.iml_disagg.items()
            }
            self.hazard_imtls = self.iml_disagg
            if 'intensity_measure_types_and_levels' in names_vals:
                raise InvalidFile(
                    'Please remove the intensity_measure_types_and_levels '
                    'from %s: they will be inferred from the iml_disagg '
                    'dictionary' % job_ini)
        elif 'intensity_measure_types_and_levels' in names_vals:
            self.hazard_imtls = self.intensity_measure_types_and_levels
            delattr(self, 'intensity_measure_types_and_levels')
            lens = set(map(len, self.hazard_imtls.values()))
            if len(lens) > 1:
                dic = {imt: len(ls) for imt, ls in self.hazard_imtls.items()}
                warnings.warn(
                    'Each IMT must have the same number of levels, instead '
                    'you have %s' % dic, DeprecationWarning)
        elif 'intensity_measure_types' in names_vals:
            self.hazard_imtls = dict.fromkeys(self.intensity_measure_types)
            delattr(self, 'intensity_measure_types')
        self._risk_files = get_risk_files(self.inputs)

        self.check_source_model()
        if self.hazard_precomputed() and self.job_type == 'risk':
            self.check_missing('site_model', 'debug')
            self.check_missing('gsim_logic_tree', 'debug')
            self.check_missing('source_model_logic_tree', 'debug')

        # check the gsim_logic_tree
        if self.inputs.get('gsim_logic_tree'):
            if self.gsim != '[FromFile]':
                raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there'
                                  ' must be no `gsim` key' % job_ini)
            path = os.path.join(self.base_path, self.inputs['gsim_logic_tree'])
            gsim_lt = logictree.GsimLogicTree(path, ['*'])

            # check the number of branchsets
            branchsets = len(gsim_lt._ltnode)
            if 'scenario' in self.calculation_mode and branchsets > 1:
                raise InvalidFile(
                    '%s: %s for a scenario calculation must contain a single '
                    'branchset, found %d!' % (job_ini, path, branchsets))

            # check the IMTs vs the GSIMs
            self._gsims_by_trt = gsim_lt.values
            for gsims in gsim_lt.values.values():
                self.check_gsims(gsims)
        elif self.gsim is not None:
            self.check_gsims([valid.gsim(self.gsim, self.base_path)])

        # check inputs
        unknown = set(self.inputs) - self.KNOWN_INPUTS
        if unknown:
            raise ValueError('Unknown key %s_file in %s' %
                             (unknown.pop(), self.inputs['job_ini']))

        # checks for disaggregation
        if self.calculation_mode == 'disaggregation':
            if not self.poes_disagg and not self.iml_disagg:
                raise InvalidFile('poes_disagg or iml_disagg must be set '
                                  'in %(job_ini)s' % self.inputs)
            elif self.poes_disagg and self.iml_disagg:
                raise InvalidFile(
                    '%s: iml_disagg and poes_disagg cannot be set '
                    'at the same time' % job_ini)
            for k in ('mag_bin_width', 'distance_bin_width',
                      'coordinate_bin_width', 'num_epsilon_bins'):
                if k not in vars(self):
                    raise InvalidFile('%s must be set in %s' % (k, job_ini))

        # checks for classical_damage
        if self.calculation_mode == 'classical_damage':
            if self.conditional_loss_poes:
                raise InvalidFile('%s: conditional_loss_poes are not defined '
                                  'for classical_damage calculations' %
                                  job_ini)

        # checks for event_based_risk
        if (self.calculation_mode == 'event_based_risk'
                and self.asset_correlation not in (0, 1)):
            raise ValueError('asset_correlation != {0, 1} is no longer'
                             ' supported')

        # checks for ebrisk
        if self.calculation_mode == 'ebrisk':
            if self.risk_investigation_time is None:
                raise InvalidFile('Please set the risk_investigation_time in'
                                  ' %s' % job_ini)

        # check for GMFs from file
        if (self.inputs.get('gmfs', '').endswith('.csv')
                and 'sites' not in self.inputs and self.sites is None):
            raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini)
        elif self.inputs.get('gmfs', '').endswith('.xml'):
            raise InvalidFile('%s: GMFs in XML are not supported anymore' %
                              job_ini)

        # checks for event_based
        if 'event_based' in self.calculation_mode:
            if self.ses_per_logic_tree_path >= TWO32:
                raise ValueError('ses_per_logic_tree_path too big: %d' %
                                 self.ses_per_logic_tree_path)
            if self.number_of_logic_tree_samples >= TWO16:
                raise ValueError('number_of_logic_tree_samples too big: %d' %
                                 self.number_of_logic_tree_samples)

        # check grid + sites
        if self.region_grid_spacing and ('sites' in self.inputs or self.sites):
            raise ValueError('You are specifying grid and sites at the same '
                             'time: which one do you want?')

        # check for amplification
        if ('amplification' in self.inputs and self.imtls
                and self.calculation_mode
                in ['classical', 'classical_risk', 'disaggregation']):
            check_same_levels(self.imtls)
Example 9
def get_source_models(oqparam, gsim_lt, source_model_lt, in_memory=True):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param in_memory:
        if True, keep in memory the sources, else just collect the TRTs
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.SourceModel`
        tuples
    """
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time, oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing, oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization)
    psr = nrml.SourceModelParser(converter)

    # consider only the effective realizations
    for sm in source_model_lt.gen_source_models(gsim_lt):
        src_groups = []
        for name in sm.name.split():
            fname = possibly_gunzip(
                os.path.abspath(os.path.join(oqparam.base_path, name)))
            if in_memory:
                apply_unc = source_model_lt.make_apply_uncertainties(sm.path)
                try:
                    logging.info('Parsing %s', fname)
                    src_groups.extend(psr.parse_src_groups(fname, apply_unc))
                except ValueError as e:
                    if str(e) in ('Surface does not conform with Aki & '
                                  'Richards convention',
                                  'Edges points are not in the right order'):
                        raise InvalidFile('''\
        %s: %s. Probably you are using an obsolete model.
        In that case you can fix the file with the command
        python -m openquake.engine.tools.correct_complex_sources %s
        ''' % (fname, e, fname))
                    else:
                        raise
            else:  # just collect the TRT models
                smodel = nrml.read(fname).sourceModel
                if smodel[0].tag.endswith('sourceGroup'):  # NRML 0.5 format
                    for sg_node in smodel:
                        sg = sourceconverter.SourceGroup(
                            sg_node['tectonicRegion'])
                        sg.sources = sg_node.nodes
                        src_groups.append(sg)
                else:  # NRML 0.4 format: smodel is a list of source nodes
                    src_groups.extend(
                        sourceconverter.SourceGroup.collect(smodel))
        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d potential gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)

        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r" % (sm, src_group.trt, gsim_file))
        yield sm

    # log if some source file is being used more than once
    for fname, hits in psr.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
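A sketch of consuming the generator above; the logic-tree helpers are assumed to be the readinput ones contemporary with this snippet:

# Sketch only: helper names and job file are assumptions.
from openquake.commonlib import readinput

oq = readinput.get_oqparam('job_hazard.ini')
gsim_lt = readinput.get_gsim_lt(oq)
smlt = readinput.get_source_model_lt(oq)
for sm in readinput.get_source_models(oq, gsim_lt, smlt, in_memory=False):
    print(sm.name, len(sm.src_groups))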
Example 10
def get_input_files(oqparam, hazard=False):
    """
    :param oqparam: an OqParam instance
    :param hazard: if True, consider only the hazard files
    :returns: input path names in a specific order
    """
    fnames = set()  # files entering in the checksum
    uri = oqparam.shakemap_uri
    if isinstance(uri, dict) and uri:
        if uri['kind'] == 'shapefile':
            fname = os.path.join(oqparam.base_path, uri['fname'])
            fnames.update(get_shapefiles(os.path.dirname(fname)))
        else:  # xml local files
            for key, val in uri.items():
                if key == 'fname' or (key.endswith('_url')
                                      and os.path.exists(val)):
                    fname = os.path.join(oqparam.base_path, val)
                    uri[key] = fname
                    fnames.add(fname)
    for key in oqparam.inputs:
        fname = oqparam.inputs[key]
        if hazard and key not in ('source_model_logic_tree', 'gsim_logic_tree',
                                  'source'):
            continue
        # collect .hdf5 tables for the GSIMs, if any
        elif key == 'gsim_logic_tree':
            gsim_lt = get_gsim_lt(oqparam)
            for gsims in gsim_lt.values.values():
                for gsim in gsims:
                    for k, v in gsim.kwargs.items():
                        if k.endswith(('_file', '_table')):
                            fnames.add(v)
            fnames.add(fname)
        elif key == 'source_model':  # UCERF
            f = oqparam.inputs['source_model']
            fnames.add(f)
            fname = nrml.read(f).sourceModel.UCERFSource['filename']
            fnames.add(os.path.join(os.path.dirname(f), fname))
        elif key == 'exposure':  # fname is a list
            for exp in asset.Exposure.read_headers(fname):
                fnames.update(exp.datafiles)
            fnames.update(fname)
        elif isinstance(fname, dict):
            fnames.update(fname.values())
        elif isinstance(fname, list):
            for f in fname:
                if f == oqparam.input_dir:
                    raise InvalidFile('%s there is an empty path in %s' %
                                      (oqparam.inputs['job_ini'], key))
            fnames.update(fname)
        elif key == 'source_model_logic_tree':
            args = (fname, oqparam.random_seed,
                    oqparam.number_of_logic_tree_samples,
                    oqparam.sampling_method)
            try:
                smlt = smlt_cache[args]
            except KeyError:
                smlt = smlt_cache[args] = logictree.SourceModelLogicTree(*args)
            fnames.update(smlt.hdf5_files)
            fnames.update(smlt.info.smpaths)
            fnames.add(fname)
        else:
            fnames.add(fname)
    return sorted(fnames)
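Usage sketch: listing the files that enter the job checksum. get_input_files is part of the same readinput module as the other helpers on this page:

# Sketch only: the job file is hypothetical.
from openquake.commonlib import readinput

oq = readinput.get_oqparam('job.ini')
for fname in readinput.get_input_files(oq, hazard=False):
    print(fname)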
Example 11
    def pre_execute(self):
        """
        Check if there is a previous calculation ID.
        If yes, read the inputs by retrieving the previous calculation;
        if not, read the inputs directly.
        """
        oq = self.oqparam
        if 'gmfs' in oq.inputs or 'multi_peril' in oq.inputs:
            # read hazard from files
            assert not oq.hazard_calculation_id, (
                'You cannot use --hc together with gmfs_file')
            self.read_inputs()
            if 'gmfs' in oq.inputs:
                if not oq.inputs['gmfs'].endswith('.csv'):
                    raise NotImplementedError('Importer for %s' %
                                              oq.inputs['gmfs'])
                E = len(
                    import_gmfs(self.datastore, oq.inputs['gmfs'],
                                self.sitecol.complete.sids))
                if hasattr(oq, 'number_of_ground_motion_fields'):
                    if oq.number_of_ground_motion_fields != E:
                        raise RuntimeError(
                            'Expected %d ground motion fields, found %d' %
                            (oq.number_of_ground_motion_fields, E))
                else:  # set the number of GMFs from the file
                    oq.number_of_ground_motion_fields = E
            else:
                self.save_multi_peril()
        elif 'hazard_curves' in oq.inputs:  # read hazard from file
            assert not oq.hazard_calculation_id, (
                'You cannot use --hc together with hazard_curves')
            haz_sitecol = readinput.get_site_collection(oq)
            # NB: horrible: get_site_collection calls get_pmap_from_nrml
            # that sets oq.investigation_time, so it must be called first
            self.load_riskmodel()  # must be after get_site_collection
            self.read_exposure(haz_sitecol)  # define .assets_by_site
            self.datastore['poes/grp-00'] = fix_ones(readinput.pmap)
            self.datastore['sitecol'] = self.sitecol
            self.datastore['assetcol'] = self.assetcol
            self.datastore['csm_info'] = fake = source.CompositionInfo.fake()
            self.rlzs_assoc = fake.get_rlzs_assoc()
            self.datastore['rlzs_by_grp'] = self.rlzs_assoc.by_grp()
        elif oq.hazard_calculation_id:
            parent = util.read(oq.hazard_calculation_id)
            self.check_precalc(parent['oqparam'].calculation_mode)
            self.datastore.parent = parent
            # copy missing parameters from the parent
            if 'concurrent_tasks' not in vars(self.oqparam):
                self.oqparam.concurrent_tasks = (
                    self.oqparam.__class__.concurrent_tasks.default)
            params = {
                name: value
                for name, value in vars(parent['oqparam']).items()
                if name not in vars(self.oqparam)
            }
            self.save_params(**params)
            self.read_inputs()
            oqp = parent['oqparam']
            if oqp.investigation_time != oq.investigation_time:
                raise ValueError(
                    'The parent calculation was using investigation_time=%s'
                    ' != %s' % (oqp.investigation_time, oq.investigation_time))
            if oqp.minimum_intensity != oq.minimum_intensity:
                raise ValueError(
                    'The parent calculation was using minimum_intensity=%s'
                    ' != %s' % (oqp.minimum_intensity, oq.minimum_intensity))
            hstats, rstats = list(oqp.hazard_stats()), list(oq.hazard_stats())
            if hstats != rstats:
                raise ValueError('The parent calculation had stats %s != %s' %
                                 (hstats, rstats))
            missing_imts = set(oq.risk_imtls) - set(oqp.imtls)
            if missing_imts:
                raise ValueError(
                    'The parent calculation is missing the IMT(s) %s' %
                    ', '.join(missing_imts))
        elif self.__class__.precalc:
            calc = calculators[self.__class__.precalc](self.oqparam,
                                                       self.datastore.calc_id)
            calc.run()
            self.param = calc.param
            self.sitecol = calc.sitecol
            if hasattr(calc, 'assetcol'):
                self.assetcol = calc.assetcol
            if hasattr(calc, 'riskmodel'):
                self.riskmodel = calc.riskmodel
            if hasattr(calc, 'rlzs_assoc'):
                self.rlzs_assoc = calc.rlzs_assoc
            else:
                # this happens for instance for a scenario_damage without
                # rupture, gmfs, multi_peril
                raise InvalidFile(
                    '%(job_ini)s: missing gmfs_csv, multi_peril_csv' %
                    oq.inputs)
            if hasattr(calc, 'csm'):  # no scenario
                self.csm = calc.csm
        else:
            self.read_inputs()
        if self.riskmodel:
            self.save_riskmodel()
Example 12
def _get_exposure(fname, stop=None):
    """
    :param fname:
        path of the XML file containing the exposure
    :param stop:
        node at which to stop parsing (or None)
    :returns:
        a pair (Exposure instance, list of asset nodes)
    """
    [exposure] = nrml.read(fname, stop=stop)
    if not exposure.tag.endswith('exposureModel'):
        raise InvalidFile('%s: expected exposureModel, got %s' %
                          (fname, exposure.tag))
    description = exposure.description
    try:
        conversions = exposure.conversions
    except AttributeError:
        conversions = Node('conversions', nodes=[Node('costTypes', [])])
    try:
        inslimit = conversions.insuranceLimit
    except AttributeError:
        inslimit = Node('insuranceLimit', text=True)
    try:
        deductible = conversions.deductible
    except AttributeError:
        deductible = Node('deductible', text=True)
    try:
        area = conversions.area
    except AttributeError:
        # NB: the area type cannot be an empty string because when sending
        # around the CostCalculator object we would run into this numpy bug
        # about pickling dictionaries with empty strings:
        # https://github.com/numpy/numpy/pull/5475
        area = Node('area', dict(type='?'))
    try:
        occupancy_periods = exposure.occupancyPeriods.text or ''
    except AttributeError:
        occupancy_periods = ''
    try:
        tagNames = exposure.tagNames
    except AttributeError:
        tagNames = Node('tagNames', text='')
    tagnames = ~tagNames or []
    tagnames.insert(0, 'taxonomy')

    # read the cost types and make some check
    cost_types = []
    retrofitted = False
    for ct in conversions.costTypes:
        with context(fname, ct):
            ctname = ct['name']
            if ctname == 'structural' and 'retrofittedType' in ct.attrib:
                if ct['retrofittedType'] != ct['type']:
                    raise ValueError(
                        'The retrofittedType %s is different from the type '
                        '%s' % (ct['retrofittedType'], ct['type']))
                if ct['retrofittedUnit'] != ct['unit']:
                    raise ValueError(
                        'The retrofittedUnit %s is different from the unit '
                        '%s' % (ct['retrofittedUnit'], ct['unit']))
                retrofitted = True
            cost_types.append(
                (ctname, valid.cost_type_type(ct['type']), ct['unit']))
    if 'occupants' in cost_types:
        cost_types.append(('occupants', 'per_area', 'people'))
    cost_types.sort(key=operator.itemgetter(0))
    cost_types = numpy.array(cost_types, cost_type_dt)
    insurance_limit_is_absolute = il = inslimit.get('isAbsolute')
    deductible_is_absolute = de = deductible.get('isAbsolute')
    cc = CostCalculator(
        {},
        {},
        {},
        True if de is None else de,
        True if il is None else il,
        {name: i
         for i, name in enumerate(tagnames)},
    )
    for ct in cost_types:
        name = ct['name']  # structural, nonstructural, ...
        cc.cost_types[name] = ct['type']  # aggregated, per_asset, per_area
        cc.area_types[name] = area['type']
        cc.units[name] = ct['unit']
    assets = []
    asset_refs = []
    exp = Exposure(exposure['id'], exposure['category'], description.text,
                   cost_types, occupancy_periods, insurance_limit_is_absolute,
                   deductible_is_absolute, retrofitted, area.attrib, assets,
                   asset_refs, cc, TagCollection(tagnames))
    return exp, exposure.assets
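_get_exposure is a private helper: it returns the Exposure shell plus the still-unparsed asset nodes, which the public reader then converts. An illustrative direct call, assuming it lives in openquake.risklib.asset (an assumption based on the Exposure/TagCollection names used here):

# Sketch only: private helper, assumed import path, hypothetical file.
from openquake.risklib.asset import _get_exposure

exp, asset_nodes = _get_exposure('exposure_model.xml')
print(exp.cost_types['name'])               # array of cost type names
print(len(asset_nodes), 'asset nodes yet to be converted')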
Example 13
def info(calculators, gsims, views, exports, extracts, parameters,
         report, input_file=''):
    """
    Give information. You can pass the name of an available calculator,
    a job.ini file, or a zip archive with the input files.
    """
    if calculators:
        for calc in sorted(base.calculators):
            print(calc)
    if gsims:
        for gs in gsim.get_available_gsims():
            print(gs)
    if views:
        for name in sorted(view):
            print(name)
    if exports:
        dic = groupby(export, operator.itemgetter(0),
                      lambda group: [r[1] for r in group])
        n = 0
        for exporter, formats in dic.items():
            print(exporter, formats)
            n += len(formats)
        print('There are %d exporters defined.' % n)
    if extracts:
        for key in extract:
            func = extract[key]
            if hasattr(func, '__wrapped__'):
                fm = FunctionMaker(func.__wrapped__)
            else:
                fm = FunctionMaker(func)
            print('%s(%s)%s' % (fm.name, fm.signature, fm.doc))
    if parameters:
        params = []
        for val in vars(OqParam).values():
            if hasattr(val, 'name'):
                params.append(val)
        params.sort(key=lambda x: x.name)
        for param in params:
            print(param.name)
    if os.path.isdir(input_file) and report:
        with Monitor('info', measuremem=True) as mon:
            with mock.patch.object(logging.root, 'info'):  # reduce logging
                do_build_reports(input_file)
        print(mon)
    elif input_file.endswith('.xml'):
        node = nrml.read(input_file)
        if node[0].tag.endswith('sourceModel'):
            if node['xmlns'].endswith('nrml/0.4'):
                raise InvalidFile(
                    '%s is in NRML 0.4 format, please run the following '
                    'command:\noq upgrade_nrml %s' % (
                        input_file, os.path.dirname(input_file) or '.'))
            print(source_model_info([node[0]]))
        elif node[0].tag.endswith('logicTree'):
            nodes = [nrml.read(sm_path)[0]
                     for sm_path in logictree.collect_info(input_file).smpaths]
            print(source_model_info(nodes))
        else:
            print(node.to_str())
    elif input_file.endswith(('.ini', '.zip')):
        with Monitor('info', measuremem=True) as mon:
            if report:
                print('Generated', reportwriter.build_report(input_file))
            else:
                print_csm_info(input_file)
        if mon.duration > 1:
            print(mon)
    elif input_file:
        print("No info for '%s'" % input_file)
Example 14
    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        srcfilter = self.src_filter(self.datastore.tempname)
        self.indices = AccumDict(accum=[])  # sid, idx -> indices
        if oq.hazard_calculation_id:  # from ruptures
            self.datastore.parent = util.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.datastore.parent['csm_info'])
        else:  # from sources
            self.build_events_from_sources(srcfilter)
            if (oq.ground_motion_fields is False
                    and oq.hazard_curves_from_gmfs is False):
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        N = len(self.sitecol.complete)
        if oq.ground_motion_fields:
            nrups = len(self.datastore['ruptures'])
            self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
            self.datastore.create_dset('gmf_data/sigma_epsilon',
                                       sig_eps_dt(oq.imtls))
            self.datastore.create_dset('gmf_data/indices',
                                       hdf5.vuint32,
                                       shape=(N, 2),
                                       fillvalue=None)
            self.datastore.create_dset('gmf_data/events_by_sid', U32, (N, ))
            self.datastore.create_dset('gmf_data/time_by_rup',
                                       time_dt, (nrups, ),
                                       fillvalue=None)
        if oq.hazard_curves_from_gmfs:
            self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

        # compute_gmfs in parallel
        self.datastore.swmr_on()
        logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
        iterargs = ((rgetter, srcfilter, self.param)
                    for rgetter in gen_rupture_getters(self.datastore,
                                                       srcfilter=srcfilter))
        acc = parallel.Starmap(self.core_task.__func__,
                               iterargs,
                               h5=self.datastore.hdf5,
                               num_cores=oq.num_cores).reduce(
                                   self.agg_dicts, self.acc0())

        if self.indices:
            dset = self.datastore['gmf_data/indices']
            num_evs = self.datastore['gmf_data/events_by_sid']
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
            avg_events_by_sid = num_evs[()].sum() / N
            logging.info('Found ~%d GMVs per site', avg_events_by_sid)
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc
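The execute method above accumulates, for every site ID, the start/stop offsets of its rows inside the flat gmf_data/data dataset, and then derives the number of stored GMVs per site as (stop - start).sum(). A toy reconstruction of that bookkeeping with plain numpy (the offsets below are made up for illustration):

import numpy

# made-up offsets: (sid, 0) -> start offsets, (sid, 1) -> stop offsets
# into the flat gmf_data/data dataset, as accumulated in self.indices
indices = {(0, 0): [0], (0, 1): [4],     # site 0 owns rows 0..3
           (1, 0): [4], (1, 1): [10]}    # site 1 owns rows 4..9
N = 2  # number of sites

events_by_sid = numpy.zeros(N, numpy.uint32)
for sid in range(N):
    start = numpy.array(indices[sid, 0])
    stop = numpy.array(indices[sid, 1])
    events_by_sid[sid] = (stop - start).sum()  # GMVs stored for this site

print(events_by_sid)            # [4 6]
print(events_by_sid.sum() / N)  # 5.0, the per-site average logged above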
Example no. 15
def get_gmfs(oqparam):
    """
    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        eids, gmf array of shape (R, N, E, M)
    """
    M = len(oqparam.imtls)
    fname = oqparam.inputs['gmfs']
    if fname.endswith('.csv'):
        array = writers.read_composite_array(fname).array
        R = len(numpy.unique(array['rlzi']))
        if R > 1:
            raise InvalidFile('%s: found %d realizations, currently only one '
                              'realization is supported' % (fname, R))
        # the array has the structure rlzi, sid, eid, gmv_PGA, gmv_...
        dtlist = [(name, array.dtype[name]) for name in array.dtype.names[:3]]
        required_imts = list(oqparam.imtls)
        imts = [name[4:] for name in array.dtype.names[3:]]
        if imts != required_imts:
            raise ValueError('Required %s, but %s contains %s' % (
                required_imts, fname, imts))
        dtlist.append(('gmv', (F32, M)))
        eids = numpy.unique(array['eid'])
        E = len(eids)
        found_eids = set(eids)
        expected_eids = set(range(E))  # expected incremental eids
        missing_eids = expected_eids - found_eids
        if missing_eids:
            raise InvalidFile('Missing eids in the gmfs.csv file: %s'
                              % missing_eids)
        assert expected_eids == found_eids, (expected_eids, found_eids)
        eidx = {eid: e for e, eid in enumerate(eids)}
        sitecol = get_site_collection(oqparam)
        expected_sids = set(sitecol.sids)
        found_sids = set(numpy.unique(array['sid']))
        missing_sids = found_sids - expected_sids
        if missing_sids:
            raise InvalidFile(
                'Found site IDs missing in the sites.csv file: %s' %
                missing_sids)
        N = len(sitecol)
        gmfs = numpy.zeros((R, N, E, M), F32)
        counter = collections.Counter()
        for row in array.view(dtlist):
            key = row['rlzi'], row['sid'], eidx[row['eid']]
            gmfs[key] = row['gmv']
            counter[key] += 1
        dupl = [key for key in counter if counter[key] > 1]
        if dupl:
            raise InvalidFile('Duplicated (rlzi, sid, eid) in the GMFs file: '
                              '%s' % dupl)
    elif fname.endswith('.xml'):
        eids, gmfs_by_imt = get_scenario_from_nrml(oqparam, fname)
        N, E = gmfs_by_imt.shape
        gmfs = numpy.zeros((1, N, E, M), F32)
        for imti, imtstr in enumerate(oqparam.imtls):
            gmfs[0, :, :, imti] = gmfs_by_imt[imtstr]
    else:
        raise NotImplementedError('Reading from %s' % fname)
    return eids, gmfs
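get_gmfs rejects CSV inputs where the same (rlzi, sid, eid) triple appears twice. The duplicate detection reduces to a Counter over the key triples; a self-contained reproduction on toy rows:

import collections

# toy (rlzi, sid, eid) triples; the last row repeats an earlier one
rows = [(0, 0, 0), (0, 0, 1), (0, 1, 0), (0, 0, 1)]
counter = collections.Counter(rows)
dupl = [key for key in counter if counter[key] > 1]
print(dupl)  # [(0, 0, 1)] -> the engine raises InvalidFile in this case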
Example no. 16
    def __init__(self, **names_vals):
        for name in list(names_vals):
            if name == 'quantile_hazard_curves':
                names_vals['quantiles'] = names_vals.pop(name)
        super().__init__(**names_vals)
        job_ini = self.inputs['job_ini']
        if 'calculation_mode' not in names_vals:
            raise InvalidFile('Missing calculation_mode in %s' % job_ini)
        if 'region_constraint' in names_vals:
            if 'region' in names_vals:
                raise InvalidFile('You cannot have both region and '
                                  'region_constraint in %s' % job_ini)
            logging.warning(
                'region_constraint is obsolete, use region instead')
            self.region = valid.wkt_polygon(
                names_vals.pop('region_constraint'))
        self.risk_investigation_time = (self.risk_investigation_time
                                        or self.investigation_time)
        if ('intensity_measure_types_and_levels' in names_vals
                and 'intensity_measure_types' in names_vals):
            logging.warning('Ignoring intensity_measure_types since '
                            'intensity_measure_types_and_levels is set')
        if 'iml_disagg' in names_vals:
            self.hazard_imtls = self.iml_disagg
            if 'intensity_measure_types_and_levels' in names_vals:
                raise InvalidFile(
                    'Please remove the intensity_measure_types_and_levels '
                    'from %s: they will be inferred from the iml_disagg '
                    'dictionary' % job_ini)
        elif 'intensity_measure_types_and_levels' in names_vals:
            self.hazard_imtls = self.intensity_measure_types_and_levels
            delattr(self, 'intensity_measure_types_and_levels')
        elif 'intensity_measure_types' in names_vals:
            self.hazard_imtls = dict.fromkeys(self.intensity_measure_types)
            delattr(self, 'intensity_measure_types')
        self._file_type, self._risk_files = get_risk_files(self.inputs)

        self.check_source_model()
        if (self.hazard_calculation_id
                and self.calculation_mode == 'ucerf_risk'):
            raise ValueError('You cannot use the --hc option with ucerf_risk')
        if self.hazard_precomputed() and self.job_type == 'risk':
            self.check_missing('site_model', 'debug')
            self.check_missing('gsim_logic_tree', 'debug')
            self.check_missing('source_model_logic_tree', 'debug')

        # check the gsim_logic_tree
        if self.inputs.get('gsim_logic_tree'):
            if self.gsim != '[FromFile]':
                raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there'
                                  ' must be no `gsim` key' % job_ini)
            path = os.path.join(self.base_path, self.inputs['gsim_logic_tree'])
            gsim_lt = logictree.GsimLogicTree(path, ['*'])

            # check the number of branchsets
            branchsets = len(gsim_lt._ltnode)
            if 'scenario' in self.calculation_mode and branchsets > 1:
                raise InvalidFile(
                    '%s: %s for a scenario calculation must contain a single '
                    'branchset, found %d!' % (job_ini, path, branchsets))

            # check the IMTs vs the GSIMs
            self._gsims_by_trt = gsim_lt.values
            for gsims in self._gsims_by_trt.values():
                self.check_gsims(gsims)
        elif self.gsim is not None:
            self.check_gsims([valid.gsim(self.gsim)])

        # checks for disaggregation
        if self.calculation_mode == 'disaggregation':
            if not self.poes_disagg and not self.iml_disagg:
                raise InvalidFile('poes_disagg or iml_disagg must be set '
                                  'in %(job_ini)s' % self.inputs)
            elif self.poes_disagg and self.iml_disagg:
                raise InvalidFile(
                    '%s: iml_disagg and poes_disagg cannot be set '
                    'at the same time' % job_ini)
            for k in ('mag_bin_width', 'distance_bin_width',
                      'coordinate_bin_width', 'num_epsilon_bins'):
                if k not in vars(self):
                    raise InvalidFile('%s must be set in %s' % (k, job_ini))

        # checks for classical_damage
        if self.calculation_mode == 'classical_damage':
            if self.conditional_loss_poes:
                raise InvalidFile('%s: conditional_loss_poes are not defined '
                                  'for classical_damage calculations' %
                                  job_ini)

        # checks for event_based_risk
        if (self.calculation_mode == 'event_based_risk'
                and self.asset_correlation not in (0, 1)):
            raise ValueError('asset_correlation != {0, 1} is no longer'
                             ' supported')

        # checks for ebrisk
        if self.calculation_mode == 'ebrisk':
            if self.insured_losses:
                raise ValueError('ebrisk does not support insured losses')
            elif self.number_of_logic_tree_samples == 0:
                logging.warning('ebrisk is not meant for full enumeration')

        # check for GMFs from file
        if (self.inputs.get('gmfs', '').endswith('.csv') and not self.sites
                and 'sites' not in self.inputs):
            raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini)
        elif (self.inputs.get('gmfs', '').endswith('.xml')
              and 'sites' in self.inputs):
            raise InvalidFile('%s: You cannot have both sites_csv and '
                              'gmfs_file' % job_ini)

        # checks for event_based
        if 'event_based' in self.calculation_mode:
            if self.ses_per_logic_tree_path >= TWO32:
                raise ValueError('ses_per_logic_tree_path too big: %d' %
                                 self.ses_per_logic_tree_path)
            if self.number_of_logic_tree_samples >= TWO16:
                raise ValueError('number_of_logic_tree_samples too big: %d' %
                                 self.number_of_logic_tree_samples)

        # check grid + sites
        if (self.region_grid_spacing and 'site_model' in self.inputs
                and 'exposure' in self.inputs):
            logging.warning(
                'You are specifying a grid, a site model and an exposure at '
                'the same time: consider using `oq prepare_site_model`')
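The constructor above derives hazard_imtls from, in order of precedence, iml_disagg, intensity_measure_types_and_levels, or a bare intensity_measure_types list. A simplified standalone sketch of that precedence (infer_hazard_imtls is a hypothetical helper, not part of the engine):

def infer_hazard_imtls(names_vals):
    """Simplified version of the precedence applied above."""
    if 'iml_disagg' in names_vals:
        if 'intensity_measure_types_and_levels' in names_vals:
            raise ValueError('remove intensity_measure_types_and_levels: '
                             'it is inferred from the iml_disagg dictionary')
        return dict(names_vals['iml_disagg'])
    elif 'intensity_measure_types_and_levels' in names_vals:
        return dict(names_vals['intensity_measure_types_and_levels'])
    elif 'intensity_measure_types' in names_vals:
        # the levels are not known yet: map each IMT to None
        return dict.fromkeys(names_vals['intensity_measure_types'])
    return {}

print(infer_hazard_imtls({'intensity_measure_types': ['PGA', 'SA(0.1)']}))
# {'PGA': None, 'SA(0.1)': None}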
Example no. 17
def ffconvert(fname, limit_states, ff, min_iml=1E-10):
    """
    Convert a fragility function into a numpy array plus a bunch
    of attributes.

    :param fname: path to the fragility model file
    :param limit_states: expected limit states
    :param ff: fragility function node
    :param min_iml: minimum IML used as a cutoff to avoid log(0) issues
    :returns: a pair (array, dictionary)
    """
    with context(fname, ff):
        ffs = ff[1:]
        imls = ff.imls
    nodamage = imls.attrib.get('noDamageLimit')
    if nodamage == 0:
        # use a cutoff to avoid log(0) in GMPE.to_distribution_values
        logging.warning('Found a noDamageLimit=0 in %s, line %s, '
                        'using %g instead', fname, ff.lineno, min_iml)
        nodamage = min_iml
    with context(fname, imls):
        attrs = dict(format=ff['format'],
                     imt=imls['imt'],
                     id=ff['id'],
                     nodamage=nodamage)

    LS = len(limit_states)
    if LS != len(ffs):
        with context(fname, ff):
            raise InvalidFile('expected %d limit states, found %d' %
                              (LS, len(ffs)))
    if ff['format'] == 'continuous':
        minIML = float(imls['minIML'])
        if minIML == 0:
            # use a cutoff to avoid log(0) in GMPE.to_distribution_values
            logging.warning('Found minIML=0 in %s, line %s, using %g instead',
                            fname, imls.lineno, min_iml)
            minIML = min_iml
        attrs['minIML'] = minIML
        attrs['maxIML'] = float(imls['maxIML'])
        array = numpy.zeros(LS, [('mean', F64), ('stddev', F64)])
        for i, ls, node in zip(range(LS), limit_states, ff[1:]):
            if ls != node['ls']:
                with context(fname, node):
                    raise InvalidFile('expected %s, found %s' %
                                      (ls, node['ls']))
            array['mean'][i] = node['mean']
            array['stddev'][i] = node['stddev']
    elif ff['format'] == 'discrete':
        attrs['imls'] = ~imls
        valid.check_levels(attrs['imls'], attrs['imt'], min_iml)
        num_poes = len(attrs['imls'])
        array = numpy.zeros((LS, num_poes))
        for i, ls, node in zip(range(LS), limit_states, ff[1:]):
            with context(fname, node):
                if ls != node['ls']:
                    raise InvalidFile('expected %s, found %s' %
                                      (ls, node['ls']))
                poes = (~node if isinstance(~node, list)
                        else valid.probabilities(~node))
                if len(poes) != num_poes:
                    raise InvalidFile('expected %s poes, found %s' %
                                      (num_poes, len(poes)))
                array[i, :] = poes
    # NB: the format is constrained in nrml.FragilityNode to be either
    # discrete or continuous, there is no third option
    return array, attrs
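For the 'discrete' format, ffconvert ends up with an (LS, num_poes) array holding one row of poes per limit state, all sharing the same IMLs. A toy construction of that array with made-up fragility data:

import numpy

limit_states = ['slight', 'moderate', 'complete']
imls = [0.1, 0.2, 0.4, 0.8]  # intensity levels shared by all limit states
poes_by_ls = {'slight':   [0.05, 0.20, 0.60, 0.95],
              'moderate': [0.01, 0.10, 0.40, 0.85],
              'complete': [0.00, 0.02, 0.15, 0.60]}

LS, num_poes = len(limit_states), len(imls)
array = numpy.zeros((LS, num_poes))
for i, ls in enumerate(limit_states):
    poes = poes_by_ls[ls]
    assert len(poes) == num_poes  # the same length check ffconvert enforces
    array[i, :] = poes
print(array.shape)  # (3, 4)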
Example no. 18
def get_mesh(oqparam):
    """
    Extract the mesh of points to compute from the sites,
    the sites_csv, the region, the site model, the exposure in this order.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    """
    global pmap, exposure, gmfs, eids
    if 'exposure' in oqparam.inputs and exposure is None:
        # read it only once
        exposure = get_exposure(oqparam)
    if oqparam.sites:
        return geo.Mesh.from_coords(oqparam.sites)
    elif 'sites' in oqparam.inputs:
        fname = oqparam.inputs['sites']
        header = get_csv_header(fname)
        if 'lon' in header:
            data = []
            for i, row in enumerate(
                    csv.DictReader(open(fname, encoding='utf-8-sig'))):
                if header[0] == 'site_id' and row['site_id'] != str(i):
                    raise InvalidFile('%s: expected site_id=%d, got %s' %
                                      (fname, i, row['site_id']))
                data.append(' '.join([row['lon'], row['lat']]))
        elif 'gmfs' in oqparam.inputs:
            raise InvalidFile('Missing header in %(sites)s' % oqparam.inputs)
        else:
            data = [
                line.replace(',', ' ')
                for line in open(fname, encoding='utf-8-sig')
            ]
        coords = valid.coordinates(','.join(data))
        start, stop = oqparam.sites_slice
        c = (coords[start:stop]
             if header[0] == 'site_id' else sorted(coords[start:stop]))
        # NB: Notice the sort=False below
        # Calculations starting from ground motion fields input by the user
        # require at least two input files related to the gmf data:
        #   1. A sites.csv file, listing {site_id, lon, lat} tuples
        #   2. A gmfs.csv file, listing {event_id, site_id, gmv[IMT1],
        #      gmv[IMT2], ...} tuples
        # The site coordinates defined in the sites file do not need to be in
        # sorted order.
        # We must only ensure uniqueness of the provided site_ids and
        # coordinates.
        # When creating the site mesh from the site coordinates read from
        # the csv file, the sort=False flag maintains the user-specified
        # site_ids instead of reassigning them after sorting.
        return geo.Mesh.from_coords(c, sort=False)
    elif 'hazard_curves' in oqparam.inputs:
        fname = oqparam.inputs['hazard_curves']
        if isinstance(fname, list):  # for csv
            mesh, pmap = get_pmap_from_csv(oqparam, fname)
        else:
            raise NotImplementedError('Reading from %s' % fname)
        return mesh
    elif oqparam.region_grid_spacing:
        if oqparam.region:
            poly = geo.Polygon.from_wkt(oqparam.region)
        elif exposure:
            # in case of implicit grid the exposure takes precedence over
            # the site model
            poly = exposure.mesh.get_convex_hull()
        elif 'site_model' in oqparam.inputs:
            # this happens in event_based/case_19, where there is an implicit
            # grid over the site model
            sm = get_site_model(oqparam)
            poly = geo.Mesh(sm['lon'], sm['lat']).get_convex_hull()
        else:
            raise InvalidFile('There is a grid spacing but not a region, '
                              'nor a site model, nor an exposure in %s' %
                              oqparam.inputs['job_ini'])
        try:
            logging.info('Inferring the hazard grid from the exposure')
            mesh = poly.dilate(oqparam.region_grid_spacing).discretize(
                oqparam.region_grid_spacing)
            return geo.Mesh.from_coords(zip(mesh.lons, mesh.lats))
        except Exception:
            raise ValueError('Could not discretize region with grid spacing '
                             '%(region_grid_spacing)s' % vars(oqparam))
    # the site model has the precedence over the exposure, see the
    # discussion in https://github.com/gem/oq-engine/pull/5217
    elif 'site_model' in oqparam.inputs:
        logging.info('Extracting the hazard sites from the site model')
        sm = get_site_model(oqparam)
        return geo.Mesh(sm['lon'], sm['lat'])
    elif 'exposure' in oqparam.inputs:
        return exposure.mesh
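When sites come from a CSV file with a site_id column, get_mesh insists that the IDs are incremental integers starting from zero. A self-contained sketch of that check, with io.StringIO standing in for the real file:

import csv
import io

sites_csv = io.StringIO('site_id,lon,lat\n0,10.0,45.0\n1,10.5,45.2\n')
header = sites_csv.readline().strip().split(',')
sites_csv.seek(0)
data = []
for i, row in enumerate(csv.DictReader(sites_csv)):
    if header[0] == 'site_id' and row['site_id'] != str(i):
        raise ValueError('expected site_id=%d, got %s' % (i, row['site_id']))
    data.append((float(row['lon']), float(row['lat'])))
print(data)  # [(10.0, 45.0), (10.5, 45.2)] -> fed to geo.Mesh.from_coords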
Example no. 19
def get_vulnerability_functions_05(node, fname):
    """
    :param node:
        a vulnerabilityModel node
    :param fname:
        path of the vulnerability file
    :returns:
        a dictionary imt, vf_id -> vulnerability function
    """
    # NB: the IMTs can be duplicated and with different levels, each
    # vulnerability function in a set will get its own levels
    vf_ids = set()
    vmodel = scientific.VulnerabilityModel(**node.attrib)
    # imt, vf_id -> vulnerability function
    for vfun in node.getnodes('vulnerabilityFunction'):
        with context(fname, vfun):
            imt = vfun.imls['imt']
            imls = numpy.array(~vfun.imls)
            vf_id = vfun['id']
        if vf_id in vf_ids:
            raise InvalidFile(
                'Duplicated vulnerabilityFunctionID: %s: %s, line %d' %
                (vf_id, fname, vfun.lineno))
        vf_ids.add(vf_id)
        num_probs = None
        if vfun['dist'] == 'PM':
            loss_ratios, probs = [], []
            for probabilities in vfun[1:]:
                loss_ratios.append(probabilities['lr'])
                probs.append(valid.probabilities(~probabilities))
                if num_probs is None:
                    num_probs = len(probs[-1])
                elif len(probs[-1]) != num_probs:
                    raise ValueError(
                        'Wrong number of probabilities (expected %d, '
                        'got %d) in %s, line %d' %
                        (num_probs, len(probs[-1]), fname,
                         probabilities.lineno))
            all_probs = numpy.array(probs)
            assert all_probs.shape == (len(loss_ratios), len(imls)), (
                len(loss_ratios), len(imls))
            vmodel[imt, vf_id] = (
                scientific.VulnerabilityFunctionWithPMF(
                    vf_id, imt, imls, numpy.array(loss_ratios),
                    all_probs))
            # the seed will be set by readinput.get_crmodel
        else:
            with context(fname, vfun):
                loss_ratios = ~vfun.meanLRs
                coefficients = ~vfun.covLRs
            if len(loss_ratios) != len(imls):
                raise InvalidFile(
                    'There are %d loss ratios, but %d imls: %s, line %d' %
                    (len(loss_ratios), len(imls), fname,
                     vfun.meanLRs.lineno))
            if len(coefficients) != len(imls):
                raise InvalidFile(
                    'There are %d coefficients, but %d imls: %s, '
                    'line %d' % (len(coefficients), len(imls), fname,
                                 vfun.covLRs.lineno))
            with context(fname, vfun):
                vmodel[imt, vf_id] = scientific.VulnerabilityFunction(
                    vf_id, imt, imls, loss_ratios, coefficients,
                    vfun['dist'])
    return vmodel
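For the PM distribution, the probabilities form a matrix with one row per loss ratio and one column per IML, and each column is a probability mass function over the loss ratios. A toy check mirroring the shape assertion above (the extra column-sum check is an assumption about well-formed PMF input, not enforced by the parser):

import numpy

imls = [0.1, 0.3, 0.6]
loss_ratios = [0.05, 0.30, 0.70, 1.00]
probs = [[0.7, 0.4, 0.1],
         [0.2, 0.3, 0.2],
         [0.1, 0.2, 0.3],
         [0.0, 0.1, 0.4]]  # one row per loss ratio, one column per IML

all_probs = numpy.array(probs)
assert all_probs.shape == (len(loss_ratios), len(imls)), all_probs.shape
assert numpy.allclose(all_probs.sum(axis=0), 1.0)  # each column is a PMF
print('PMF matrix OK:', all_probs.shape)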
Example no. 20
def _get_exposure(fname, stop=None):
    """
    :param fname:
        path of the XML file containing the exposure
    :param stop:
        node at which to stop parsing (or None)
    :returns:
        a pair (Exposure instance, list of asset nodes)
    """
    [exposure] = nrml.read(fname, stop=stop)
    if not exposure.tag.endswith('exposureModel'):
        raise InvalidFile('%s: expected exposureModel, got %s' %
                          (fname, exposure.tag))
    description = exposure.description
    try:
        conversions = exposure.conversions
    except AttributeError:
        conversions = Node('conversions', nodes=[Node('costTypes', [])])
    try:
        area = conversions.area
    except AttributeError:
        # NB: the area type cannot be an empty string because when sending
        # around the CostCalculator object we would run into this numpy bug
        # about pickling dictionaries with empty strings:
        # https://github.com/numpy/numpy/pull/5475
        area = Node('area', dict(type='?'))
    try:
        occupancy_periods = exposure.occupancyPeriods.text or ''
    except AttributeError:
        occupancy_periods = ''
    try:
        tagNames = exposure.tagNames
    except AttributeError:
        tagNames = Node('tagNames', text='')
    tagnames = ['id'] + (~tagNames or [])
    if set(tagnames) & {'taxonomy', 'exposure', 'country'}:
        raise InvalidFile('taxonomy, exposure and country are reserved '
                          'names, you cannot use them in <tagNames>: %s'
                          % fname)
    tagnames.insert(0, 'taxonomy')

    # read the cost types and make some check
    cost_types = []
    retrofitted = False
    for ct in conversions.costTypes:
        with context(fname, ct):
            ctname = ct['name']
            if ctname == 'structural' and 'retrofittedType' in ct.attrib:
                if ct['retrofittedType'] != ct['type']:
                    raise ValueError(
                        'The retrofittedType %s is different from the '
                        'type %s' % (ct['retrofittedType'], ct['type']))
                if ct['retrofittedUnit'] != ct['unit']:
                    raise ValueError(
                        'The retrofittedUnit %s is different from the '
                        'unit %s' % (ct['retrofittedUnit'], ct['unit']))
                retrofitted = True
            cost_types.append(
                (ctname, valid.cost_type_type(ct['type']), ct['unit']))
    if 'occupants' in cost_types:
        cost_types.append(('occupants', 'per_area', 'people'))
    cost_types.sort(key=operator.itemgetter(0))
    cost_types = numpy.array(cost_types, cost_type_dt)
    cc = CostCalculator(
        {}, {}, {}, {name: i for i, name in enumerate(tagnames)})
    for ct in cost_types:
        name = ct['name']  # structural, nonstructural, ...
        cc.cost_types[name] = ct['type']  # aggregated, per_asset, per_area
        cc.area_types[name] = area['type']
        cc.units[name] = ct['unit']
    exp = Exposure(
        exposure['id'], exposure['category'],
        description.text, cost_types, occupancy_periods, retrofitted,
        area.attrib, [], [], cc, TagCollection(tagnames))
    assets_text = exposure.assets.text.strip()
    if assets_text:
        # the <assets> tag contains a list of file names
        dirname = os.path.dirname(fname)
        exp.datafiles = [os.path.join(dirname, f) for f in assets_text.split()]
    else:
        exp.datafiles = []
    return exp, exposure.assets
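_get_exposure collects (name, type, unit) triples, sorts them by name and stores them in a numpy structured array. A minimal sketch with a hypothetical cost_type_dt (the real dtype lives in the engine):

import operator
import numpy

cost_type_dt = numpy.dtype([('name', 'S21'), ('type', 'S21'), ('unit', 'S21')])
cost_types = [('structural', 'per_asset', 'EUR'),
              ('contents', 'per_asset', 'EUR')]
cost_types.sort(key=operator.itemgetter(0))  # sort by name, as above
arr = numpy.array(cost_types, cost_type_dt)
for ct in arr:
    print(ct['name'].decode(), ct['type'].decode(), ct['unit'].decode())
# contents per_asset EUR
# structural per_asset EUR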
Example no. 21
    def __init__(self, **names_vals):
        if '_job_id' in names_vals:  # called from engine
            del names_vals['_job_id']

        # support legacy names
        for name in list(names_vals):
            if name == 'quantile_hazard_curves':
                names_vals['quantiles'] = names_vals.pop(name)
            elif name == 'mean_hazard_curves':
                names_vals['mean'] = names_vals.pop(name)
            elif name == 'max_hazard_curves':
                names_vals['max'] = names_vals.pop(name)
        super().__init__(**names_vals)
        if 'job_ini' not in self.inputs:
            self.inputs['job_ini'] = '<in-memory>'
        job_ini = self.inputs['job_ini']
        if 'calculation_mode' not in names_vals:
            raise InvalidFile('Missing calculation_mode in %s' % job_ini)
        if 'region_constraint' in names_vals:
            if 'region' in names_vals:
                raise InvalidFile('You cannot have both region and '
                                  'region_constraint in %s' % job_ini)
            logging.warning(
                'region_constraint is obsolete, use region instead')
            self.region = valid.wkt_polygon(
                names_vals.pop('region_constraint'))
        self.risk_investigation_time = (self.risk_investigation_time
                                        or self.investigation_time)
        self.collapse_level = int(self.collapse_level)
        if ('intensity_measure_types_and_levels' in names_vals
                and 'intensity_measure_types' in names_vals):
            logging.warning('Ignoring intensity_measure_types since '
                            'intensity_measure_types_and_levels is set')
        if 'iml_disagg' in names_vals:
            self.iml_disagg.pop('default')
            # normalize things like SA(0.10) -> SA(0.1)
            self.iml_disagg = {
                str(from_string(imt)): val
                for imt, val in self.iml_disagg.items()
            }
            self.hazard_imtls = self.iml_disagg
            if 'intensity_measure_types_and_levels' in names_vals:
                raise InvalidFile(
                    'Please remove the intensity_measure_types_and_levels '
                    'from %s: they will be inferred from the iml_disagg '
                    'dictionary' % job_ini)
        elif 'intensity_measure_types_and_levels' in names_vals:
            self.hazard_imtls = self.intensity_measure_types_and_levels
            delattr(self, 'intensity_measure_types_and_levels')
            lens = set(map(len, self.hazard_imtls.values()))
            if len(lens) > 1:
                dic = {imt: len(ls) for imt, ls in self.hazard_imtls.items()}
                raise ValueError(
                    'Each IMT must have the same number of levels, instead '
                    'you have %s' % dic)
        elif 'intensity_measure_types' in names_vals:
            self.hazard_imtls = dict.fromkeys(self.intensity_measure_types)
            if 'maximum_intensity' in names_vals:
                minint = self.minimum_intensity or {'default': 1E-2}
                for imt in self.hazard_imtls:
                    i1 = calc.filters.getdefault(minint, imt)
                    i2 = calc.filters.getdefault(self.maximum_intensity, imt)
                    self.hazard_imtls[imt] = list(valid.logscale(i1, i2, 20))
            delattr(self, 'intensity_measure_types')
        if ('ps_grid_spacing' in names_vals
                and 'pointsource_distance' not in names_vals):
            raise InvalidFile('%s: ps_grid_spacing requires setting a '
                              'pointsource_distance!' % self.inputs['job_ini'])

        self._risk_files = get_risk_files(self.inputs)

        if self.hazard_precomputed() and self.job_type == 'risk':
            self.check_missing('site_model', 'debug')
            self.check_missing('gsim_logic_tree', 'debug')
            self.check_missing('source_model_logic_tree', 'debug')

        # check investigation_time
        if (self.investigation_time
                and self.calculation_mode.startswith('scenario')):
            raise ValueError('%s: there cannot be investigation_time in %s' %
                             (self.inputs['job_ini'], self.calculation_mode))

        # check the gsim_logic_tree
        if self.inputs.get('gsim_logic_tree'):
            if self.gsim != '[FromFile]':
                raise InvalidFile('%s: if `gsim_logic_tree_file` is set, there'
                                  ' must be no `gsim` key' % job_ini)
            path = os.path.join(self.base_path, self.inputs['gsim_logic_tree'])
            gsim_lt = logictree.GsimLogicTree(path, ['*'])

            # check the IMTs vs the GSIMs
            self._trts = set(gsim_lt.values)
            for gsims in gsim_lt.values.values():
                self.check_gsims(gsims)
        elif self.gsim is not None:
            self.check_gsims([valid.gsim(self.gsim, self.base_path)])

        # check inputs
        unknown = set(self.inputs) - self.KNOWN_INPUTS
        if unknown:
            raise ValueError('Unknown key %s_file in %s' %
                             (unknown.pop(), self.inputs['job_ini']))

        # checks for disaggregation
        if self.calculation_mode == 'disaggregation':
            if not self.poes_disagg and self.poes:
                self.poes_disagg = self.poes
            elif not self.poes and self.poes_disagg:
                self.poes = self.poes_disagg
            elif self.poes != self.poes_disagg:
                raise InvalidFile(
                    'poes_disagg != poes: %s!=%s in %s' %
                    (self.poes_disagg, self.poes, self.inputs['job_ini']))
            if not self.poes_disagg and not self.iml_disagg:
                raise InvalidFile('poes_disagg or iml_disagg must be set '
                                  'in %(job_ini)s' % self.inputs)
            elif self.poes_disagg and self.iml_disagg:
                raise InvalidFile(
                    '%s: iml_disagg and poes_disagg cannot be set '
                    'at the same time' % job_ini)
            for k in ('mag_bin_width', 'distance_bin_width',
                      'coordinate_bin_width', 'num_epsilon_bins'):
                if k not in vars(self):
                    raise InvalidFile('%s must be set in %s' % (k, job_ini))
            if self.disagg_outputs and not any('Eps' in out
                                               for out in self.disagg_outputs):
                self.num_epsilon_bins = 1
            if (self.rlz_index is not None
                    and self.num_rlzs_disagg is not None):
                raise InvalidFile('%s: you cannot set rlz_index and '
                                  'num_rlzs_disagg at the same time' % job_ini)

        # checks for classical_damage
        if self.calculation_mode == 'classical_damage':
            if self.conditional_loss_poes:
                raise InvalidFile('%s: conditional_loss_poes are not defined '
                                  'for classical_damage calculations' %
                                  job_ini)

        # checks for event_based_risk
        if (self.calculation_mode == 'event_based_risk'
                and self.asset_correlation not in (0, 1)):
            raise ValueError('asset_correlation != {0, 1} is no longer'
                             ' supported')

        # checks for ebrisk
        if self.calculation_mode == 'ebrisk':
            if self.risk_investigation_time is None:
                raise InvalidFile('Please set the risk_investigation_time in'
                                  ' %s' % job_ini)
        elif self.aggregate_by:
            raise InvalidFile('aggregate_by cannot be set in %s [not ebrisk]' %
                              job_ini)

        # check for GMFs from file
        if (self.inputs.get('gmfs', '').endswith('.csv')
                and 'sites' not in self.inputs and self.sites is None):
            raise InvalidFile('%s: You forgot sites|sites_csv' % job_ini)
        elif self.inputs.get('gmfs', '').endswith('.xml'):
            raise InvalidFile('%s: GMFs in XML are not supported anymore' %
                              job_ini)

        # checks for event_based
        if 'event_based' in self.calculation_mode:
            if self.ps_grid_spacing:
                logging.warning('ps_grid_spacing is ignored in event_based '
                                'calculations"')

            if self.ses_per_logic_tree_path >= TWO32:
                raise ValueError('ses_per_logic_tree_path too big: %d' %
                                 self.ses_per_logic_tree_path)
            if self.number_of_logic_tree_samples >= TWO16:
                raise ValueError('number_of_logic_tree_samples too big: %d' %
                                 self.number_of_logic_tree_samples)

        # check grid + sites
        if self.region_grid_spacing and ('sites' in self.inputs or self.sites):
            raise ValueError('You are specifying grid and sites at the same '
                             'time: which one do you want?')

        # check for amplification
        if ('amplification' in self.inputs and self.imtls
                and self.calculation_mode
                in ['classical', 'classical_risk', 'disaggregation']):
            check_same_levels(self.imtls)
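When only intensity_measure_types and maximum_intensity are given, the constructor above synthesizes 20 log-spaced levels per IMT between the minimum and the maximum intensity. numpy.logspace gives an equivalent construction (a sketch; the engine has its own valid.logscale and calc.filters.getdefault):

import numpy

def logscale(x_min, x_max, n):
    """n levels from x_min to x_max, evenly spaced in log space."""
    return numpy.logspace(numpy.log10(x_min), numpy.log10(x_max), n)

minimum_intensity = {'default': 1E-2}  # the same fallback used above
maximum_intensity = {'default': 2.0}
imt = 'PGA'
i1 = minimum_intensity.get(imt, minimum_intensity['default'])
i2 = maximum_intensity.get(imt, maximum_intensity['default'])
levels = list(logscale(i1, i2, 20))
print(len(levels), levels[0], levels[-1])  # 20 levels from 0.01 up to ~2.0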