Example #1
    def build_events_from_sources(self, srcfilter):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.info.get_gsims_by_trt()
        logging.info('Building ruptures')
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        ses_idx = 0
        allargs = []
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                if sg.atomic:  # do not split the group
                    allargs.append((sg, srcfilter, par))
                else:  # traditional groups
                    for block in self.block_splitter(sg.sources, key=by_grp):
                        if 'ucerf' in oq.calculation_mode:
                            for i in range(oq.ses_per_logic_tree_path):
                                par = par.copy()  # avoid mutating the dict
                                par['ses_seeds'] = [(ses_idx,
                                                     oq.ses_seed + i + 1)]
                                allargs.append((block, srcfilter, par))
                                ses_idx += 1
                        else:
                            allargs.append((block, srcfilter, par))
        smap = parallel.Starmap(self.build_ruptures.__func__,
                                allargs,
                                h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info'):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by rup_id
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore['ruptures']['id'] = numpy.arange(len(sorted_ruptures))
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        with self.monitor('saving events'):
            self.save_events(sorted_ruptures)
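
The `par = par.copy()` in the ucerf branch above is essential: without it every queued task would share a single dict and see only the last `ses_seeds`. A minimal stand-alone sketch of the pitfall, using plain dicts and illustrative seed values:

jobs = []
par = {'gsims': ['G1']}  # shared template, as in the loop above
for i in range(3):
    p = par.copy()  # one independent dict per task
    p['ses_seeds'] = [(i, 42 + i + 1)]
    jobs.append(p)
print([p['ses_seeds'] for p in jobs])  # three distinct seed pairs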
Example #2
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        assert oq.max_sites_per_tile > oq.max_sites_disagg, (
            oq.max_sites_per_tile, oq.max_sites_disagg)
        # must run before run_preclassical to set the pointsource_distance
        psd = self.set_psd()
        run_preclassical(self.csm, oq, self.datastore)

        # exit early if we want to perform only a preclassical
        if oq.calculation_mode == 'preclassical':
            recs = [tuple(row) for row in self.csm.source_info.values()]
            self.datastore['source_info'] = numpy.array(
                recs, readinput.source_info_dt)
            self.datastore['full_lt'] = self.csm.full_lt
            self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
            return

        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        smap = parallel.Starmap(classical,
                                self.get_args(acc0),
                                h5=self.datastore.hdf5)
        smap.monitor.save('srcfilter', self.src_filter())
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            source_ids = self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = (max(self.by_task) + 1,)  # 1-tuple: dataset shape
                er = self.datastore.create_dset('by_task/eff_ruptures', U32,
                                                num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites', U32,
                                                num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr,
                                                num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(source_ids[s] for s in srcids)
                self.by_task.clear()
        if self.calc_times:  # can be empty in case of errors
            self.numctxs = sum(arr[0] for arr in self.calc_times.values())
            numsites = sum(arr[1] for arr in self.calc_times.values())
            logging.info('Total number of contexts: {:_d}'.format(
                int(self.numctxs)))
            logging.info('Average number of sites per context: %d',
                         numsites / self.numctxs)
        if psd:
            psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
            if psdist and self.maxradius >= psdist / 2:
                logging.warning(
                    'The pointsource_distance of %d km is too '
                    'small compared to a maxradius of %d km', psdist,
                    self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
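
The `calc_times` accumulator above maps each source id to a float32 triple (num_ruptures, num_sites, elapsed_time) merged across task results. A stand-in sketch of that pattern without `AccumDict`, with hypothetical per-task triples:

import numpy

F32 = numpy.float32
calc_times = {}  # src_id -> numpy.zeros(3, F32): nr, ns, dt
for src_id, triple in [('A', (10, 5, 0.20)), ('B', (3, 1, 0.05)),
                       ('A', (7, 5, 0.10))]:
    arr = calc_times.setdefault(src_id, numpy.zeros(3, F32))
    arr += numpy.array(triple, F32)
print(calc_times['A'])  # ~[17. 10. 0.3]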
Example #3
    def acc0(self):
        """
        Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)
        """
        zd = AccumDict()
        num_levels = len(self.oqparam.imtls.array)
        rparams = {'grp_id', 'occurrence_rate',
                   'weight', 'probs_occur', 'clon_', 'clat_', 'rrup_'}
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        n = len(self.full_lt.sm_rlzs)
        trts = list(self.full_lt.gsim_lt.values)
        for sm in self.full_lt.sm_rlzs:
            for grp_id in self.full_lt.grp_ids(sm.ordinal):
                trt = trts[grp_id // n]
                gsims = gsims_by_trt[trt]
                cm = ContextMaker(trt, gsims)
                rparams.update(cm.REQUIRES_RUPTURE_PARAMETERS)
                for dparam in cm.REQUIRES_DISTANCES:
                    rparams.add(dparam + '_')
        zd.eff_ruptures = AccumDict(accum=0)  # trt -> eff_ruptures
        if self.few_sites:
            self.rparams = sorted(rparams)
            for k in self.rparams:
                if k == 'grp_id':
                    self.datastore.create_dset('rup/' + k, U16)
                elif k == 'probs_occur':  # variable length arrays
                    self.datastore.create_dset('rup/' + k, hdf5.vfloat64)
                elif k.endswith('_'):  # array of shape (U, N)
                    self.datastore.create_dset(
                        'rup/' + k, F32, shape=(None, self.N),
                        compression='gzip')
                else:
                    self.datastore.create_dset('rup/' + k, F32)
        else:
            self.rparams = {}
        self.by_task = {}  # task_no => src_ids
        self.totrups = 0  # total number of ruptures before collapsing
        self.maxradius = 0
        self.gidx = {tuple(grp_ids): i
                     for i, grp_ids in enumerate(self.datastore['grp_ids'])}

        # estimate max memory per core
        max_num_gsims = max(len(gsims) for gsims in gsims_by_trt.values())
        max_num_grp_ids = max(len(grp_ids) for grp_ids in self.gidx)
        pmapbytes = self.N * num_levels * max_num_gsims * max_num_grp_ids * 8
        if pmapbytes > TWO32:
            logging.warning(
                TOOBIG % (self.N, num_levels, max_num_gsims, max_num_grp_ids,
                          humansize(pmapbytes)))
        logging.info(MAXMEMORY % (self.N, num_levels, max_num_gsims,
                                  max_num_grp_ids, humansize(pmapbytes)))

        self.Ns = len(self.csm.source_info)
        if self.oqparam.disagg_by_src:
            sources = self.get_source_ids()
            self.datastore.create_dset(
                'disagg_by_src', F32,
                (self.N, self.R, self.M, self.L1, self.Ns))
            self.datastore.set_shape_attrs(
                'disagg_by_src', site_id=self.N, rlz_id=self.R,
                imt=list(self.oqparam.imtls), lvl=self.L1, src_id=sources)
        return zd
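
The memory check above assumes one float64 per (site, level, gsim, group) combination, i.e. pmapbytes = N * L * G * G' * 8. The same estimate in isolation, with illustrative sizes:

N, num_levels, max_num_gsims, max_num_grp_ids = 10_000, 40, 4, 2
pmapbytes = N * num_levels * max_num_gsims * max_num_grp_ids * 8
print('%.1f MiB per core' % (pmapbytes / 1024 ** 2))  # 24.4 MiB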
Example #4
def get_fragility_functions(fname,
                            continuous_fragility_discretization,
                            steps_per_interval=None):
    """
    :param fname:
        path of the fragility file
    :param continuous_fragility_discretization:
        continuous_fragility_discretization parameter
    :param steps_per_interval:
        steps_per_interval parameter
    :returns:
        damage_states list and dictionary taxonomy -> functions
    """
    [fmodel] = read_nodes(fname, lambda el: el.tag.endswith('fragilityModel'),
                          nodefactory['fragilityModel'])
    # ~fmodel.description is ignored
    limit_states = ~fmodel.limitStates
    tag = 'ffc' if fmodel['format'] == 'continuous' else 'ffd'
    fragility_functions = AccumDict()  # taxonomy -> functions
    for ffs in fmodel.getnodes('ffs'):
        add_zero_value = False
        # NB: the noDamageLimit is only defined for discrete fragility
        # functions. It sets the starting point of the functions: if the
        # noDamageLimit lies to the left of the first IML, the function
        # starts at zero there, so we prepend the noDamageLimit to the
        # IMLs and a zero to the poes
        nodamage = ffs.attrib.get('noDamageLimit')
        taxonomy = ~ffs.taxonomy
        imt_str, imls, min_iml, max_iml, imlUnit = ~ffs.IML

        if fmodel['format'] == 'discrete':
            if nodamage is not None and nodamage < imls[0]:
                # discrete fragility
                imls = [nodamage] + imls
                add_zero_value = True
            if steps_per_interval:
                gen_imls = scientific.fine_graining(imls, steps_per_interval)
            else:
                gen_imls = imls
        else:  # continuous:
            if min_iml is None:
                raise InvalidFile('Missing attribute minIML, line %d' %
                                  ffs.IML.lineno)
            elif max_iml is None:
                raise InvalidFile('Missing attribute maxIML, line %d' %
                                  ffs.IML.lineno)
            gen_imls = numpy.linspace(min_iml, max_iml,
                                      continuous_fragility_discretization)
        fragility_functions[taxonomy] = scientific.FragilityFunctionList(
            [],
            imt=imt_str,
            imls=list(gen_imls),
            no_damage_limit=nodamage,
            continuous_fragility_discretization=(
                continuous_fragility_discretization),
            steps_per_interval=steps_per_interval)
        lstates = []
        for ff in ffs.getnodes(tag):
            ls = ff['ls']  # limit state
            lstates.append(ls)
            if tag == 'ffc':
                with context(fname, ff):
                    mean_stddev = ~ff.params
                fragility_functions[taxonomy].append(
                    scientific.FragilityFunctionContinuous(ls, *mean_stddev))
            else:  # discrete
                with context(fname, ff):
                    poes = ~ff.poEs
                if add_zero_value:
                    poes = [0.] + poes

                fragility_functions[taxonomy].append(
                    scientific.FragilityFunctionDiscrete(
                        ls, imls, poes, nodamage))

        if lstates != limit_states:
            raise InvalidFile("Expected limit states %s, got %s in %s" %
                              (limit_states, lstates, fname))

    fragility_functions.damage_states = ['no_damage'] + limit_states
    return fragility_functions
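
In the discrete branch, `scientific.fine_graining` refines the IML grid by inserting intermediate points. A hedged sketch of such a refinement (the engine's real implementation may differ in detail):

import numpy

def fine_graining(points, steps):
    # insert steps - 1 extra points inside each interval,
    # keeping the original endpoints
    if steps < 2:
        return points
    out = []
    for x, y in zip(points[:-1], points[1:]):
        out.extend(numpy.linspace(x, y, steps, endpoint=False))
    return out + [points[-1]]

print(numpy.array(fine_graining([0.1, 0.2, 0.4], 2)))
# [0.1  0.15 0.2  0.3  0.4 ]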
Example #5
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
         an open hdf5.File where to store the source info
    """
    logging.info('Reading the CompositeSourceModel')
    full_lt = get_full_lt(oqparam)
    if oqparam.cachedir and not oqparam.is_ucerf():
        csm = _get_cachedir(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    et_ids = csm.get_et_ids()
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    grp_id = {tuple(arr): i for i, arr in enumerate(et_ids)}
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    lens = []
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            lens.append(len(src.et_ids))
            src.grp_id = grp_id[tuple(src.et_ids)]
            row = [
                src.source_id, src.grp_id, src.code, 0, 0, 0, src.id,
                full_lt.trti[src.tectonic_region_type]
            ]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = [
                    '%.2f' % item[0]
                    for item in src.get_annual_occurrence_rates()
                ]
            mags[sg.trt].update(srcmags)
    logging.info('There are %d groups and %d sources with mean len(et_ids)=%.1f',
                 len(csm.src_groups), sum(len(sg) for sg in csm.src_groups),
                 numpy.mean(lens))
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['et_ids'] = et_ids
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = arr = numpy.array(sorted(mags[trt]))
            h5['source_mags/' + trt] = arr
        oqparam.maximum_distance.interp(mags_by_trt)
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
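
The `mags` accumulator above is an AccumDict of sets keyed by tectonic region type; in plain Python it behaves like a defaultdict of sets collecting formatted magnitudes (illustrative values below):

from collections import defaultdict

mags = defaultdict(set)  # trt -> set of '%.2f' magnitude strings
mags['Active Shallow Crust'].update('%.2f' % m for m in (5.0, 5.5, 5.0))
mags['Stable Shallow Crust'].update(['6.00'])
print({trt: sorted(ms) for trt, ms in mags.items()})
# {'Active Shallow Crust': ['5.00', '5.50'], 'Stable Shallow Crust': ['6.00']}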
Example #6
def compute_ruptures(sources, sitecol, siteidx, rlzs_assoc, monitor):
    """
    :param sources:
        List of commonlib.source.Source tuples
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param siteidx:
        always equal to 0
    :param rlzs_assoc:
        a :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param monitor:
        monitor instance
    :returns:
        a dictionary trt_model_id -> [Rupture instances]
    """
    assert siteidx == 0, (
        'siteidx can be nonzero only for the classical_tiling calculations: '
        'tiling with the EventBasedRuptureCalculator is an error')
    # NB: by construction each block is a non-empty list with
    # sources of the same trt_model_id
    trt_model_id = sources[0].trt_model_id
    oq = monitor.oqparam
    trt = sources[0].tectonic_region_type
    try:
        max_dist = oq.maximum_distance[trt]
    except KeyError:
        max_dist = oq.maximum_distance['default']
    cmaker = ContextMaker(rlzs_assoc.gsims_by_trt_id[trt_model_id])
    params = cmaker.REQUIRES_RUPTURE_PARAMETERS
    rup_data_dt = numpy.dtype(
        [('rupserial', U32), ('multiplicity', U16), ('numsites', U32)] + [
            (param, F32) for param in params])

    eb_ruptures = []
    rup_data = []
    calc_times = []
    rup_mon = monitor('filtering ruptures', measuremem=False)

    # Compute and save stochastic event sets
    for src in sources:
        t0 = time.time()
        s_sites = src.filter_sites_by_distance_to_source(max_dist, sitecol)
        if s_sites is None:
            continue

        rupture_filter = RuptureFilter(
            s_sites, max_dist, oq.imtls, cmaker.gsims,
            oq.truncation_level, oq.minimum_intensity)
        num_occ_by_rup = sample_ruptures(
            src, oq.ses_per_logic_tree_path, rlzs_assoc.csm_info)
        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_ruptures *before* the filtering
        for ebr in build_eb_ruptures(
                src, num_occ_by_rup, rupture_filter, oq.random_seed, rup_mon):
            nsites = len(ebr.indices)
            rc = cmaker.make_rupture_context(ebr.rupture)
            ruptparams = tuple(getattr(rc, param) for param in params)
            rup_data.append((ebr.serial, len(ebr.etags), nsites) + ruptparams)
            eb_ruptures.append(ebr)
        dt = time.time() - t0
        calc_times.append((src.id, dt))
    res = AccumDict({trt_model_id: eb_ruptures})
    res.calc_times = calc_times
    res.rup_data = numpy.array(rup_data, rup_data_dt)
    res.trt = trt
    return res
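
`rup_data_dt` above is a structured dtype built at runtime from the rupture parameters required by the GSIMs. The same construction with a hypothetical parameter list:

import numpy

U32, U16, F32 = numpy.uint32, numpy.uint16, numpy.float32
params = ['mag', 'rake']  # stand-in for cmaker.REQUIRES_RUPTURE_PARAMETERS
rup_data_dt = numpy.dtype(
    [('rupserial', U32), ('multiplicity', U16), ('numsites', U32)] +
    [(param, F32) for param in params])
arr = numpy.array([(1, 2, 10, 6.5, 90.0)], rup_data_dt)
print(arr['mag'], arr['numsites'])  # [6.5] [10]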
Example #7
    def full_disaggregation(self, curves):
        """
        Run the disaggregation phase.

        :param curves: a list of hazard curves, one per site

        The curves can be all None if iml_disagg is set in the job.ini
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = SourceFilter(self.sitecol,
                                  oq.maximum_distance,
                                  use_rtree=False)
        csm = self.csm.filter(src_filter)  # fine filtering
        self.datastore['csm_info'] = csm.info
        self.bin_edges = {}

        # build trt_edges
        trts = tuple(
            sorted(
                set(sg.trt for smodel in csm.source_models
                    for sg in smodel.src_groups)))
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        min_mag = min(sg.min_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        max_mag = max(sg.max_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        self.save_bin_edges()

        # build all_args
        all_args = []
        maxweight = csm.get_maxweight(oq.concurrent_tasks)
        mon = self.monitor('disaggregation')
        R = len(self.rlzs_assoc.realizations)
        iml4 = disagg.make_iml4(R, oq.imtls, oq.iml_disagg, oq.poes_disagg
                                or (None, ), curves)
        self.imldict = {}  # sid, rlzi, poe, imt -> iml
        for s in self.sitecol.sids:
            for r in range(R):
                for p, poe in enumerate(oq.poes_disagg or [None]):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, r, poe, imt] = iml4[s, r, m, p]

        for smodel in csm.source_models:
            sm_id = smodel.ordinal
            for trt, groups in groupby(smodel.src_groups,
                                       operator.attrgetter('trt')).items():
                trti = trt_num[trt]
                sources = sum([grp.sources for grp in groups], [])
                rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
                cmaker = ContextMaker(rlzs_by_gsim,
                                      src_filter.integration_distance)
                for block in csm.split_in_blocks(maxweight, sources):
                    all_args.append((src_filter, block, cmaker, iml4, trti,
                                     self.bin_edges, oq, mon))

        self.num_ruptures = [0] * len(self.trts)
        self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
        results = parallel.Starmap(compute_disagg,
                                   all_args).reduce(self.agg_result,
                                                    AccumDict(accum={}))
        ops, hits, num_zeros = self.cache_info
        logging.info('Cache speedup %s', ops / (ops - hits))
        logging.info('Discarded zero matrices: %d', num_zeros)
        return results
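
The magnitude edges built above are aligned to multiples of `mag_bin_width` and always cover [min_mag, max_mag]. With illustrative values:

import numpy

min_mag, max_mag, width = 4.7, 7.2, 0.5  # illustrative values
mag_edges = width * numpy.arange(
    int(numpy.floor(min_mag / width)),
    int(numpy.ceil(max_mag / width) + 1))
print(mag_edges)  # [4.5 5.  5.5 6.  6.5 7.  7.5]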
Example #8
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        sources = self.csm.get_sources()
        # weighting the heavy sources
        nrups = parallel.Starmap(count_ruptures,
                                 [(src, )
                                  for src in sources if src.code in b'AMC'],
                                 h5=self.datastore.hdf5).reduce()
        for src in sources:
            src.nsites = 1  # avoid 0 weight
            try:
                src.num_ruptures = nrups[src.source_id]
            except KeyError:
                src.num_ruptures = src.count_ruptures()
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        logging.info('Building ruptures')
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(sample_ruptures,
                                allargs,
                                h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        self.nruptures = 0
        for dic in smap:
            # NB: dic should be a dictionary, but when the calculation dies
            # for an OOM it can become None, thus giving a very confusing error
            if dic is None:
                raise MemoryError('You ran out of memory!')
            rup_array = dic['rup_array']
            if len(rup_array) == 0:
                continue
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            with mon:
                n = len(rup_array)
                rup_array['id'] = numpy.arange(self.nruptures,
                                               self.nruptures + n)
                self.nruptures += n
                hdf5.extend(self.datastore['ruptures'], rup_array)
                hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
        if len(self.datastore['ruptures']) == 0:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # must be called before storing the events
        self.store_rlz_info(eff_ruptures)  # store full_lt
        self.store_source_info(calc_times)
        imp = calc.RuptureImporter(self.datastore)
        with self.monitor('saving ruptures and events'):
            imp.import_rups(self.datastore.getitem('ruptures')[()])
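
The id assignment above gives globally consecutive rupture ids across the rup_array chunks arriving from different tasks, by keeping a running total:

import numpy

nruptures = 0  # running total, as self.nruptures above
for chunk_len in (3, 2):  # two chunks of hypothetical sizes
    ids = numpy.arange(nruptures, nruptures + chunk_len)
    nruptures += chunk_len
    print(ids)  # [0 1 2], then [3 4]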
Example #9
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values

        def weight_src(src):
            return src.num_ruptures

        logging.info('Building ruptures')
        smap = parallel.Starmap(
            self.build_ruptures.__func__, monitor=self.monitor())
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                for block in self.block_splitter(
                        sg.sources, weight_src, by_grp):
                    if 'ucerf' in oq.calculation_mode:
                        for i in range(oq.ses_per_logic_tree_path):
                            par = par.copy()  # avoid mutating the shared dict
                            par['ses_seeds'] = [(ses_idx, oq.ses_seed + i + 1)]
                            smap.submit(block, self.src_filter, par)
                            ses_idx += 1
                    else:
                        smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        store_rlzs_by_grp(self.datastore)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        self.save_events(sorted_ruptures)
Example #10
def scenario_damage(riskinputs, param, monitor):
    """
    Core function for a damage computation.

    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :param param:
        dictionary of extra parameters
    :returns:
        a dictionary {'d_asset': [(l, r, a, mean-stddev), ...],
                      'd_event': dict eid -> array of shape (L, D)
                      + optional consequences}

    `d_asset` and `d_event` are related to the damage distributions.
    """
    crmodel = monitor.read('crmodel')
    L = len(crmodel.loss_types)
    D = len(crmodel.damage_states)
    consequences = crmodel.get_consequences()
    # algorithm used to compute the discrete damage distributions
    approx_ddd = param['approx_ddd']
    z = numpy.zeros((L, D - 1), F32 if approx_ddd else U32)
    d_event = AccumDict(accum=z)
    res = {'d_event': d_event, 'd_asset': []}
    for name in consequences:
        res['avg_' + name] = []
        res[name + '_by_event'] = AccumDict(accum=numpy.zeros(L, F64))
        # using F64 here is necessary: with F32 the non-associativity
        # of addition would hurt too much with multiple tasks
    seed = param['master_seed']
    num_events = param['num_events']  # per realization
    for ri in riskinputs:
        # here instead F32 floats are ok
        acc = []  # (aid, eid, lid, ds...)
        ri.hazard_getter.init()
        for out in ri.gen_outputs(crmodel, monitor):
            r = out.rlzi
            ne = num_events[r]  # total number of events
            for l, loss_type in enumerate(crmodel.loss_types):
                for asset, fractions in zip(ri.assets, out[loss_type]):
                    aid = asset['ordinal']
                    if approx_ddd:
                        ddds = fractions * asset['number']
                    else:
                        ddds = bin_ddd(fractions, asset['number'], seed + aid)
                    # ddds has shape E', D with E' == len(out.eids)
                    for e, ddd in enumerate(ddds):
                        dmg = ddd[1:]
                        if dmg.sum():
                            eid = out.eids[e]  # (aid, eid, l) is unique
                            acc.append((aid, eid, l) + tuple(dmg))
                            d_event[eid][l] += ddd[1:]
                    tot = ddds.sum(axis=0)  # shape D
                    nodamage = asset['number'] * (ne - len(ddds))
                    tot[0] += nodamage
                    res['d_asset'].append((l, r, aid, tot))
                    # TODO: use the ddd, not the fractions in compute_csq
                    csq = crmodel.compute_csq(asset, fractions, loss_type)
                    for name, values in csq.items():
                        res['avg_%s' % name].append(
                            (l, r, asset['ordinal'], values.sum(axis=0)))
                        by_event = res[name + '_by_event']
                        for eid, value in zip(out.eids, values):
                            by_event[eid][l] += value
        res['aed'] = numpy.array(acc, param['asset_damage_dt'])
    return res
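
`bin_ddd` above converts per-event damage-state fractions into integer counts for an asset with `number` units. A hedged sketch using a seeded multinomial draw (the engine's real bin_ddd may differ in detail):

import numpy

def bin_ddd(fractions, number, seed):
    # fractions: shape (E, D), one row of damage-state
    # probabilities per event, each summing to ~1
    rng = numpy.random.default_rng(seed)
    return numpy.array([rng.multinomial(number, f / f.sum())
                        for f in fractions])

print(bin_ddd(numpy.array([[0.8, 0.15, 0.05]]), 100, 42))
# e.g. [[79 17  4]] -- each row still sums to `number`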
Example #11
def scenario_damage(riskinputs, crmodel, param, monitor):
    """
    Core function for a damage computation.

    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param crmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :param param:
        dictionary of extra parameters
    :returns:
        a dictionary {'d_asset': [(l, r, a, mean-stddev), ...],
                      'd_event': dict eid -> array of shape (L, D)
                      + optional consequences}

    `d_asset` and `d_event` are related to the damage distributions.
    """
    L = len(crmodel.loss_types)
    D = len(crmodel.damage_states)
    consequences = crmodel.get_consequences()
    haz_mon = monitor('getting hazard', measuremem=False)
    rsk_mon = monitor('aggregating risk', measuremem=False)
    d_event = AccumDict(accum=numpy.zeros((L, D - 1), U32))
    res = {'d_event': d_event}
    for name in consequences:
        res[name + '_by_event'] = AccumDict(accum=numpy.zeros(L, F64))
        # using F64 here is necessary: with F32 the non-associativity
        # of addition would hurt too much with multiple tasks
    seed = param['master_seed']
    # algorithm used to compute the discrete damage distributions
    make_ddd = approx_ddd if param['approx_ddd'] else bin_ddd
    for ri in riskinputs:
        # otherwise test 4b will randomly break with last digit changes
        # in dmg_by_event :-(
        result = dict(d_asset=[])
        for name in consequences:
            result[name + '_by_asset'] = []
        ddic = AccumDict(accum=numpy.zeros((L, D - 1), F32))  # aid,eid->dd
        with haz_mon:
            ri.hazard_getter.init()
        for out in ri.gen_outputs(crmodel, monitor):
            with rsk_mon:
                r = out.rlzi
                for l, loss_type in enumerate(crmodel.loss_types):
                    for asset, fractions in zip(ri.assets, out[loss_type]):
                        aid = asset['ordinal']
                        ddds = make_ddd(fractions, asset['number'], seed + aid)
                        for e, ddd in enumerate(ddds):
                            eid = out.eids[e]
                            ddic[aid, eid][l] = ddd[1:]
                            d_event[eid][l] += ddd[1:]
                        if make_ddd is approx_ddd:
                            ms = mean_std(fractions * asset['number'])
                        else:
                            ms = mean_std(ddds)
                        result['d_asset'].append((l, r, asset['ordinal'], ms))
                        # TODO: use the ddd, not the fractions in compute_csq
                        csq = crmodel.compute_csq(asset, fractions, loss_type)
                        for name, values in csq.items():
                            result[name + '_by_asset'].append(
                                (l, r, asset['ordinal'], mean_std(values)))
                            by_event = res[name + '_by_event']
                            for eid, value in zip(out.eids, values):
                                by_event[eid][l] += value
        with rsk_mon:
            result['aed'] = aed = numpy.zeros(len(ddic), param['aed_dt'])
            for i, ((aid, eid), dd) in enumerate(sorted(ddic.items())):
                aed[i] = (aid, eid, dd)
        yield result
    yield res
Example #12
def run_preclassical(csm, oqparam, h5):
    """
    :param csm: a CompositeSourceModel with attribute .srcfilter
    :param oqparam: the parameters in job.ini file
    :param h5: a DataStore instance
    """
    # do nothing for atomic sources except counting the ruptures
    for src in csm.get_sources(atomic=True):
        src.num_ruptures = src.count_ruptures()
        src.nsites = len(csm.sitecol) if csm.sitecol else 1

    # run preclassical for non-atomic sources
    sources_by_grp = groupby(csm.get_sources(atomic=False), lambda src:
                             (src.grp_id, msr_name(src)))
    param = dict(maximum_distance=oqparam.maximum_distance,
                 pointsource_distance=oqparam.pointsource_distance,
                 ps_grid_spacing=oqparam.ps_grid_spacing,
                 split_sources=oqparam.split_sources)
    srcfilter = SourceFilter(
        csm.sitecol.reduce(10000) if csm.sitecol else None,
        oqparam.maximum_distance)
    if csm.sitecol:
        logging.info('Sending %s', srcfilter.sitecol)
    if oqparam.ps_grid_spacing:
        # produce a preclassical task for each group
        allargs = ((srcs, srcfilter, param)
                   for srcs in sources_by_grp.values())
    else:
        # produce many preclassical tasks
        maxw = sum(len(srcs) for srcs in sources_by_grp.values()) / (
            oqparam.concurrent_tasks or 1)
        allargs = ((blk, srcfilter, param) for srcs in sources_by_grp.values()
                   for blk in block_splitter(srcs, maxw))
    res = parallel.Starmap(
        preclassical,
        allargs,
        h5=h5,
        distribute=None if len(sources_by_grp) > 1 else 'no').reduce()

    if res and res['before'] != res['after']:
        logging.info(
            'Reduced the number of sources from {:_d} to {:_d}'.format(
                res['before'], res['after']))

    if res and h5:
        csm.update_source_info(res['calc_times'], nsites=True)

    acc = AccumDict(accum=0)
    code2cls = get_code2cls()
    for grp_id, srcs in res.items():
        # srcs can be empty if the minimum_magnitude filter is on
        if srcs and not isinstance(grp_id, str):
            newsg = SourceGroup(srcs[0].tectonic_region_type)
            newsg.sources = srcs
            csm.src_groups[grp_id] = newsg
            for src in srcs:
                acc[src.code] += int(src.num_ruptures)
    for val, key in sorted((val, key) for key, val in acc.items()):
        cls = code2cls[key].__name__
        logging.info('{} ruptures: {:_d}'.format(cls, val))

    # sanity check
    for sg in csm.src_groups:
        for src in sg:
            assert src.num_ruptures
            assert src.nsites

    # store ps_grid data, if any
    for key, sources in res.items():
        if isinstance(key, str) and key.startswith('ps_grid/'):
            arrays = []
            for ps in sources:
                if hasattr(ps, 'location'):
                    lonlats = [ps.location.x, ps.location.y]
                    for src in getattr(ps, 'pointsources', []):
                        lonlats.extend([src.location.x, src.location.y])
                    arrays.append(F32(lonlats))
            h5[key] = arrays
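
`block_splitter` above slices the sources into tasks of roughly `maxw` weight. A minimal sketch of such a greedy splitter (the baselib version also accepts weight and key functions):

def block_splitter(items, maxw, weight=lambda item: 1):
    # greedily fill a block until the weight budget maxw is reached
    block, tot = [], 0
    for item in items:
        block.append(item)
        tot += weight(item)
        if tot >= maxw:
            yield block
            block, tot = [], 0
    if block:
        yield block

print(list(block_splitter(range(7), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]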
Example #13
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with datastore.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.store_stats()  # post-processing
            return {}

        assert oq.max_sites_per_tile > oq.max_sites_disagg, (
            oq.max_sites_per_tile, oq.max_sites_disagg)
        # must run before run_preclassical to set the pointsource_distance
        psd = self.set_psd()
        run_preclassical(self.csm, oq, self.datastore)

        # exit early if we want to perform only a preclassical
        if oq.calculation_mode == 'preclassical':
            recs = [tuple(row) for row in self.csm.source_info.values()]
            self.datastore['source_info'] = numpy.array(
                recs, readinput.source_info_dt)
            self.datastore['full_lt'] = self.csm.full_lt
            self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
            return

        self.create_dsets()  # create the rup/ datasets BEFORE swmr_on()
        grp_ids = numpy.arange(len(self.csm.src_groups))
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        weights = [rlz.weight for rlz in self.realizations]
        pgetter = getters.PmapGetter(self.datastore, weights,
                                     self.sitecol.sids, oq.imtls)
        srcidx = {
            rec[0]: i
            for i, rec in enumerate(self.csm.source_info.values())
        }
        self.haz = Hazard(self.datastore, self.full_lt, pgetter, srcidx,
                          self.monitor('storing _poes', measuremem=True))
        args = self.get_args(grp_ids, self.haz.cmakers)
        self.counts = collections.Counter(arg[0][0].grp_id for arg in args)
        logging.info('grp_id->ntasks: %s', dict(self.counts))
        h5 = self.datastore.hdf5
        if self.N > oq.max_sites_per_tile:
            smap = parallel.Starmap(classical_tile, args, h5=h5)
        else:
            smap = parallel.Starmap(classical, args, h5=h5)
        smap.monitor.save('sitecol', self.sitecol)
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        acc = {}
        for grp_id, num_tasks in self.counts.items():
            if num_tasks > 1:
                self.haz.init(acc, grp_id)
        logging.info('Sending %d tasks', len(args))
        smap.reduce(self.agg_dicts, acc)
        logging.debug("busy time: %s", smap.busytime)
        self.haz.store_disagg(acc)
        if not oq.hazard_calculation_id:
            self.haz.store_disagg()
        self.store_info(psd)
        logging.info('Saving _poes')
        for grp_id in list(acc):
            if isinstance(grp_id, int):
                self.haz.store_poes(grp_id, acc.pop(grp_id))
        return True
Example #14
def scenario_damage(riskinputs, param, monitor):
    """
    Core function for a damage computation.

    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :param param:
        dictionary of extra parameters
    :returns:
        a dictionary of arrays
    """
    crmodel = monitor.read('crmodel')
    L = len(crmodel.loss_types)
    D = len(crmodel.damage_states)
    consequences = crmodel.get_consequences()
    # algorithm used to compute the discrete damage distributions
    float_dmg_dist = param['float_dmg_dist']
    z = numpy.zeros((L, D - 1), F32 if float_dmg_dist else U32)
    d_event = AccumDict(accum=z)
    res = {'d_event': d_event, 'd_asset': []}
    for name in consequences:
        res['avg_' + name] = []
        res[name + '_by_event'] = AccumDict(accum=numpy.zeros(L, F64))
        # using F64 here is necessary: with F32 the non-associativity
        # of addition would hurt too much with multiple tasks
    seed = param['master_seed']
    num_events = param['num_events']  # per realization
    acc = []  # (aid, eid, lid, ds...)
    sec_sims = param['secondary_simulations'].items()
    for ri in riskinputs:
        # here instead F32 floats are ok
        R = ri.hazard_getter.num_rlzs
        for out in ri.gen_outputs(crmodel, monitor):
            for r in range(R):
                ne = num_events[r]  # total number of events
                ok = out['haz'].rlz.to_numpy() == r  # events belonging to rlz r
                if ok.sum() == 0:
                    continue
                eids = out['eids'][ok]
                for lti, loss_type in enumerate(crmodel.loss_types):
                    for asset, fractions in zip(
                            out['assets'], out[loss_type][:, ok]):
                        aid = asset['ordinal']
                        if float_dmg_dist:
                            damages = fractions * asset['number']
                            if sec_sims:
                                run_sec_sims(
                                    damages, out['haz'][ok], sec_sims,
                                    seed + aid)
                        else:
                            damages = bin_ddd(
                                fractions, asset['number'], seed + aid)
                        # damages has shape E', D with E' == len(eids)
                        for e, ddd in enumerate(damages):
                            dmg = ddd[1:]
                            if dmg.sum():
                                eid = eids[e]  # (aid, eid, l) is unique
                                acc.append((aid, eid, lti) + tuple(dmg))
                                d_event[eid][lti] += ddd[1:]
                        tot = damages.sum(axis=0)  # (E', D) -> D
                        nodamage = asset['number'] * (ne - len(damages))
                        tot[0] += nodamage
                        res['d_asset'].append((lti, r, aid, tot))
                        # TODO: use the ddd, not the fractions in compute_csq
                        csq = crmodel.compute_csq(asset, fractions, loss_type)
                        for name, values in csq.items():
                            res['avg_%s' % name].append(
                                (lti, r, asset['ordinal'], values.sum(axis=0)))
                            by_event = res[name + '_by_event']
                            for eid, value in zip(eids, values):
                                by_event[eid][lti] += value
    res['aed'] = numpy.array(acc, param['asset_damage_dt'])
    return res
Example #15
def compute_disagg(dstore, rctx, cmaker, hmap4, trti, bin_edges, oq, monitor):
    # see https://bugs.launchpad.net/oq-engine/+bug/1279247 for an explanation
    # of the algorithm used
    """
    :param dstore:
        a DataStore instance
    :param rctx:
        an array of rupture parameters
    :param cmaker:
        a :class:`openquake.hazardlib.gsim.base.ContextMaker` instance
    :param hmap4:
        an ArrayWrapper of shape (N, M, P, Z)
    :param trti:
        tectonic region type index
    :param bin_edges:
        a quintet (mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
    :param oq:
        the OqParam instance for the current calculation
    :param monitor:
        monitor of the currently running job
    :returns:
        a dictionary sid, imti -> 6D-array
    """
    RuptureContext.temporal_occurrence_model = PoissonTOM(
        oq.investigation_time)
    with monitor('reading contexts', measuremem=True):
        dstore.open('r')
        ctxs, close_ctxs = read_ctxs(
            dstore, rctx, req_site_params=cmaker.REQUIRES_SITES_PARAMETERS)

    magi = numpy.searchsorted(bin_edges[0], rctx[0]['mag']) - 1
    if magi == -1:  # when the magnitude is on the edge
        magi = 0
    dis_mon = monitor('disaggregate', measuremem=False)
    ms_mon = monitor('disagg mean_std', measuremem=True)
    N, M, P, Z = hmap4.shape
    g_by_z = AccumDict(accum={})  # dict s -> z -> g
    for g, rlzs in enumerate(cmaker.gsims.values()):
        for (s, z), r in numpy.ndenumerate(hmap4.rlzs):
            if r in rlzs:
                g_by_z[s][z] = g
    eps3 = disagg._eps3(cmaker.trunclevel, oq.num_epsilon_bins)
    res = {'trti': trti, 'magi': magi}
    imts = [from_string(im) for im in oq.imtls]
    with ms_mon:
        # compute mean and std for a single IMT to save memory
        # the size is N * U * G * 16 bytes
        disagg.set_mean_std(ctxs, imts, cmaker.gsims)

    # disaggregate by site, IMT
    for s, iml3 in enumerate(hmap4):
        if not g_by_z[s] or not close_ctxs[s]:
            # g_by_z[s] is empty in test case_7
            continue
        # dist_bins, lon_bins, lat_bins, eps_bins
        bins = (bin_edges[1], bin_edges[2][s], bin_edges[3][s], bin_edges[4])
        iml2 = dict(zip(imts, iml3))
        with dis_mon:
            # build the 7D matrix with dimensions
            # #distbins, #lonbins, #latbins, #epsbins, M, P, Z
            matrix = disagg.disaggregate(close_ctxs[s], g_by_z[s], iml2, eps3,
                                         s, bins)
            for m in range(M):
                mat6 = matrix[..., m, :, :]
                if mat6.any():
                    res[s, m] = output(mat6)
    return res
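
The magnitude-bin lookup at the top of the function, in miniature: searchsorted returns the insertion index, so subtracting 1 gives the bin, and a magnitude sitting exactly on the lowest edge yields -1 and is clamped to bin 0:

import numpy

mag_edges = numpy.array([4.5, 5.5, 6.5, 7.5])  # illustrative edges
for mag in (4.5, 6.0, 7.4):
    magi = numpy.searchsorted(mag_edges, mag) - 1
    if magi == -1:  # magnitude exactly on the lowest edge
        magi = 0
    print(mag, '-> bin', magi)  # 4.5 -> 0, 6.0 -> 1, 7.4 -> 2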
Example #16
    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        srcfilter = self.src_filter(self.datastore.tempname)
        self.indices = AccumDict(accum=[])  # sid, idx -> indices
        if oq.hazard_calculation_id:  # from ruptures
            self.datastore.parent = util.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.datastore.parent['full_lt'])
        else:  # from sources
            self.build_events_from_sources(srcfilter)
            if (oq.ground_motion_fields is False
                    and oq.hazard_curves_from_gmfs is False):
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        N = len(self.sitecol.complete)
        if oq.ground_motion_fields:
            nrups = len(self.datastore['ruptures'])
            self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
            self.datastore.create_dset('gmf_data/sigma_epsilon',
                                       sig_eps_dt(oq.imtls))
            self.datastore.create_dset('gmf_data/indices',
                                       hdf5.vuint32,
                                       shape=(N, 2),
                                       fillvalue=None)
            self.datastore.create_dset('gmf_data/events_by_sid', U32, (N, ))
            self.datastore.create_dset('gmf_data/time_by_rup',
                                       time_dt, (nrups, ),
                                       fillvalue=None)
        if oq.hazard_curves_from_gmfs:
            self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

        # compute_gmfs in parallel
        self.datastore.swmr_on()
        logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
        iterargs = (
            (rgetter, srcfilter, self.param)
            for rgetter in gen_rupture_getters(self.datastore, srcfilter))
        acc = parallel.Starmap(self.core_task.__func__,
                               iterargs,
                               h5=self.datastore.hdf5,
                               num_cores=oq.num_cores).reduce(
                                   self.agg_dicts, self.acc0())

        if self.indices:
            dset = self.datastore['gmf_data/indices']
            num_evs = self.datastore['gmf_data/events_by_sid']
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
            avg_events_by_sid = num_evs[()].sum() / N
            logging.info('Found ~%d GMVs per site', avg_events_by_sid)
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc
Example #17
def get_risk_models(oqparam,
                    kind='vulnerability fragility consequence '
                    'vulnerability_retrofitted'):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        a space-separated string with the kinds of risk models to read
    :returns:
        a dictionary riskid -> loss_type, kind -> function
    """
    kinds = kind.split()
    rmodels = AccumDict()
    for kind in kinds:
        for key in sorted(oqparam.inputs):
            mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
            if mo:
                loss_type = mo.group(1)  # the cost_type in the key
                # can be occupants, structural, nonstructural, ...
                rmodel = nrml.to_python(oqparam.inputs[key])
                if len(rmodel) == 0:
                    raise InvalidFile('%s is empty!' % oqparam.inputs[key])
                rmodels[loss_type, kind] = rmodel
                if rmodel.lossCategory is None:  # NRML 0.4
                    continue
                cost_type = str(rmodel.lossCategory)
                rmodel_kind = rmodel.__class__.__name__
                kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
                if not rmodel_kind.lower().startswith(kind_):
                    raise ValueError('Error in the file "%s_file=%s": is '
                                     'of kind %s, expected %s' %
                                     (key, oqparam.inputs[key], rmodel_kind,
                                      kind.capitalize() + 'Model'))
                if cost_type != loss_type:
                    raise ValueError(
                        'Error in the file "%s_file=%s": lossCategory is of '
                        'type "%s", expected "%s"' %
                        (key, oqparam.inputs[key], rmodel.lossCategory,
                         loss_type))
    rdict = AccumDict(accum={})
    rdict.limit_states = []
    for (loss_type, kind), rm in sorted(rmodels.items()):
        if kind == 'fragility':
            # build a copy of the FragilityModel with different IM levels
            newfm = rm.build(oqparam.continuous_fragility_discretization,
                             oqparam.steps_per_interval)
            for (imt, riskid), ffl in sorted(newfm.items()):
                if not rdict.limit_states:
                    rdict.limit_states.extend(rm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert rdict.limit_states == rm.limitStates, (
                    rdict.limit_states, rm.limitStates)
                rdict[riskid][loss_type, kind] = ffl
                # TODO: see if it is possible to remove the attribute
                # below, used in classical_damage
                ffl.steps_per_interval = oqparam.steps_per_interval
        elif kind == 'consequence':
            for riskid, cf in sorted(rm.items()):
                rdict[riskid][loss_type, kind] = cf
        else:  # vulnerability, vulnerability_retrofitted
            cl_risk = oqparam.calculation_mode in ('classical',
                                                   'classical_risk')
            # only for classical_risk reduce the loss_ratios
            # to make sure they are strictly increasing
            for (imt, riskid), rf in sorted(rm.items()):
                rdict[riskid][loss_type, kind] = (rf.strictly_increasing()
                                                  if cl_risk else rf)
    return rdict
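
The key matching above pairs input names like `structural_vulnerability` with a loss type and a kind. The same logic in isolation, with an assumed COST_TYPE_REGEX (the real one is derived from the engine's cost types):

import re

COST_TYPE_REGEX = 'structural|nonstructural|contents'  # assumed
kind = 'vulnerability'
for key in ('structural_vulnerability', 'occupants_vulnerability',
            'site_model'):
    mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
    print(key, '->', mo.group(1) if mo else None)
# structural_vulnerability -> structural
# occupants_vulnerability -> occupants
# site_model -> None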
Example #18
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .calc_times and
        .eff_ruptures
    """
    if getattr(group, 'src_interdep', None) == 'mutex':
        mutex_weight = {
            src.source_id: weight
            for src, weight in zip(group.sources, group.srcs_weights)
        }
    else:
        mutex_weight = None
    grp_ids = set()
    for src in group:
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    imtls = param['imtls']
    trunclevel = param.get('truncation_level')
    cmaker = ContextMaker(gsims, maxdist, param['filter_distance'], monitor)
    pmap = AccumDict({
        grp_id: ProbabilityMap(len(imtls.array), len(gsims))
        for grp_id in grp_ids
    })
    # AccumDict of arrays with 4 elements weight, nsites, calc_time, split
    pmap.calc_times = AccumDict(accum=numpy.zeros(4))
    pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
    for src, s_sites in src_filter(group):  # filter now
        t0 = time.time()
        indep = group.rup_interdep == 'indep' if mutex_weight else True
        try:
            poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, indep)
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = '%s (source id=%s)' % (str(err), src.source_id)
            raise etype(msg).with_traceback(tb)
        if mutex_weight:  # mutex sources
            weight = mutex_weight[src.source_id]
            for sid in poemap:
                pcurve = pmap[group.id].setdefault(sid, 0)
                pcurve += poemap[sid] * weight
        elif poemap:
            for grp_id in src.src_group_ids:
                pmap[grp_id] |= poemap
        src_id = src.source_id.split(':', 1)[0]
        pmap.calc_times[src_id] += numpy.array(
            [src.weight, len(s_sites),
             time.time() - t0, 1])
        # storing the number of contributing ruptures too
        pmap.eff_ruptures += {
            grp_id: getattr(poemap, 'eff_ruptures', 0)
            for grp_id in src.src_group_ids
        }
    if mutex_weight and group.grp_probability is not None:
        pmap[group.id] *= group.grp_probability
    return pmap
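
The pattern pmap.calc_times[src_id] += array relies on the AccumDict semantics: a missing key yields a fresh copy of the accumulator, so additions always work. A minimal demonstration, assuming AccumDict lives in openquake.baselib.general as in current engine versions:

import numpy
from openquake.baselib.general import AccumDict

calc_times = AccumDict(accum=numpy.zeros(4))
calc_times['src_A'] += numpy.array([1., 10, .5, 1])  # weight, nsites, dt, split
calc_times['src_A'] += numpy.array([2., 5, .2, 1])   # accumulates in place
print(calc_times['src_A'])  # -> [ 3.  15.   0.7  2. ]
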
Example #19
def event_based_risk(riskinputs, crmodel, param, monitor):
    """
    :param riskinputs:
        :class:`openquake.risklib.riskinput.RiskInput` objects
    :param crmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param param:
        a dictionary of parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :yields:
        one dictionary of numpy arrays per riskinput, with keys
        'aids', 'avglosses' and 'agglosses' (plus loss curves and
        maps when a builder is configured)
    """
    L = len(crmodel.lti)
    tempname = param['tempname']
    for ri in riskinputs:
        with monitor('getting hazard'):
            ri.hazard_getter.init()
            hazard = ri.hazard_getter.get_hazard()
        mon = monitor('build risk curves', measuremem=False)
        A = len(ri.aids)
        R = ri.hazard_getter.num_rlzs
        try:
            avg = numpy.zeros((A, R, L), F32)
        except MemoryError:
            raise MemoryError(
                'Building array avg of shape (%d, %d, %d)' % (A, R, L))
        result = dict(aids=ri.aids, avglosses=avg)
        acc = AccumDict()  # accumulator eidx -> agglosses
        aid2idx = {aid: idx for idx, aid in enumerate(ri.aids)}
        if 'builder' in param:
            builder = param['builder']
            P = len(builder.return_periods)
            all_curves = numpy.zeros((A, R, P), builder.loss_dt)
        # update the result dictionary and the agg array with each output
        for out in ri.gen_outputs(crmodel, monitor, tempname, hazard):
            if len(out.eids) == 0:  # this happens for sites with no events
                continue
            r = out.rlzi
            agglosses = numpy.zeros((len(out.eids), L), F32)
            for l, loss_type in enumerate(crmodel.loss_types):
                loss_ratios = out[loss_type]
                if loss_ratios is None:  # for GMFs below the minimum_intensity
                    continue
                avalues = riskmodels.get_values(loss_type, ri.assets)
                for a, asset in enumerate(ri.assets):
                    aval = avalues[a]
                    aid = asset['ordinal']
                    idx = aid2idx[aid]
                    ratios = loss_ratios[a]  # length E

                    # average losses
                    avg[idx, r, l] = (
                        ratios.sum(axis=0) * param['ses_ratio'] * aval)

                    # agglosses
                    agglosses[:, l] += ratios * aval
                    if 'builder' in param:
                        with mon:  # this is the heaviest part
                            all_curves[idx, r][loss_type] = (
                                builder.build_curve(aval, ratios, r))

            # NB: I could yield the agglosses per output, but then I would
            # have millions of small outputs with big data transfer and slow
            # saving time
            acc += dict(zip(out.eids, agglosses))

        if 'builder' in param:
            clp = param['conditional_loss_poes']
            result['curves-rlzs'], result['curves-stats'] = builder.pair(
                all_curves, param['stats'])
            if R > 1 and param['individual_curves'] is False:
                del result['curves-rlzs']
            if clp:
                result['loss_maps-rlzs'], result['loss_maps-stats'] = (
                    builder.build_maps(all_curves, clp, param['stats']))
                if R > 1 and param['individual_curves'] is False:
                    del result['loss_maps-rlzs']

        # store info about the GMFs, must be done at the end
        result['agglosses'] = (numpy.array(list(acc)),
                               numpy.array(list(acc.values())))
        yield result
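
A hypothetical driver loop (not from the source) showing how the generator above could be consumed; riskinputs, crmodel, param and monitor are assumed to be built elsewhere by the engine:

total = {}  # eid -> aggregated losses of shape (L,)
for res in event_based_risk(riskinputs, crmodel, param, monitor):
    eids, losses = res['agglosses']
    for eid, loss in zip(eids, losses):
        total[eid] = total.get(eid, 0) + loss
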
Example #20
    def reduce(self, agg=operator.add, acc=None):
        """
        Fold the results of the iterator into an accumulator, using the
        given aggregation function (operator.add by default).
        """
        if acc is None:
            acc = AccumDict()
        for result in self:
            acc = agg(acc, result)
        return acc
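
Illustrative use of the fold above: task results are merged into an accumulator with operator.add, mirroring the smap.reduce(...) calls elsewhere in this document. The results list is a stand-in for an iterator of task outputs:

import operator
from openquake.baselib.general import AccumDict

results = [AccumDict({'a': 1}), AccumDict({'a': 2, 'b': 5})]
acc = AccumDict()
for result in results:
    acc = operator.add(acc, result)  # AccumDict adds values on common keys
print(acc)  # -> {'a': 3, 'b': 5}
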
Example #21
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, self.ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, self.ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                self.datastore['best_rlzs'] = rlzs
        else:
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        # build trt_edges
        trts = tuple(self.full_lt.trts)
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        mags = [float(mag) for mag in self.datastore['source_mags']]
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min(mags) / oq.mag_bin_width)),
            int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max(mags))
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        shapedic = self.save_bin_edges()
        del shapedic['trt']
        shapedic['N'] = self.N
        shapedic['M'] = len(oq.imtls)
        shapedic['P'] = len(oq.poes_disagg)
        shapedic['Z'] = Z
        shapedic['concurrent_tasks'] = oq.concurrent_tasks
        nbytes, msg = get_array_nbytes(shapedic)
        if nbytes > oq.max_data_transfer:
            raise ValueError('Estimated data transfer too big\n%s' % msg)
        logging.info('Estimated data transfer: %s', msg)
        self.imldict = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

        # submit #groups disaggregation tasks
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        indices = get_indices(dstore, oq.concurrent_tasks or 1)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        for grp_id, trt in self.full_lt.trt_by_grp.items():
            logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
            trti = trt_num[trt]
            cmaker = ContextMaker(
                trt, self.full_lt.get_rlzs_by_gsim(grp_id),
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': src_filter.integration_distance,
                 'filter_distance': oq.filter_distance, 'imtls': oq.imtls})
            for idxs in indices[grp_id]:
                smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                             self.bin_edges))
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 8D array
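
A worked instance of the bin-edge arithmetic used above, with assumed values (mag_bin_width=0.5 and magnitudes spanning 5.1-7.3): the edges cover the full magnitude range at multiples of the bin width.

import numpy

mag_bin_width = 0.5
mags = [5.1, 6.0, 7.3]
mag_edges = mag_bin_width * numpy.arange(
    int(numpy.floor(min(mags) / mag_bin_width)),
    int(numpy.ceil(max(mags) / mag_bin_width) + 1))
print(mag_edges)  # -> [5.  5.5 6.  6.5 7.  7.5]
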
Example #22
def get_risk_models(oqparam, kind=None):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        vulnerability|vulnerability_retrofitted|fragility|consequence;
        if None it is extracted from the oqparam.file_type attribute
    :returns:
        a dictionary taxonomy -> loss_type -> function
    """
    kind = kind or oqparam.file_type
    rmodels = AccumDict()
    rmodels.limit_states = []
    for key in sorted(oqparam.inputs):
        mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
        if mo:
            key_type = mo.group(1)  # the cost_type in the key
            # can be occupants, structural, nonstructural, ...
            rmodel = nrml.to_python(oqparam.inputs[key])
            rmodels[key_type] = rmodel
            if rmodel.lossCategory is None:  # NRML 0.4
                continue
            cost_type = str(rmodel.lossCategory)
            rmodel_kind = rmodel.__class__.__name__
            kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
            if not rmodel_kind.lower().startswith(kind_):
                raise ValueError('Error in the file "%s_file=%s": is '
                                 'of kind %s, expected %s' %
                                 (key, oqparam.inputs[key], rmodel_kind,
                                  kind.capitalize() + 'Model'))
            if cost_type != key_type:
                raise ValueError(
                    'Error in the file "%s_file=%s": lossCategory is of type '
                    '"%s", expected "%s"' %
                    (key, oqparam.inputs[key], rmodel.lossCategory, key_type))
    rdict = AccumDict(accum={})
    rdict.limit_states = []
    if kind == 'fragility':
        limit_states = []
        for loss_type, fm in sorted(rmodels.items()):
            # build a copy of the FragilityModel with different IM levels
            newfm = fm.build(oqparam.continuous_fragility_discretization,
                             oqparam.steps_per_interval)
            for (imt, taxo), ffl in newfm.items():
                if not limit_states:
                    limit_states.extend(fm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert limit_states == fm.limitStates, (limit_states,
                                                        fm.limitStates)
                rdict[taxo][loss_type] = ffl
                # TODO: see if it is possible to remove the attribute
                # below, used in classical_damage
                ffl.steps_per_interval = oqparam.steps_per_interval
        rdict.limit_states = [str(ls) for ls in limit_states]
    elif kind == 'consequence':
        rdict = rmodels
    else:  # vulnerability
        cl_risk = oqparam.calculation_mode in ('classical', 'classical_risk')
        # only for classical_risk reduce the loss_ratios
        # to make sure they are strictly increasing
        for loss_type, rm in rmodels.items():
            for (imt, taxo), rf in rm.items():
                rdict[taxo][loss_type] = (rf.strictly_increasing()
                                          if cl_risk else rf)
    return rdict
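
A sketch of the input-key matching above. COST_TYPE_REGEX is assumed here to be a plain alternation of cost types; the real constant is defined in the engine and may differ:

import re

COST_TYPE_REGEX = 'structural|nonstructural|contents'  # assumption
kind = 'fragility'
for key in ['structural_fragility', 'occupants_fragility', 'job_ini']:
    mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
    print(key, '->', mo.group(1) if mo else None)
# structural_fragility -> structural
# occupants_fragility -> occupants
# job_ini -> None
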
Example #23
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    if getattr(group, 'src_interdep', None) == 'mutex':
        mutex_weight = {
            src.source_id: weight
            for src, weight in zip(group.sources, group.srcs_weights)
        }
        srcs = group.sources
    else:
        mutex_weight = None
        srcs = sum([split_source(src) for src in group], [])
    grp_ids = set()
    for src in group:
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    with GroundShakingIntensityModel.forbid_instantiation():
        imtls = param['imtls']
        trunclevel = param.get('truncation_level')
        cmaker = ContextMaker(gsims, maxdist)
        ctx_mon = monitor('make_contexts', measuremem=False)
        poe_mon = monitor('get_poes', measuremem=False)
        pmap = AccumDict({
            grp_id: ProbabilityMap(len(imtls.array), len(gsims))
            for grp_id in grp_ids
        })
        # AccumDict of arrays with 4 elements weight, nsites, calc_time, split
        pmap.calc_times = AccumDict(accum=numpy.zeros(4))
        pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
        for src, s_sites in src_filter(srcs):  # filter now
            t0 = time.time()
            indep = group.rup_interdep == 'indep' if mutex_weight else True
            poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, ctx_mon,
                                    poe_mon, indep)
            if mutex_weight:  # mutex sources
                weight = mutex_weight[src.source_id]
                for sid in poemap:
                    pcurve = pmap[group.id].setdefault(sid, 0)
                    pcurve += poemap[sid] * weight
            elif poemap:
                for grp_id in src.src_group_ids:
                    pmap[grp_id] |= poemap
            src_id = src.source_id.split(':', 1)[0]
            pmap.calc_times[src_id] += numpy.array(
                [src.weight, len(s_sites),
                 time.time() - t0, 1])
            # storing the number of contributing ruptures too
            pmap.eff_ruptures += {
                grp_id: getattr(poemap, 'eff_ruptures', 0)
                for grp_id in src.src_group_ids
            }
        if mutex_weight and group.grp_probability is not None:
            pmap[group.id] *= group.grp_probability
        return pmap
Example #24
def ucerf_risk(riskinput, riskmodel, param, monitor):
    """
    :param riskinput:
        a :class:`openquake.risklib.riskinput.RiskInput` object
    :param riskmodel:
        a :class:`openquake.risklib.riskinput.CompositeRiskModel` instance
    :param param:
        a dictionary of parameters
    :param monitor:
        :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        a dictionary with keys 'aids', 'avglosses', 'agglosses'
        and 'gmdata'
    """
    with monitor('%s.init' % riskinput.hazard_getter.__class__.__name__):
        riskinput.hazard_getter.init()
    eids = riskinput.hazard_getter.eids
    A = len(riskinput.aids)
    E = len(eids)
    assert not param['insured_losses']
    L = len(riskmodel.lti)
    R = riskinput.hazard_getter.num_rlzs
    param['lrs_dt'] = numpy.dtype([('rlzi', U16), ('ratios', (F32, L))])
    agg = numpy.zeros((E, R, L), F32)
    avg = AccumDict(accum={} if riskinput.by_site or not param['avg_losses']
                    else numpy.zeros(A, F64))
    result = dict(aids=riskinput.aids, avglosses=avg)

    # update the result dictionary and the agg array with each output
    for out in riskmodel.gen_outputs(riskinput, monitor):
        if len(out.eids) == 0:  # this happens for sites with no events
            continue
        r = out.rlzi
        idx = riskinput.hazard_getter.eid2idx
        for l, loss_ratios in enumerate(out):
            if loss_ratios is None:  # for GMFs below the minimum_intensity
                continue
            loss_type = riskmodel.loss_types[l]
            indices = numpy.array([idx[eid] for eid in out.eids])
            for a, asset in enumerate(out.assets):
                ratios = loss_ratios[a]  # shape (E, I)
                aid = asset.ordinal
                losses = ratios * asset.value(loss_type)
                # average losses
                if param['avg_losses']:
                    rat = ratios.sum(axis=0) * param['ses_ratio']
                    lba = avg[l, r]
                    try:
                        lba[aid] += rat
                    except KeyError:
                        lba[aid] = rat

                # this is the critical loop: it is important to keep it
                # vectorized in terms of the event indices
                agg[indices, r, l] += losses[:, 0]  # 0 == no insured

    it = ((eid, r, losses)
          for eid, all_losses in zip(eids, agg)
          for r, losses in enumerate(all_losses) if losses.sum())
    result['agglosses'] = numpy.fromiter(it, param['elt_dt'])
    # store info about the GMFs, must be done at the end
    result['gmdata'] = riskinput.gmdata
    return result
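
A note on the "critical loop" above: agg[indices, r, l] += losses[:, 0] is correct only because the event indices are unique within an output; with repeated indices, fancy-indexed += applies once per unique index and numpy.add.at would be needed instead. A minimal demonstration:

import numpy

agg = numpy.zeros(3)
idx = numpy.array([0, 0, 2])
agg[idx] += 1.0              # index 0 is incremented only once
print(agg)                   # -> [1. 0. 1.]
numpy.add.at(agg, idx, 1.0)  # accumulates duplicates correctly
print(agg)                   # -> [3. 0. 2.]
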
Example #25
def get_risk_functions(oqparam, kind='vulnerability fragility consequence '
                       'vulnerability_retrofitted'):
    """
    :param oqparam:
        an OqParam instance
    :param kind:
        a space-separated string with the kinds of risk models to read
    :returns:
        a list of risk functions
    """
    kinds = kind.split()
    rmodels = AccumDict()
    for kind in kinds:
        for key in sorted(oqparam.inputs):
            mo = re.match('(occupants|%s)_%s$' % (COST_TYPE_REGEX, kind), key)
            if mo:
                loss_type = mo.group(1)  # the cost_type in the key
                # can be occupants, structural, nonstructural, ...
                rmodel = nrml.to_python(oqparam.inputs[key])
                if len(rmodel) == 0:
                    raise InvalidFile('%s is empty!' % oqparam.inputs[key])
                rmodels[loss_type, kind] = rmodel
                if rmodel.lossCategory is None:  # NRML 0.4
                    continue
                cost_type = str(rmodel.lossCategory)
                rmodel_kind = rmodel.__class__.__name__
                kind_ = kind.replace('_retrofitted', '')  # strip retrofitted
                if not rmodel_kind.lower().startswith(kind_):
                    raise ValueError(
                        'Error in the file "%s_file=%s": is '
                        'of kind %s, expected %s' % (
                            key, oqparam.inputs[key], rmodel_kind,
                            kind.capitalize() + 'Model'))
                if cost_type != loss_type:
                    raise ValueError(
                        'Error in the file "%s_file=%s": lossCategory is of '
                        'type "%s", expected "%s"' %
                        (key, oqparam.inputs[key],
                         rmodel.lossCategory, loss_type))
    cl_risk = oqparam.calculation_mode in ('classical', 'classical_risk')
    rlist = RiskFuncList()
    rlist.limit_states = []
    for (loss_type, kind), rm in sorted(rmodels.items()):
        if kind == 'fragility':
            for (imt, riskid), ffl in sorted(rm.items()):
                if not rlist.limit_states:
                    rlist.limit_states.extend(rm.limitStates)
                # we are rejecting the case of loss types with different
                # limit states; this may change in the future
                assert rlist.limit_states == rm.limitStates, (
                    rlist.limit_states, rm.limitStates)
                ffl.loss_type = loss_type
                ffl.kind = kind
                rlist.append(ffl)
        elif kind == 'consequence':
            for riskid, cf in sorted(rm.items()):
                rf = hdf5.ArrayWrapper(
                    cf, dict(id=riskid, loss_type=loss_type, kind=kind))
                rlist.append(rf)
        else:  # vulnerability, vulnerability_retrofitted
            # only for classical_risk reduce the loss_ratios
            # to make sure they are strictly increasing
            for (imt, riskid), rf in sorted(rm.items()):
                rf = rf.strictly_increasing() if cl_risk else rf
                rf.loss_type = loss_type
                rf.kind = kind
                rlist.append(rf)
    return rlist
Example #26
def sample_cluster(sources, srcfilter, num_ses, param):
    """
    Build the ruptures generated by a cluster of sources.

    :param sources:
        a sequence of sources belonging to the same group
    :param srcfilter:
        a source filter used to discard sources too far from the sites
    :param num_ses:
        the number of stochastic event sets
    :param param:
        a dictionary of additional parameters including
        ses_seed and src_interdep
    :returns:
        a pair (eb_ruptures, calc_times)
    """
    eb_ruptures = []
    ses_seed = param['ses_seed']
    numpy.random.seed(sources[0].serial(ses_seed))
    [et_id] = set(src.et_id for src in sources)
    # AccumDict of arrays with 3 elements: nruptures, nsites, calc_time
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    # Set the parameters required to compute the number of occurrences
    # of the group of sources
    samples = getattr(sources[0], 'samples', 1)
    tom = sources.temporal_occurrence_model
    rate = tom.occurrence_rate
    time_span = tom.time_span
    # Note that using a single time interval corresponding to the product
    # of the investigation time and the number of realisations as we do
    # here is admitted only in the case of a time-independent model
    grp_num_occ = numpy.random.poisson(rate * time_span * samples * num_ses)
    # Now we process the sources included in the group. Possible cases:
    # * The group is a cluster. In this case we choose one rupture per each
    #   source; uncertainty in the ruptures can be handled in this case
    #   using mutually exclusive ruptures (note that this is admitted
    #   only for non-parametric sources).
    # * The group contains mutually exclusive sources. In this case we
    #   choose one source and then one rupture from this source.
    rup_counter = {}
    rup_data = {}
    for rlz_num in range(grp_num_occ):
        if sources.cluster:
            for src, _ in srcfilter.filter(sources):
                # Track calculation time
                t0 = time.time()
                rup = src.get_one_rupture(ses_seed)
                # The problem here is that we do not know a-priori the
                # number of occurrences of a given rupture.
                if src.id not in rup_counter:
                    rup_counter[src.id] = {}
                    rup_data[src.id] = {}
                if rup.idx not in rup_counter[src.id]:
                    rup_counter[src.id][rup.idx] = 1
                    rup_data[src.id][rup.idx] = [rup, src.id, et_id]
                else:
                    rup_counter[src.id][rup.idx] += 1
                # Store info
                dt = time.time() - t0
                calc_times[src.id] += numpy.array(
                    [len(rup_data[src.id]), src.nsites, dt])
        elif param['src_interdep'] == 'mutex':
            raise NotImplementedError('src_interdep == mutex')
    # Create event based ruptures
    for src_key in rup_data:
        for rup_key in rup_data[src_key]:
            rup, source_id, et_id = rup_data[src_key][rup_key]
            cnt = rup_counter[src_key][rup_key]
            ebr = EBRupture(rup, source_id, et_id, cnt)
            eb_ruptures.append(ebr)

    return eb_ruptures, calc_times
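
A worked sketch of the occurrence sampling above: the number of occurrences of the whole group over num_ses stochastic event sets is one Poisson draw, which is admissible only for time-independent models. The values below are illustrative:

import numpy

numpy.random.seed(42)
rate, time_span = 0.01, 50.0  # occurrences/yr, investigation time in years
samples, num_ses = 1, 100
grp_num_occ = numpy.random.poisson(rate * time_span * samples * num_ses)
print(grp_num_occ)  # a single draw around the mean of 50
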
Example #27
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        srcfilter = self.src_filter()
        srcs = self.csm.get_sources()
        if oq.is_ucerf():
            logging.info('Prefiltering UCERFSources')
            for src in srcs:
                if hasattr(src, 'start'):
                    src.src_filter = srcfilter  # hack for .iter_ruptures
                    src.all_ridx = src.get_ridx()
        calc_times = parallel.Starmap.apply(
            preclassical, (srcs, srcfilter),
            concurrent_tasks=oq.concurrent_tasks or 1,
            num_cores=oq.num_cores, h5=self.datastore.hdf5).reduce()
        if oq.calculation_mode == 'preclassical':
            self.store_source_info(calc_times, nsites=True)
            self.datastore['full_lt'] = self.csm.full_lt
            self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
            return

        self.update_source_info(calc_times, nsites=True)
        # if OQ_SAMPLE_SOURCES is set extract one source for group
        ss = os.environ.get('OQ_SAMPLE_SOURCES')
        if ss:
            for sg in self.csm.src_groups:
                if not sg.atomic:
                    srcs = [src for src in sg if src.nsites]
                    sg.sources = [srcs[0]]

        mags = self.datastore['source_mags']  # by TRT
        if len(mags) == 0:  # everything was discarded
            raise RuntimeError('All sources were discarded!?')
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = mags[trt][()]
        psd = oq.pointsource_distance
        if psd is not None:
            psd.interp(mags_by_trt)
            for trt, dic in psd.ddic.items():
                # the sum is zero for {'default': [(1, 0), (10, 0)]}
                if sum(dic.values()):
                    it = list(dic.items())
                    md = '%s->%d ... %s->%d' % (it[0] + it[-1])
                    logging.info('ps_dist %s: %s', trt, md)
        imts_with_period = [imt for imt in oq.imtls
                            if imt == 'PGA' or imt.startswith('SA')]
        imts_ok = len(imts_with_period) == len(oq.imtls)
        if (imts_ok and psd and psd.suggested()) or (
                imts_ok and oq.minimum_intensity):
            aw = get_effect(mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
            if psd:
                dic = {trt: [(float(mag), int(dst))
                             for mag, dst in psd.ddic[trt].items()]
                       for trt in psd.ddic if trt != 'default'}
                logging.info('pointsource_distance=\n%s', pprint.pformat(dic))
            if len(vars(aw)) > 1:  # more than _extra
                self.datastore['effect_by_mag_dst'] = aw
        smap = parallel.Starmap(classical, h5=self.datastore.hdf5,
                                num_cores=oq.num_cores)
        smap.monitor.save('srcfilter', self.src_filter())
        rlzs_by_gsim_list = self.submit_tasks(smap)
        rlzs_by_g = []
        for rlzs_by_gsim in rlzs_by_gsim_list:
            for rlzs in rlzs_by_gsim.values():
                rlzs_by_g.append(rlzs)
        self.datastore['rlzs_by_g'] = [U32(rlzs) for rlzs in rlzs_by_g]
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        poes_shape = (self.N, len(oq.imtls.array), len(rlzs_by_g))  # NLG
        size = numpy.prod(poes_shape) * 8
        logging.info('Requiring %s for ProbabilityMap of shape %s',
                     humansize(size), poes_shape)
        self.datastore.create_dset('_poes', F64, poes_shape)
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            with self.monitor('store source_info'):
                self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = max(self.by_task) + 1,
                er = self.datastore.create_dset('by_task/eff_ruptures',
                                                U32, num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites',
                                                U32, num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr, num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(srcids)
                self.by_task.clear()
        self.numrups = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
            int(self.numrups), self.totrups))
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
        if psd:
            psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
            if psdist and self.maxradius >= psdist / 2:
                logging.warning('The pointsource_distance of %d km is too '
                                'small compared to a maxradius of %d km',
                                psdist, self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
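
A worked instance of the memory estimate above: the _poes dataset has shape (N, L, G) of float64, so it takes N*L*G*8 bytes. With assumed sizes:

N, L, G = 10_000, 80, 4  # sites, intensity levels, gsim-realization groups
size = N * L * G * 8     # bytes for an F64 array of that shape
print('%.1f MB' % (size / 1024 ** 2))  # -> 24.4 MB
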
Example #28
    def compute(self):
        """
        Submit disaggregation tasks and return the results
        """
        logging.info('Reading ruptures')
        oq = self.oqparam
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        mags = set()
        for trt, dset in self.datastore['source_mags'].items():
            mags.update(dset[:])
        mags = sorted(mags)
        allargs = []
        totweight = sum(d['rctx']['nsites'].sum() for n, d in dstore.items()
                        if n.startswith('mag_') and len(d['rctx']))
        et_ids = dstore['et_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids)
        G = max(len(rbg) for rbg in rlzs_by_gsim)
        maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
        maxweight = min(numpy.ceil(totweight / (oq.concurrent_tasks or 1)),
                        maxw)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        U = 0
        totrups = 0
        for mag in mags:
            rctx = dstore['mag_%s/rctx' % mag][:]
            totrups += len(rctx)
            for grp_id, gids in enumerate(et_ids):
                idxs, = numpy.where(rctx['grp_id'] == grp_id)
                if len(idxs) == 0:
                    continue
                trti = gids[0] // num_eff_rlzs
                trt = self.trts[trti]
                cmaker = ContextMaker(
                    trt, rlzs_by_gsim[grp_id], {
                        'truncation_level': oq.truncation_level,
                        'maximum_distance': oq.maximum_distance,
                        'collapse_level': oq.collapse_level,
                        'imtls': oq.imtls
                    })
                for blk in block_splitter(rctx[idxs], maxweight, nsites):
                    nr = len(blk)
                    U = max(U, blk.weight)
                    allargs.append((dstore, numpy.array(blk), cmaker,
                                    self.hmap4, trti, self.bin_edges, oq))
                    task_inputs.append((trti, mag, nr))
        logging.info('Found {:_d} ruptures'.format(totrups))
        nbytes, msg = get_array_nbytes(dict(M=self.M, G=G, U=U, F=2))
        logging.info('Maximum mean_std per task:\n%s', msg)

        s = self.shapedic
        sd = dict(N=s['N'],
                  M=s['M'],
                  P=s['P'],
                  Z=s['Z'],
                  D=s['dist'],
                  E=s['eps'],
                  Lo=s['lon'],
                  La=s['lat'])
        sd['tasks'] = numpy.ceil(len(allargs))
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)

        sd.pop('tasks')
        sd['mags_trt'] = sum(
            len(mags) for mags in self.datastore['source_mags'].values())
        nbytes, msg = get_array_nbytes(sd)
        logging.info('Estimated memory on the master:\n%s', msg)

        dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg,
                                allargs,
                                h5=self.datastore.hdf5)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array
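
A minimal sketch in the spirit of block_splitter(rctx[idxs], maxweight, nsites) above: items are grouped greedily until the accumulated weight reaches max_weight. The real block_splitter in the engine is more general (it also supports a key function):

def split_blocks(items, max_weight, weight=lambda item: 1):
    block, tot = [], 0
    for item in items:
        block.append(item)
        tot += weight(item)
        if tot >= max_weight:
            yield block
            block, tot = [], 0
    if block:
        yield block

print(list(split_blocks(range(10), 4)))
# -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
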
Example #29
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        mags = self.datastore['source_mags']  # by TRT
        if len(mags) == 0:  # everything was discarded
            raise RuntimeError('All sources were discarded!?')
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        if oq.pointsource_distance is not None:
            for trt in gsims_by_trt:
                oq.pointsource_distance[trt] = getdefault(
                    oq.pointsource_distance, trt)
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = mags[trt][()]
        imts_with_period = [imt for imt in oq.imtls
                            if imt == 'PGA' or imt.startswith('SA')]
        imts_ok = len(imts_with_period) == len(oq.imtls)
        if (imts_ok and oq.pointsource_distance and
                oq.pointsource_distance.suggested()) or (
                    imts_ok and oq.minimum_intensity):
            aw, self.psd = get_effect(
                mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
            if len(vars(aw)) > 1:  # more than _extra
                self.datastore['effect_by_mag_dst'] = aw
        elif oq.pointsource_distance:
            self.psd = oq.pointsource_distance.interp(mags_by_trt)
        else:
            self.psd = {}
        smap = parallel.Starmap(classical, h5=self.datastore.hdf5,
                                num_cores=oq.num_cores)
        self.submit_tasks(smap)
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            with self.monitor('store source_info'):
                self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = max(self.by_task) + 1,
                er = self.datastore.create_dset('by_task/eff_ruptures',
                                                U32, num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites',
                                                U32, num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr, num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(srcids)
                self.by_task.clear()
        self.numrups = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
            int(self.numrups), self.totrups))
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
        if self.psd:
            psdist = max(max(self.psd[trt].values()) for trt in self.psd)
            if psdist != -1 and self.maxradius >= psdist / 2:
                logging.warning('The pointsource_distance of %d km is too '
                                'small compared to a maxradius of %d km',
                                psdist, self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
Example #30
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        assert oq.max_sites_per_tile > oq.max_sites_disagg, (
            oq.max_sites_per_tile, oq.max_sites_disagg)
        psd = self.set_psd()
        srcfilter = self.src_filter()
        performance.Monitor.save(self.datastore, 'srcfilter', srcfilter)
        srcs = self.csm.get_sources(atomic=False)
        if srcs:
            res = parallel.Starmap.apply(preclassical, (srcs, self.params),
                                         concurrent_tasks=oq.concurrent_tasks
                                         or 1,
                                         h5=self.datastore.hdf5).reduce()

            if oq.calculation_mode == 'preclassical':
                self.store_source_info(res['calc_times'], nsites=True)
                self.datastore['full_lt'] = self.csm.full_lt
                self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
                return

            self.update_source_info(res['calc_times'], nsites=True)
            sources_by_grp = groupby(res['sources'],
                                     operator.attrgetter('grp_id'))
        else:
            for src in self.csm.get_sources(atomic=True):
                src.num_ruptures = src.count_ruptures()
                src.nsites = self.N
            sources_by_grp = {}
        self.csm.src_groups = [sg for sg in self.csm.src_groups if sg.atomic]
        if oq.ps_grid_spacing:
            smap = parallel.Starmap(
                grid_point_sources,
                h5=self.datastore.hdf5,
                distribute=None if len(sources_by_grp) > 1 else 'no')
            for grp_id, sources in sources_by_grp.items():
                smap.submit((sources, oq.ps_grid_spacing))
            dic = smap.reduce()
            before, after = 0, 0
            for grp_id, sources in sources_by_grp.items():
                before += len(sources)
                after += len(dic[grp_id])
                sg = SourceGroup(sources[0].tectonic_region_type)
                sg.sources = dic[grp_id]
                self.csm.src_groups.append(sg)
            logging.info('Reduced point sources %d->%d', before, after)
        else:
            for grp_id, sources in sources_by_grp.items():
                sg = SourceGroup(sources[0].tectonic_region_type)
                sg.sources = sources
                self.csm.src_groups.append(sg)
        smap = parallel.Starmap(classical, h5=self.datastore.hdf5)
        self.submit_tasks(smap)
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            source_ids = self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = max(self.by_task) + 1,
                er = self.datastore.create_dset('by_task/eff_ruptures', U32,
                                                num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites', U32,
                                                num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr,
                                                num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(source_ids[s] for s in srcids)
                self.by_task.clear()
        if self.calc_times:  # can be empty in case of errors
            self.numrups = sum(arr[0] for arr in self.calc_times.values())
            numsites = sum(arr[1] for arr in self.calc_times.values())
            logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
                int(self.numrups), self.totrups))
            logging.info('Effective number of sites per rupture: %d',
                         numsites / self.numrups)
        if psd:
            psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
            if psdist and self.maxradius >= psdist / 2:
                logging.warning(
                    'The pointsource_distance of %d km is too '
                    'small compared to a maxradius of %d km', psdist,
                    self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
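
A sketch of the grouping step above: groupby(res['sources'], operator.attrgetter('grp_id')) maps each grp_id to its sources. A minimal equivalent of the baselib helper, with a toy source class, for illustration:

import operator

def groupby(items, kfunc):
    dic = {}
    for item in items:
        dic.setdefault(kfunc(item), []).append(item)
    return dic

class Src:
    def __init__(self, grp_id):
        self.grp_id = grp_id

srcs = [Src(0), Src(1), Src(0)]
by_grp = groupby(srcs, operator.attrgetter('grp_id'))
print({g: len(ss) for g, ss in by_grp.items()})  # -> {0: 2, 1: 1}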