Example #1
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(
            rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()

        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter,) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        i = 0
        for eid_rlz in smap:  # 30 million events associated in 1 minute!
            for er in eid_rlz:
                events[i] = er
                i += 1
        events.sort(order=['rlz', 'eid'])  # fast too
        n_unique_events = len(numpy.unique(events['eid']))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        self.datastore['events'] = events
        indices = numpy.zeros((self.R, 2), U32)
        for r, [startstop] in get_indices(events['rlz']).items():
            indices[r] = startstop
        self.datastore.set_attrs('events', indices=indices)
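Every snippet on this page feeds a sorted integer array into get_indices and then unpacks a single (start, stop) pair per value, as in the loop over events['rlz'] above. The helper is assumed to behave like openquake.baselib.general.get_indices, i.e. to map each value to the list of index ranges of its consecutive runs; the sketch below only illustrates that assumed contract, it is not the engine's implementation.

    import collections
    import numpy

    def get_indices_sketch(integers):
        # map each value to the [(start, stop), ...] pairs of its consecutive runs
        indices = collections.defaultdict(list)
        start = 0
        for i in range(1, len(integers) + 1):
            if i == len(integers) or integers[i] != integers[start]:
                indices[integers[start]].append((start, i))
                start = i
        return dict(indices)

    rlz = numpy.array([0, 0, 0, 1, 1, 2, 2, 2, 2])  # already sorted
    print(get_indices_sketch(rlz))  # {0: [(0, 3)], 1: [(3, 5)], 2: [(5, 9)]}

Because the array is sorted first, each value produces exactly one run, which is why the destructuring `for r, [startstop] in ...` above is safe.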
Example #2
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = gen_rgetters(self.datastore)
        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info('Building assocs event_id -> rlz_id for {:_d} events'
                     ' and {:_d} ruptures'.format(len(events), len(rup_array)))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            # parallel composite array with the associations eid->rlz
            it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                  ((rgetter, ) for rgetter in rgetters),
                                  progress=logging.debug,
                                  h5=self.datastore.hdf5)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # sanity check
        n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        events['id'] = numpy.arange(len(events))
        # set event year and event ses starting from 1
        itime = int(self.oqparam.investigation_time)
        nses = self.oqparam.ses_per_logic_tree_path
        extra = numpy.zeros(len(events), [('year', U16), ('ses_id', U16)])
        # seed for year and ses_id
        numpy.random.seed(self.oqparam.ses_seed)
        extra['year'] = numpy.random.choice(itime, len(events)) + 1
        extra['ses_id'] = numpy.random.choice(nses, len(events)) + 1
        self.datastore['events'] = util.compose_arrays(events, extra)
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['ruptures']['e0'] = arr[:, 0]
        self.datastore['ruptures']['e1'] = arr[:, 1]
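util.compose_arrays(events, extra) above merges two structured arrays of the same length into a single record array carrying the union of their fields. A minimal sketch of that behaviour in plain numpy, assuming the field names do not clash:

    import numpy

    def compose_arrays_sketch(a, b):
        # merge two same-length structured arrays into one with all the fields
        out = numpy.zeros(len(a), list(a.dtype.descr) + list(b.dtype.descr))
        for name in a.dtype.names:
            out[name] = a[name]
        for name in b.dtype.names:
            out[name] = b[name]
        return out

    events = numpy.zeros(3, [('id', 'u4'), ('rup_id', 'u4')])
    extra = numpy.zeros(3, [('year', 'u2'), ('ses_id', 'u2')])
    print(compose_arrays_sketch(events, extra).dtype.names)
    # ('id', 'rup_id', 'year', 'ses_id')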
Example #3
    def combine(self, acc, res):
        aed = res.pop('aed', ())
        if len(aed) == 0:
            return acc + res
        for aid, [(i1, i2)] in get_indices(aed['aid']).items():
            self.datastore['dd_data/indices'][aid] = (
                self.start + i1, self.start + i2)
        self.start += len(aed)
        hdf5.extend(self.datastore['dd_data/data'], aed)
        return acc + res
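The combine above appends each aed chunk to an extendable dataset and keeps a running self.start offset, so dd_data/indices records where each asset's rows live. The same append pattern with plain h5py, using made-up dataset names and a toy dtype, looks roughly like this:

    import h5py
    import numpy

    data_dt = numpy.dtype([('aid', 'u4'), ('loss', 'f4')])

    with h5py.File('demo.hdf5', 'w') as f:
        data = f.create_dataset('data', (0,), maxshape=(None,), dtype=data_dt)
        start = 0
        for chunk in [numpy.zeros(3, data_dt), numpy.zeros(5, data_dt)]:
            data.resize((start + len(chunk),))       # grow the dataset
            data[start:start + len(chunk)] = chunk   # append the new rows
            start += len(chunk)                      # advance the running offset

At the point where the rows are appended, the engine also stores (start + i1, start + i2) per asset, which is what the indices dataset is for.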
Example #4
File: calc.py Project: kangwenhe/oq-engine
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields et_id
        :returns: a list of RuptureGetters
        """
        from openquake.calculators.getters import (get_eid_rlz,
                                                   gen_rupture_getters)
        # this is very fast compared to saving the ruptures
        E = rup_array['n_occ'].sum()
        self.check_overflow(E)  # check the number of events
        events = numpy.zeros(E, rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = gen_rupture_getters(self.datastore,
                                       self.oqparam.concurrent_tasks)
        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info('Associating event_id -> rlz_id for {:_d} events '
                     'and {:_d} ruptures'.format(len(events), len(rup_array)))
        iterargs = ((rg.proxies, rg.rlzs_by_gsim) for rg in rgetters)
        if len(events) < 1E5:
            it = itertools.starmap(get_eid_rlz, iterargs)
        else:
            it = parallel.Starmap(get_eid_rlz,
                                  iterargs,
                                  progress=logging.debug,
                                  h5=self.datastore.hdf5)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # sanity check
        n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        events['id'] = numpy.arange(len(events))
        # set event year and event ses starting from 1
        nses = self.oqparam.ses_per_logic_tree_path
        extra = numpy.zeros(len(events), [('year', U32), ('ses_id', U32)])
        numpy.random.seed(self.oqparam.ses_seed)
        if self.oqparam.investigation_time:
            itime = int(self.oqparam.investigation_time)
            extra['year'] = numpy.random.choice(itime, len(events)) + 1
        extra['ses_id'] = numpy.random.choice(nses, len(events)) + 1
        self.datastore['events'] = util.compose_arrays(events, extra)
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['ruptures']['e0'] = arr[:, 0]
        self.datastore['ruptures']['e1'] = arr[:, 1]
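The 1E5-event threshold above decides between running get_eid_rlz in-process via itertools.starmap and fanning it out through parallel.Starmap. The general shape of that dispatch, with concurrent.futures standing in for the engine's task framework and a placeholder task function, is roughly:

    import itertools
    from concurrent.futures import ProcessPoolExecutor

    def task(proxies, rlzs_by_gsim):  # placeholder for get_eid_rlz
        return len(proxies) * len(rlzs_by_gsim)

    def run(iterargs, njobs, threshold=100_000):
        if njobs < threshold:                 # small job: stay sequential
            return list(itertools.starmap(task, iterargs))
        with ProcessPoolExecutor() as pool:   # big job: use worker processes
            futures = [pool.submit(task, *args) for args in iterargs]
            return [fut.result() for fut in futures]

    print(run([([1, 2], [3]), ([4], [5, 6])], njobs=2))  # [2, 2]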
Example #5
    def combine(self, acc, res):
        """
        Combine the outputs from scenario_risk and incrementally store
        the asset loss table
        """
        ael = res.pop('ael', ())
        if len(ael) == 0:
            return acc + res
        for aid, [(i1, i2)] in get_indices(ael['asset_id']).items():
            self.datastore['loss_data/indices'][aid] = (self.start + i1,
                                                        self.start + i2)
        self.start += len(ael)
        hdf5.extend(self.datastore['loss_data/data'], ael)
        return acc + res
Example #6
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids_for_erf_based(rup_array, self.num_rlzs_by_grp)
        # eids = rupture.get_eids(rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = gen_rgetters(self.datastore)
        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info('Building assocs event_id -> rlz_id for {:_d} events'
                     ' and {:_d} ruptures'.format(len(events), len(rup_array)))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            # parallel composite array with the associations eid->rlz
            it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                  ((rgetter, ) for rgetter in rgetters),
                                  progress=logging.debug,
                                  h5=self.datastore.hdf5)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # one event per rupture -> the event ids match the rupture ids
        events['id'] = numpy.arange(len(events))
        self.datastore['events'] = events
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['ruptures']['e0'] = arr[:, 0]
        self.datastore['ruptures']['e1'] = arr[:, 1]
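Both this variant and example #4 end by storing, for every rupture, the e0/e1 bounds of its slice of the events array, which was sorted by rup_id just before. A hypothetical read-side helper (the dstore object and the 'ruptures'/'events' keys are taken from the snippets above and are assumptions about the datastore layout):

    def events_of_rupture(dstore, ridx):
        # return the events generated by the rupture at position ridx, relying
        # on the events array being sorted by rup_id and on the e0/e1 bounds
        rup = dstore['ruptures'][ridx]
        return dstore['events'][rup['e0']:rup['e1']]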
Example #7
    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()

        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info(
            'Building associations event_id -> rlz_id for %d events'
            ' and %d ruptures', len(events), len(rup_array))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                  ((rgetter, ) for rgetter in rgetters),
                                  progress=logging.debug,
                                  hdf5path=self.datastore.filename)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # sanity check
        n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        events['id'] = numpy.arange(len(events))
        self.datastore['events'] = events
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['eslices'] = arr  # shape (U, 2)
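The sanity check before assigning events['id'] counts the distinct (id, rup_id) records with numpy.unique on a structured array; if the count differs from len(events), some record was duplicated. A toy version with only those two fields:

    import numpy

    events = numpy.zeros(3, [('id', 'u4'), ('rup_id', 'u4')])
    events['id'] = [0, 1, 2]
    events['rup_id'] = [7, 7, 8]
    n_unique = len(numpy.unique(events))  # the engine selects the two fields first
    assert n_unique == len(events), (n_unique, len(events))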
Example #8
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')
            if not self.csm.get_sources():
                raise RuntimeError('All sources were filtered away!')

        csm_info = self.datastore['csm_info']
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.rlzs_assoc.realizations]
        self.pgetter = getters.PmapGetter(self.datastore, self.ws,
                                          self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(curves, oq.imtls,
                                                    stats.mean_curve, self.ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                self.datastore['best_rlzs'] = rlzs
        else:
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [
                self.get_curve(sid, rlzs[sid]) for sid in self.sitecol.sids
            ]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg,
                          curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build trt_edges
        trts = tuple(csm_info.trts)
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        min_mag = csm_info.min_mag
        max_mag = csm_info.max_mag
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        self.save_bin_edges()

        self.imldict = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                logging.info('Site #%d, disaggregating for rlz=#%d', s, rlz)
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

        # submit disagg tasks
        gid = self.datastore['rup/grp_id'][()]
        indices_by_grp = get_indices(gid)  # grp_id -> [(start, stop),...]
        blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1
        # NB: removing the blocksize causes slow disaggregation tasks
        allargs = []
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        for grp_id, trt in csm_info.trt_by_grp.items():
            trti = trt_num[trt]
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
            cmaker = ContextMaker(
                trt, rlzs_by_gsim, {
                    'truncation_level': oq.truncation_level,
                    'maximum_distance': src_filter.integration_distance,
                    'filter_distance': oq.filter_distance,
                    'imtls': oq.imtls
                })
            for start, stop in indices_by_grp[grp_id]:
                for slc in gen_slices(start, stop, blocksize):
                    allargs.append((dstore, slc, self.sitecol, oq, cmaker,
                                    self.iml4, trti, self.bin_edges))
        results = parallel.Starmap(compute_disagg,
                                   allargs,
                                   h5=self.datastore.hdf5).reduce(
                                       self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 8D array
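Each group's (start, stop) range from get_indices is further cut into blocks with gen_slices before the disaggregation tasks are submitted. Assuming gen_slices(start, stop, blocksize) simply yields consecutive slices of at most blocksize elements, a sketch would be:

    def gen_slices_sketch(start, stop, blocksize):
        # yield consecutive slices covering [start, stop) in blocks of at most
        # `blocksize` ruptures, so each task gets a bounded amount of work
        while start < stop:
            yield slice(start, min(start + blocksize, stop))
            start += blocksize

    print(list(gen_slices_sketch(3, 10, 4)))
    # [slice(3, 7, None), slice(7, 10, None)]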
Example #9
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')
            if not self.csm.get_sources():
                raise RuntimeError('All sources were filtered away!')

        csm_info = self.datastore['csm_info']
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)
        if oq.rlz_index is None:
            try:
                rlzs = self.datastore['best_rlz'][()]
            except KeyError:
                rlzs = numpy.zeros(self.N, int)
        else:
            rlzs = [oq.rlz_index] * self.N

        if oq.iml_disagg:
            self.poe_id = {None: 0}
            curves = [None] * len(self.sitecol)  # no hazard curves are needed
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs) for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml2s = _iml2s(rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg,
                            curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src()

        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build trt_edges
        trts = tuple(csm_info.trts)
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        min_mag = csm_info.min_mag
        max_mag = csm_info.max_mag
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        self.save_bin_edges()

        self.imldict = {}  # sid, rlzi, poe, imt -> iml
        for s in self.sitecol.sids:
            iml2 = self.iml2s[s]
            r = rlzs[s]
            logging.info('Site #%d, disaggregating for rlz=#%d', s, r)
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml2[m, p]

        # submit disagg tasks
        gid = self.datastore['rup/grp_id'][()]
        indices_by_grp = get_indices(gid)  # grp_id -> [(start, stop),...]
        blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1
        allargs = []
        for grp_id, trt in csm_info.trt_by_grp.items():
            trti = trt_num[trt]
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
            cmaker = ContextMaker(
                trt, rlzs_by_gsim, {
                    'truncation_level': oq.truncation_level,
                    'maximum_distance': src_filter.integration_distance,
                    'filter_distance': oq.filter_distance,
                    'imtls': oq.imtls
                })
            for start, stop in indices_by_grp[grp_id]:
                for slc in gen_slices(start, stop, blocksize):
                    allargs.append((self.datastore, slc, cmaker, self.iml2s,
                                    trti, self.bin_edges))
        results = parallel.Starmap(compute_disagg,
                                   allargs,
                                   h5=self.datastore.hdf5).reduce(
                                       self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 7D array
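In both versions the magnitude bin edges are multiples of mag_bin_width chosen to bracket [min_mag, max_mag]. A worked example with made-up numbers:

    import numpy

    min_mag, max_mag, width = 5.1, 7.4, 0.5
    mag_edges = width * numpy.arange(int(numpy.floor(min_mag / width)),
                                     int(numpy.ceil(max_mag / width) + 1))
    print(mag_edges)  # [5.  5.5 6.  6.5 7.  7.5]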
Example #10
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values
        logging.info('Building ruptures')
        smap = parallel.Starmap(self.build_ruptures.__func__,
                                hdf5path=self.datastore.filename)
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(2, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                if sg.atomic:  # do not split the group
                    smap.submit(sg, self.src_filter, par)
                else:  # traditional groups
                    for block in self.block_splitter(sg.sources, key=by_grp):
                        if 'ucerf' in oq.calculation_mode:
                            for i in range(oq.ses_per_logic_tree_path):
                                par['ses_seeds'] = [(ses_idx,
                                                     oq.ses_seed + i + 1)]
                                smap.submit(block, self.src_filter, par)
                                ses_idx += 1
                        else:
                            smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        with self.monitor('saving events'):
            self.save_events(sorted_ruptures)
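The loop over smap sums the eff_ruptures and calc_times dictionaries coming back from the workers; AccumDict(accum=...) behaves like a dict whose missing entries start from the given zero value. Outside the engine the same accumulation can be sketched with the standard collections module:

    import collections
    import numpy

    eff_ruptures = collections.Counter()  # grp_id -> effective ruptures
    calc_times = collections.defaultdict(lambda: numpy.zeros(2, numpy.float32))

    results = [{'eff_ruptures': {0: 10}, 'calc_times': {'src1': [1.0, 0.5]}},
               {'eff_ruptures': {0: 5, 1: 2}, 'calc_times': {'src1': [2.0, 0.1]}}]
    for dic in results:
        eff_ruptures.update(dic['eff_ruptures'])   # add the per-group counts
        for src, pair in dic['calc_times'].items():
            calc_times[src] += numpy.array(pair, numpy.float32)

    print(dict(eff_ruptures))   # {0: 15, 1: 2}
    print(calc_times['src1'])   # [3.  0.6]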
Example #11
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values

        def weight_src(src):
            return src.num_ruptures

        logging.info('Building ruptures')
        smap = parallel.Starmap(
            self.build_ruptures.__func__, monitor=self.monitor())
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(2, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                if sg.atomic:  # do not split the group
                    smap.submit(sg, self.src_filter, par)
                else:  # traditional groups
                    for block in self.block_splitter(
                            sg.sources, weight_src, by_grp):
                        if 'ucerf' in oq.calculation_mode:
                            for i in range(oq.ses_per_logic_tree_path):
                                par['ses_seeds'] = [
                                    (ses_idx, oq.ses_seed + i + 1)]
                                smap.submit(block, self.src_filter, par)
                                ses_idx += 1
                        else:
                            smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        store_rlzs_by_grp(self.datastore)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        self.save_events(sorted_ruptures)
Example #12
def calc_risk(gmfs, param, monitor):
    mon_risk = monitor('computing risk', measuremem=False)
    mon_agg = monitor('aggregating losses', measuremem=False)
    eids = numpy.unique(gmfs['eid'])
    dstore = datastore.read(param['hdf5path'])
    with monitor('getting assets'):
        assetcol = dstore['assetcol']
        assets_by_site = assetcol.assets_by_site()
        exposed_values = dstore['exposed_values/agg'][()]
    with monitor('getting crmodel'):
        crmodel = riskmodels.CompositeRiskModel.read(dstore)
        events = dstore['events'][list(eids)]
        weights = dstore['weights'][()]
    E = len(eids)
    L = len(param['lba'].loss_names)
    elt_dt = [('event_id', U32), ('rlzi', U16), ('loss', (F32, (L, )))]
    alt = general.AccumDict(accum=numpy.zeros(L, F32))  # aid, eid -> loss
    arr = numpy.zeros((E, L), F32)
    acc = dict(events_per_sid=0, numlosses=numpy.zeros(2, int))  # (kept, tot)
    lba = param['lba']
    tempname = param['tempname']
    eid2rlz = dict(events[['id', 'rlz_id']])
    eid2idx = {eid: idx for idx, eid in enumerate(eids)}

    minimum_loss = []
    fraction = param['minimum_loss_fraction'] / len(assetcol)
    for lt, lti in crmodel.lti.items():
        val = exposed_values[lti] * fraction
        minimum_loss.append(val)
        if lt in lba.policy_dict:  # same order as in lba.compute
            minimum_loss.append(val)

    for sid, haz in general.group_array(gmfs, 'sid').items():
        assets_on_sid = assets_by_site[sid]
        if len(assets_on_sid) == 0:
            continue
        acc['events_per_sid'] += len(haz)
        if param['avg_losses']:
            ws = weights[[eid2rlz[eid] for eid in haz['eid']]]
        assets_by_taxo = get_assets_by_taxo(assets_on_sid, tempname)
        eidx = numpy.array([eid2idx[eid] for eid in haz['eid']])
        with mon_risk:
            out = get_output(crmodel, assets_by_taxo, haz)
        for lti, lt in enumerate(crmodel.loss_types):
            lratios = out[lt]
            if lt == 'occupants':
                field = 'occupants_None'
            else:
                field = 'value-' + lt
            for a, asset in enumerate(assets_on_sid):
                aid = asset['ordinal']
                ls = asset[field] * lratios[a]
                for loss_idx, losses in lba.compute(asset, ls, lt):
                    kept = 0
                    with mon_agg:
                        if param['aggregate_by']:
                            for loss, eid in zip(losses, out.eids):
                                if loss >= minimum_loss[loss_idx]:
                                    alt[aid, eid][loss_idx] = loss
                                    kept += 1
                        arr[eidx, loss_idx] += losses
                    if param['avg_losses']:  # this is really fast
                        lba.losses_by_A[aid, loss_idx] += losses @ ws
                    acc['numlosses'] += numpy.array([kept, len(losses)])
    if len(gmfs):
        acc['events_per_sid'] /= len(gmfs)
    acc['elt'] = numpy.fromiter(  # this is ultra-fast
        ((event['id'], event['rlz_id'], losses)
         for event, losses in zip(events, arr) if losses.sum()), elt_dt)
    acc['alt'] = alt = numpy.fromiter(  # already sorted by aid
        ((aid, eid, eid2rlz[eid], loss) for (aid, eid), loss in alt.items()),
        param['ael_dt'])
    alt.sort(order='rlzi')
    acc['indices'] = general.get_indices(alt['rlzi'])
    if param['avg_losses']:
        acc['losses_by_A'] = param['lba'].losses_by_A * param['ses_ratio']
        # without resetting the cache the sequential avg_losses would be wrong!
        del param['lba'].__dict__['losses_by_A']
    return acc
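acc['elt'] above is built by feeding a generator of (event_id, rlz_id, losses) tuples straight into numpy.fromiter with the structured elt_dt, dropping the events whose losses sum to zero. A self-contained illustration with toy values (a reasonably recent numpy is assumed for the subarray field):

    import numpy

    L = 2
    elt_dt = [('event_id', numpy.uint32), ('rlzi', numpy.uint16),
              ('loss', (numpy.float32, (L,)))]
    rows = [(0, 0, [1.0, 0.0]), (1, 0, [0.0, 0.0]), (2, 1, [2.5, 0.5])]
    elt = numpy.fromiter((row for row in rows if sum(row[2])), elt_dt)
    print(elt['event_id'])  # [0 2] -- the all-zero event 1 was skipped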