Example #1
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_hazard
    is_stochastic = True

    def gen_args(self, monitor):
        """
        :yields: the arguments for the `compute_hazard` task
        """
        oq = self.oqparam
        param = dict(oqparam=oq,
                     min_iml=self.get_min_iml(oq),
                     truncation_level=oq.truncation_level,
                     imtls=oq.imtls,
                     filter_distance=oq.filter_distance,
                     seed=oq.ses_seed,
                     maximum_distance=oq.maximum_distance,
                     ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
        concurrent_tasks = oq.concurrent_tasks
        if oq.hazard_calculation_id:
            U = len(self.datastore.parent['ruptures'])
            logging.info('Found %d ruptures', U)
            parent = self.can_read_parent() or self.datastore
            samples_by_grp = self.csm_info.get_samples_by_grp()
            for slc in split_in_slices(U, concurrent_tasks or 1):
                for grp_id in self.rlzs_by_gsim_grp:
                    rlzs_by_gsim = self.rlzs_by_gsim_grp[grp_id]
                    ruptures = RuptureGetter(parent, slc, grp_id)
                    param['samples'] = samples_by_grp[grp_id]
                    yield ruptures, self.sitecol, rlzs_by_gsim, param, monitor
            return

        maxweight = self.csm.get_maxweight(weight, concurrent_tasks or 1)
        logging.info('Using maxweight=%d', maxweight)
        num_tasks = 0
        num_sources = 0
        for sm in self.csm.source_models:
            param['samples'] = sm.samples
            for sg in sm.src_groups:
                rlzs_by_gsim = self.rlzs_by_gsim_grp[sg.id]
                self.csm.add_infos(sg.sources)
                if sg.src_interdep == 'mutex':  # do not split
                    yield sg, self.src_filter, rlzs_by_gsim, param, monitor
                    num_tasks += 1
                    num_sources += len(sg.sources)
                    continue
                for block in block_splitter(sg.sources, maxweight, weight):
                    yield block, self.src_filter, rlzs_by_gsim, param, monitor
                    num_tasks += 1
                    num_sources += len(block)
        logging.info('Sent %d sources in %d tasks', num_sources, num_tasks)

    def zerodict(self):
        """
        Initial accumulator, a dictionary rlz_index -> ProbabilityMap
        """
        if self.oqparam.hazard_calculation_id is None:
            # filter_csm must be called first
            self.src_filter, self.csm = self.filter_csm()
            self.csm_info = self.csm.info
        else:
            self.datastore.parent = datastore.read(
                self.oqparam.hazard_calculation_id)
            self.csm_info = self.datastore.parent['csm_info']
        self.rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
        self.L = len(self.oqparam.imtls.array)
        self.R = self.csm_info.get_num_rlzs()
        zd = AccumDict({r: ProbabilityMap(self.L) for r in range(self.R)})
        zd.eff_ruptures = AccumDict()
        self.grp_trt = self.csm_info.grp_by("trt")
        return zd

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        oq = self.oqparam
        if oq.save_ruptures and not oq.ground_motion_fields:
            self.gmf_size += max_gmf_size(
                result['ruptures'], self.csm_info.rlzs_assoc.get_rlzs_by_gsim,
                self.csm_info.get_samples_by_grp(), len(self.oqparam.imtls))
        if hasattr(result, 'calc_times'):
            for srcid, nsites, eids, dt in result.calc_times:
                info = self.csm.infos[srcid]
                info.num_sites += nsites
                info.calc_time += dt
                info.num_split += 1
                info.events += len(eids)
        if hasattr(result, 'eff_ruptures'):
            acc.eff_ruptures += result.eff_ruptures
        if hasattr(result, 'events'):
            self.datastore.extend('events', result.events)
        self.save_ruptures(result['ruptures'])
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        if 'gmdata' in result:
            self.gmdata += result['gmdata']
            data = result['gmfdata']
            with sav_mon:
                self.datastore.extend('gmf_data/data', data)
                # it is important to save the number of bytes while the
                # computation is running, to track the progress
                update_nbytes(self.datastore, 'gmf_data/data', data)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
                if self.offset >= TWO32:
                    raise RuntimeError(
                        'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
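                # combine with the PoEs already accumulated for this site and
                # realization, assuming independence: 1 - prod_i(1 - poe_i)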
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def save_ruptures(self, ruptures_by_grp_id):
        """
        Extend the 'events' dataset with the events from the given ruptures;
        also, save the ruptures if the flag `save_ruptures` is on.

        :param ruptures_by_grp_id: a dictionary grp_id -> list of EBRuptures
        """
        with self.monitor('saving ruptures', autoflush=True):
            for grp_id, ebrs in ruptures_by_grp_id.items():
                if len(ebrs):
                    events = get_events(ebrs)
                    dset = self.datastore.extend('events', events)
                    if self.oqparam.save_ruptures:
                        self.rupser.save(ebrs, eidx=len(dset) - len(events))

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than 65,536, the
        number of IMTs is larger than 256, or the number of events is larger
        than 4,294,967,296. The limits are due to the numpy dtype used to
        store the GMFs (gmv_dt). They could be relaxed in the future.
        """
        max_ = dict(sites=2**16, events=2**32, imts=2**8)
        try:
            events = len(self.datastore['events'])
        except KeyError:
            events = 0
        num_ = dict(sites=len(self.sitecol),
                    events=events,
                    imts=len(self.oqparam.imtls))
        for var in max_:
            if num_[var] > max_[var]:
                raise ValueError('The event based calculator is restricted to '
                                 '%d %s, got %d' % (max_[var], var, num_[var]))

    def execute(self):
        if self.oqparam.hazard_calculation_id:

            def saving_sources_by_task(allargs, dstore):
                return allargs
        else:
            from openquake.calculators.classical import saving_sources_by_task
        self.gmdata = {}
        self.offset = 0
        self.gmf_size = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        acc = self.zerodict()
        with self.monitor('managing sources', autoflush=True):
            allargs = self.gen_args(self.monitor('classical'))
            iterargs = saving_sources_by_task(allargs, self.datastore)
            if isinstance(allargs, list):
                # there is a trick here: if the arguments are known
                # (a list, not an iterator), keep them as a list;
                # then the Starmap will recognize the case of a single
                # argument tuple and run that task in-core
                iterargs = list(iterargs)
            if self.oqparam.ground_motion_fields is False:
                logging.info('Generating ruptures only')
            ires = parallel.Starmap(self.core_task.__func__,
                                    iterargs).submit_all()
        acc = ires.reduce(self.agg_dicts, acc)
        if self.oqparam.hazard_calculation_id is None:
            with self.monitor('store source_info', autoflush=True):
                self.store_source_info(self.csm.infos, acc)
        self.check_overflow()  # check the number of events
        base.save_gmdata(self, self.R)
        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices',
                              measuremem=True,
                              autoflush=True):
                dset = self.datastore.create_dset('gmf_data/indices',
                                                  hdf5.vuint32,
                                                  shape=(N, 2),
                                                  fillvalue=None)
                for sid in self.sitecol.complete.sids:
                    dset[sid, 0] = self.indices[sid, 0]
                    dset[sid, 1] = self.indices[sid, 1]
        elif (self.oqparam.ground_motion_fields
              and 'ucerf' not in self.oqparam.calculation_mode):
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def init(self):
        """
        Create the RuptureSerializer used to store the generated ruptures.
        """
        self.rupser = calc.RuptureSerializer(self.datastore)

    def post_execute(self, result):
        """
        Save the SES collection
        """
        oq = self.oqparam
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if oq.hazard_calculation_id is None:
            self.rupser.close()
            num_events = sum(set_counts(self.datastore, 'events').values())
            if num_events == 0:
                raise RuntimeError(
                    'No seismic events! Perhaps the investigation time is too '
                    'short or the maximum_distance is too small')
            if oq.save_ruptures:
                logging.info('Setting %d event years on %d ruptures',
                             num_events, self.rupser.nruptures)
            with self.monitor('setting event years',
                              measuremem=True,
                              autoflush=True):
                numpy.random.seed(self.oqparam.ses_seed)
                set_random_years(self.datastore, 'events',
                                 int(self.oqparam.investigation_time))

        if self.gmf_size:
            self.datastore.set_attrs('events', max_gmf_size=self.gmf_size)
            msg = 'less than ' if self.get_min_iml(self.oqparam).sum() else ''
            logging.info('Generating %s%s of GMFs', msg,
                         humansize(self.gmf_size))

        if oq.hazard_curves_from_gmfs:
            rlzs = self.csm_info.rlzs_assoc.realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            hstats = self.oqparam.hazard_stats()
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                for kind, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat], weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + kind] = arr
            self.save_hmaps()
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not dominate
            # the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(cl_mean_curves,
                                                   eb_mean_curves)
            logging.warning(
                'Relative difference with the classical '
                'mean curves: %d%% at site index %d', rdiff * 100, index)
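
In `gen_args` above, `split_in_slices` partitions the ruptures of the parent
calculation into roughly equal contiguous slices, one per task. A minimal
sketch of what such a helper can look like (a hypothetical simplification,
not the engine's own implementation):

import math

def split_in_slices(number, num_slices):
    """Split the range [0, number) into at most num_slices contiguous slices."""
    blocksize = math.ceil(number / num_slices)
    return [slice(start, min(start + blocksize, number))
            for start in range(0, number, blocksize)]

# e.g. 10 ruptures over 3 tasks -> [slice(0, 4), slice(4, 8), slice(8, 10)]
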
Example #2
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True

    @cached_property
    def csm_info(self):
        """
        :returns: a cached CompositionInfo object
        """
        try:
            return self.csm.info
        except AttributeError:
            return self.datastore.parent['csm_info']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        self.rupser = calc.RuptureSerializer(self.datastore)
        self.rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
        self.samples_by_grp = self.csm_info.get_samples_by_grp()

    def from_ruptures(self, param, monitor):
        """
        :yields: the arguments for the `compute_gmfs` task
        """
        oq = self.oqparam
        concurrent_tasks = oq.concurrent_tasks
        U = len(self.datastore.parent['ruptures'])
        logging.info('Found %d ruptures', U)
        parent = self.can_read_parent() or self.datastore
        for slc in split_in_slices(U, concurrent_tasks or 1):
            for grp_id in self.rlzs_by_gsim_grp:
                rlzs_by_gsim = self.rlzs_by_gsim_grp[grp_id]
                ruptures = RuptureGetter(parent, slc, grp_id)
                par = param.copy()
                par['samples'] = self.samples_by_grp[grp_id]
                yield ruptures, self.sitecol, rlzs_by_gsim, par, monitor

    def zerodict(self):
        """
        Initial accumulator, a dictionary rlz_index -> ProbabilityMap
        """
        self.R = self.csm_info.get_num_rlzs()
        self.L = len(self.oqparam.imtls.array)
        zd = AccumDict({r: ProbabilityMap(self.L) for r in range(self.R)})
        zd.eff_ruptures = AccumDict()
        self.grp_trt = self.csm_info.grp_by("trt")
        return zd

    def _store_ruptures(self, ires):
        gmf_size = 0
        calc_times = AccumDict(accum=numpy.zeros(3, F32))
        for srcs in ires:
            for src in srcs:
                # save the events always; save the ruptures
                # if oq.save_ruptures is true
                self.save_ruptures(src.eb_ruptures)
                gmf_size += max_gmf_size(
                    {src.src_group_id: src.eb_ruptures},
                    self.rlzs_by_gsim_grp,
                    self.samples_by_grp,
                    len(self.oqparam.imtls))
                calc_times += src.calc_times
                del src.calc_times
                yield from src.eb_ruptures
                del src.eb_ruptures
        self.rupser.close()
        if gmf_size:
            self.datastore.set_attrs('events', max_gmf_size=gmf_size)
            msg = 'less than ' if self.get_min_iml(self.oqparam).sum() else ''
            logging.info('Estimating %s%s of GMFs', msg, humansize(gmf_size))

        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
            eff_ruptures = {
                grp.id: sum(src.num_ruptures for src in grp)
                for grp in self.csm.src_groups}
            self.store_csm_info(eff_ruptures)

    def from_sources(self, par, monitor):
        """
        Prefilter the composite source model and store the source_info
        """
        self.R = self.csm.info.get_num_rlzs()
        num_rlzs = {grp_id: sum(
            len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
                    for grp_id in self.rlzs_by_gsim_grp}
        param = {'ruptures_per_block': RUPTURES_PER_BLOCK}
        param['filter_distance'] = self.oqparam.filter_distance
        param['ses_per_logic_tree_path'] = self.oqparam.ses_per_logic_tree_path
        param['gsims_by_trt'] = self.csm.gsim_lt.values
        param['pointsource_distance'] = self.oqparam.pointsource_distance
        logging.info('Building ruptures')
        ires = parallel.Starmap.apply(
            build_ruptures,
            (self.csm.get_sources(), self.src_filter, param, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('num_ruptures'),
            key=operator.attrgetter('src_group_id'))

        def weight(ebr):
            return numpy.sqrt(num_rlzs[ebr.grp_id] * ebr.multiplicity *
                              len(ebr.sids))
        for ruptures in block_splitter(self._store_ruptures(ires), BLOCKSIZE,
                                       weight, operator.attrgetter('grp_id')):
            ebr = ruptures[0]
            rlzs_by_gsim = self.rlzs_by_gsim_grp[ebr.grp_id]
            par = par.copy()
            par['samples'] = self.samples_by_grp[ebr.grp_id]
            yield ruptures, self.src_filter, rlzs_by_gsim, par, monitor

        self.setting_events()
        if self.oqparam.ground_motion_fields:
            logging.info('Building GMFs')

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        # in UCERF
        if hasattr(result, 'ruptures_by_grp'):
            for ruptures in result.ruptures_by_grp.values():
                self.save_ruptures(ruptures)
        elif hasattr(result, 'events_by_grp'):
            for grp_id in result.events_by_grp:
                events = result.events_by_grp[grp_id]
                self.datastore.extend('events', events)
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        if 'gmdata' in result:
            self.gmdata += result['gmdata']
            data = result.pop('gmfdata')
            with sav_mon:
                self.datastore.extend('gmf_data/data', data)
                # it is important to save the number of bytes while the
                # computation is running, to track the progress
                update_nbytes(self.datastore, 'gmf_data/data', data)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
                if self.offset >= TWO32:
                    raise RuntimeError(
                        'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def save_ruptures(self, ruptures):
        """
        Extend the 'events' dataset with the events from the given ruptures;
        also, save the ruptures if the flag `save_ruptures` is on.

        :param ruptures: a list of EBRuptures
        """
        if len(ruptures):
            events = get_events(ruptures)
            dset = self.datastore.extend('events', events)
            if self.oqparam.save_ruptures:
                self.rupser.save(ruptures, eidx=len(dset)-len(events))

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than 65,536, the
        number of IMTs is larger than 256, or the number of events is larger
        than 4,294,967,296. The limits are due to the numpy dtype used to
        store the GMFs (gmv_dt). They could be relaxed in the future.
        """
        max_ = dict(sites=2**16, events=2**32, imts=2**8)
        try:
            events = len(self.datastore['events'])
        except KeyError:
            events = 0
        num_ = dict(sites=len(self.sitecol), events=events,
                    imts=len(self.oqparam.imtls))
        for var in max_:
            if num_[var] > max_[var]:
                raise ValueError(
                    'The event based calculator is restricted to '
                    '%d %s, got %d' % (max_[var], var, num_[var]))

    def execute(self):
        oq = self.oqparam
        self.gmdata = {}
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        param = dict(
            oqparam=oq, min_iml=self.get_min_iml(oq),
            save_ruptures=oq.save_ruptures,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            imtls=oq.imtls, filter_distance=oq.filter_distance,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
        if oq.hazard_calculation_id:  # from ruptures
            assert oq.ground_motion_fields, 'must be True!'
            self.datastore.parent = datastore.read(oq.hazard_calculation_id)
            iterargs = self.from_ruptures(param, self.monitor())
        else:  # from sources
            iterargs = self.from_sources(param, self.monitor())
            if oq.ground_motion_fields is False:
                for args in iterargs:  # store the ruptures/events
                    pass
                return {}
        acc = parallel.Starmap(
            self.core_task.__func__, iterargs, self.monitor()
        ).reduce(self.agg_dicts, self.zerodict())
        self.check_overflow()  # check the number of events
        base.save_gmdata(self, self.R)
        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True,
                              autoflush=True):
                dset = self.datastore.create_dset(
                    'gmf_data/indices', hdf5.vuint32,
                    shape=(N, 2), fillvalue=None)
                for sid in self.sitecol.complete.sids:
                    dset[sid, 0] = self.indices[sid, 0]
                    dset[sid, 1] = self.indices[sid, 1]
        elif (oq.ground_motion_fields and
              'ucerf' not in oq.calculation_mode):
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def setting_events(self):
        """
        Call set_random_years on the events dataset
        """
        if self.oqparam.hazard_calculation_id is None:
            num_events = sum(set_counts(self.datastore, 'events').values())
            if num_events == 0:
                raise RuntimeError(
                    'No seismic events! Perhaps the investigation time is too '
                    'short or the maximum_distance is too small')
            if self.oqparam.save_ruptures:
                logging.info('Setting %d event years on %d ruptures',
                             num_events, self.rupser.nruptures)
            with self.monitor('setting event years', measuremem=True,
                              autoflush=True):
                set_random_years(self.datastore, 'events',
                                 self.oqparam.ses_seed,
                                 int(self.oqparam.investigation_time))

    def post_execute(self, result):
        """
        Save the SES collection
        """
        oq = self.oqparam
        if 'ucerf' in oq.calculation_mode:
            self.rupser.close()
            self.csm.info.update_eff_ruptures(self.csm.get_num_ruptures())
            self.setting_events()
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.csm_info.get_rlzs_assoc().realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save to individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                for statname, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat], weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + statname] = arr
                    if oq.poes:
                        P = len(oq.poes)
                        I = len(oq.imtls)
                        self.datastore.create_dset(
                            'hmaps/' + statname, F32, (N, P * I))
                        self.datastore.set_attrs(
                            'hmaps/' + statname, nbytes=N * P * I * 4)
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        ds = self.datastore['hmaps/' + statname]
                        for sid in hmap:
                            ds[sid] = hmap[sid].array[:, 0]

        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not dominate
            # the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warning('Relative difference with the classical '
                            'mean curves: %d%% at site index %d',
                            rdiff * 100, index)
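
`from_sources` above sends the generated ruptures to the workers in blocks of
bounded weight, grouped by grp_id. A simplified, hypothetical splitter with
the same idea (not the engine's `block_splitter`):

def split_blocks(items, maxweight, weight=lambda item: 1, key=lambda item: None):
    """Yield lists of consecutive items sharing the same key, each block
    with total weight not exceeding maxweight."""
    block, total, current_key = [], 0, object()
    for item in items:
        k, w = key(item), weight(item)
        if block and (k != current_key or total + w > maxweight):
            yield block
            block, total = [], 0
        block.append(item)
        total += w
        current_key = k
    if block:
        yield block

# splitting integers by parity with a weight cap of 4:
# list(split_blocks([1, 3, 5, 2, 4], 4, weight=lambda x: x, key=lambda x: x % 2))
# -> [[1, 3], [5], [2], [4]]
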
Example #3
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    accept_precalc = ['event_based', 'ebrisk', 'event_based_risk']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        if not self.datastore.parent:
            self.rupser = calc.RuptureSerializer(self.datastore)
        self.srcfilter = self.src_filter(self.datastore.tempname)

    def init_logic_tree(self, full_lt):
        self.trt_by_grp = full_lt.trt_by_grp
        self.rlzs_by_gsim_grp = full_lt.get_rlzs_by_gsim_grp()
        self.samples_by_grp = full_lt.get_samples_by_grp()
        self.num_rlzs_by_grp = {
            grp_id:
            sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp
        }

    def acc0(self):
        """
        Initial accumulator, a dictionary rlz_index -> ProbabilityMap
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        return zd

    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        logging.info('Building ruptures')
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(sample_ruptures,
                                allargs,
                                h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        self.init_logic_tree(self.csm.full_lt)
        with self.monitor('store source_info'):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by rup_id
        sorted_ruptures.sort(order='serial')
        nr = len(sorted_ruptures)
        assert len(numpy.unique(sorted_ruptures['serial'])) == nr  # sanity
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore['ruptures']['id'] = numpy.arange(nr)
        with self.monitor('saving events'):
            self.save_events(sorted_ruptures)

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data):
                times = result.pop('times')
                rupids = list(times['rup_id'])
                self.datastore['gmf_data/time_by_rup'][rupids] = times
                hdf5.extend(self.datastore['gmf_data/data'], data)
                sig_eps = result.pop('sig_eps')
                hdf5.extend(self.datastore['gmf_data/sigma_epsilon'], sig_eps)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
        if self.offset >= TWO32:
            raise RuntimeError('The gmf_data table has more than %d rows' %
                               TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        self.datastore.flush()
        return acc

    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with a grp_id field
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = gen_rgetters(self.datastore)
        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info('Building assocs event_id -> rlz_id for {:_d} events'
                     ' and {:_d} ruptures'.format(len(events), len(rup_array)))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                  ((rgetter, ) for rgetter in rgetters),
                                  progress=logging.debug,
                                  h5=self.datastore.hdf5)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # sanity check
        n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        events['id'] = numpy.arange(len(events))
        # set event year and event ses starting from 1
        itime = int(self.oqparam.investigation_time)
        nses = self.oqparam.ses_per_logic_tree_path
        extra = numpy.zeros(len(events), [('year', U16), ('ses_id', U16)])
        numpy.random.seed(self.oqparam.ses_seed)
        extra['year'] = numpy.random.choice(itime, len(events)) + 1
        extra['ses_id'] = numpy.random.choice(nses, len(events)) + 1
        self.datastore['events'] = util.compose_arrays(events, extra)
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['ruptures']['e0'] = arr[:, 0]
        self.datastore['ruptures']['e1'] = arr[:, 1]

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites or the number of events is
        larger than 4,294,967,296, or if the number of IMTs is larger than
        256. The limits are due to the numpy dtype used to store the GMFs
        (gmv_dt). There is also a limit of max_potential_gmfs on the number
        of sites times the number of events, to avoid producing too many
        GMFs; in that case, split the calculation or be smarter.
        """
        oq = self.oqparam
        max_ = dict(sites=TWO32, events=TWO32, imts=2**8)
        num_ = dict(events=self.E, imts=len(self.oqparam.imtls))
        n = len(getattr(self, 'sitecol', ()) or ())
        num_['sites'] = n
        if oq.calculation_mode == 'event_based' and oq.ground_motion_fields:
            if n > oq.max_sites_per_gmf:
                raise ValueError(
                    'You cannot compute the GMFs for %d > %d sites' %
                    (n, oq.max_sites_per_gmf))
            elif n * self.E > oq.max_potential_gmfs:
                raise ValueError(
                    'A GMF calculation with %d sites and %d events is '
                    'impossibly large' % (n, self.E))
        for var in num_:
            if num_[var] > max_[var]:
                raise ValueError(
                    'The %s calculator is restricted to %d %s, got %d' %
                    (oq.calculation_mode, max_[var], var, num_[var]))

    def set_param(self, **kw):
        oq = self.oqparam
        # set the minimum_intensity
        if hasattr(self, 'crmodel') and not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.crmodel.min_iml
        min_iml = oq.min_iml
        if oq.ground_motion_fields and min_iml.sum() == 0:
            logging.warning('The GMFs are not filtered: '
                            'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        self.param.update(oqparam=oq,
                          gmf=oq.ground_motion_fields,
                          truncation_level=oq.truncation_level,
                          imtls=oq.imtls,
                          filter_distance=oq.filter_distance,
                          ses_per_logic_tree_path=oq.ses_per_logic_tree_path,
                          **kw)

    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        self.indices = AccumDict(accum=[])  # sid, idx -> indices
        if oq.hazard_calculation_id:  # from ruptures
            self.datastore.parent = util.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.datastore.parent['full_lt'])
        else:  # from sources
            self.build_events_from_sources()
            if (oq.ground_motion_fields is False
                    and oq.hazard_curves_from_gmfs is False):
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        N = len(self.sitecol.complete)
        if oq.ground_motion_fields:
            nrups = len(self.datastore['ruptures'])
            self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
            self.datastore.create_dset('gmf_data/sigma_epsilon',
                                       sig_eps_dt(oq.imtls))
            self.datastore.create_dset('gmf_data/indices',
                                       hdf5.vuint32,
                                       shape=(N, 2),
                                       fillvalue=None)
            self.datastore.create_dset('gmf_data/events_by_sid', U32, (N, ))
            self.datastore.create_dset('gmf_data/time_by_rup',
                                       time_dt, (nrups, ),
                                       fillvalue=None)
        if oq.hazard_curves_from_gmfs:
            self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

        # compute_gmfs in parallel
        self.datastore.swmr_on()
        logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
        iterargs = ((rgetter, self.srcfilter, self.param)
                    for rgetter in gen_rupture_getters(
                        self.datastore, self.srcfilter, oq.concurrent_tasks))
        acc = parallel.Starmap(self.core_task.__func__,
                               iterargs,
                               h5=self.datastore.hdf5,
                               num_cores=oq.num_cores).reduce(
                                   self.agg_dicts, self.acc0())

        if self.indices:
            dset = self.datastore['gmf_data/indices']
            num_evs = self.datastore['gmf_data/events_by_sid']
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
            avg_events_by_sid = num_evs[()].sum() / N
            logging.info('Found ~%d GMVs per site', avg_events_by_sid)
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs:
            return
        N = len(self.sitecol.complete)
        M = len(oq.imtls)
        L = len(oq.imtls.array)
        L1 = L // M
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.datastore['full_lt'].get_realizations()
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save to individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
                self.datastore.set_shape_attrs('hcurves-rlzs',
                                               site_id=N,
                                               rlz_id=R,
                                               imt=list(oq.imtls),
                                               lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset('hmaps-rlzs', F32,
                                                    (N, R, M, P))
                    self.datastore.set_shape_attrs('hmaps-rlzs',
                                                   site_id=N,
                                                   rlz_id=R,
                                                   imt=list(oq.imtls),
                                                   poe=oq.poes)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array

            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32, (N, S, M, L1))
                self.datastore.set_shape_attrs('hcurves-stats',
                                               site_id=N,
                                               stat=list(hstats),
                                               imt=list(oq.imtls),
                                               lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset('hmaps-stats', F32,
                                                    (N, S, M, P))
                    self.datastore.set_shape_attrs('hmaps-stats',
                                                   site_id=N,
                                                   stat=list(hstats),
                                                   imt=list(oq.imtls),
                                                   poes=oq.poes)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(pmaps, [hstats[stat]], weights,
                                              oq.imtls)
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array

        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            job_id = logs.init('job')
            oq.calculation_mode = 'classical'
            self.cl = ClassicalCalculator(oq, job_id)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not dominate
            # the computation
            self.cl.run()
            engine.expose_outputs(self.datastore)
            for imt in oq.imtls:
                cl_mean_curves = get_mean_curves(self.cl.datastore, imt)
                eb_mean_curves = get_mean_curves(self.datastore, imt)
                self.rdiff, index = util.max_rel_diff_index(
                    cl_mean_curves, eb_mean_curves)
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves: %d%% at site index %d, imt=%s',
                    self.rdiff * 100, index, imt)
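
The limits enforced in `check_overflow` come from the integer widths of the
numpy dtype used to store the GMF rows: a field of a given width can only
address that many distinct site or event ids. A small illustration (the field
names and widths below are made up for the example, not necessarily the
engine's gmv_dt):

import numpy

row_dt = numpy.dtype([('sid', numpy.uint16),   # site id  -> at most 2**16 sites
                      ('eid', numpy.uint32),   # event id -> at most 2**32 events
                      ('gmv', numpy.float32)])
print(numpy.iinfo(row_dt['sid']).max + 1)  # 65536
print(numpy.iinfo(row_dt['eid']).max + 1)  # 4294967296
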
Example #4
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def combine_pmaps_and_save_gmfs(self, acc, results):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param results: dictionaries rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        hdf5path = self.datastore.hdf5path
        for res in results:
            self.gmdata += res['gmdata']
            data = res['gmfdata']
            with sav_mon:
                hdf5.extend3(hdf5path, 'gmf_data/data', data)
                # it is important to save the number of bytes while the
                # computation is running, to track the progress
                update_nbytes(self.datastore, 'gmf_data/data', data)
                for sid, start, stop in res['indices']:
                    self.indices[sid].append(
                        (start + self.offset, stop + self.offset))
                self.offset += len(data)
            slicedic = self.oqparam.imtls.slicedic
            with agg_mon:
                for key, poes in res['hcurves'].items():
                    r, sid, imt = str2rsi(key)
                    array = acc[r].setdefault(sid, 0).array[slicedic[imt], 0]
                    array[:] = 1. - (1. - array) * (1. - poes)
            sav_mon.flush()
            agg_mon.flush()
            self.datastore.flush()
            if 'ruptures' in res:
                vars(EventBasedRuptureCalculator)['save_ruptures'](
                    self, res['ruptures'])
        return acc

    def gen_args(self):
        """
        :yields: the arguments for compute_gmfs_and_curves
        """
        oq = self.oqparam
        sitecol = self.sitecol.complete
        monitor = self.monitor(self.core_task.__name__)
        imts = list(oq.imtls)
        min_iml = self.get_min_iml(oq)
        correl_model = oq.get_correl_model()
        try:
            csm_info = self.csm.info
        except AttributeError:  # no csm
            csm_info = self.datastore['csm_info']
        samples_by_grp = csm_info.get_samples_by_grp()
        rlzs_by_gsim = {
            grp_id: self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
            for grp_id in samples_by_grp
        }
        if self.precalc:
            num_ruptures = sum(len(rs) for rs in self.precalc.result.values())
            block_size = math.ceil(num_ruptures / (oq.concurrent_tasks or 1))
            for grp_id, ruptures in self.precalc.result.items():
                if not ruptures:
                    continue
                for block in block_splitter(ruptures, block_size):
                    getter = GmfGetter(rlzs_by_gsim[grp_id], block, sitecol,
                                       imts, min_iml, oq.maximum_distance,
                                       oq.truncation_level, correl_model,
                                       oq.filter_distance,
                                       samples_by_grp[grp_id])
                    yield [getter], oq, monitor
            return
        U = len(self.datastore['ruptures'])
        logging.info('Found %d ruptures', U)
        parent = self.can_read_parent() or self.datastore
        for slc in split_in_slices(U, oq.concurrent_tasks or 1):
            getters = []
            for grp_id in rlzs_by_gsim:
                ruptures = RuptureGetter(parent, slc, grp_id)
                if parent is self.datastore:  # not accessible parent
                    ruptures = list(ruptures)
                    if not ruptures:
                        continue
                getters.append(
                    GmfGetter(rlzs_by_gsim[grp_id], ruptures, sitecol, imts,
                              min_iml, oq.maximum_distance,
                              oq.truncation_level, correl_model,
                              oq.filter_distance, samples_by_grp[grp_id]))
            yield getters, oq, monitor

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        calc.check_overflow(self)

        self.csm_info = self.datastore['csm_info']
        self.sm_id = {
            tuple(sm.path): sm.ordinal
            for sm in self.csm_info.source_models
        }
        L = len(oq.imtls.array)
        R = self.datastore['csm_info'].get_num_rlzs()
        self.gmdata = {}
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid -> indices
        ires = parallel.Starmap(self.core_task.__func__,
                                self.gen_args()).submit_all()
        if self.precalc and self.precalc.result:
            # remove the ruptures in memory to save memory
            self.precalc.result.clear()
        acc = ires.reduce(self.combine_pmaps_and_save_gmfs,
                          {r: ProbabilityMap(L)
                           for r in range(R)})
        base.save_gmdata(self, R)
        if self.indices:
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices',
                              measuremem=True,
                              autoflush=True):
                self.datastore.save_vlen('gmf_data/indices', [
                    numpy.array(self.indices[sid], indices_dt)
                    for sid in self.sitecol.complete.sids
                ])
        else:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def post_execute(self, result):
        """
        :param result:
            a dictionary (src_group_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # save individual curves
            for i in sorted(result):
                key = 'hcurves/rlz-%03d' % i
                if result[i]:
                    self.datastore[key] = result[i]
                else:
                    self.datastore[key] = ProbabilityMap(oq.imtls.array.size)
                    logging.info('Zero curves for %s', key)
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            hstats = self.oqparam.hazard_stats()
            if len(hstats) and len(rlzs) > 1:
                logging.info('Computing statistical hazard curves')
                for kind, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat], weights)
                    self.datastore['hcurves/' + kind] = pmap
        if self.datastore.parent:
            self.datastore.parent.open()
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not dominate
            # the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            for imt in eb_mean_curves.dtype.names:
                rdiff, index = util.max_rel_diff_index(cl_mean_curves[imt],
                                                       eb_mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d', imt,
                    rdiff * 100, index)
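
`combine_pmaps_and_save_gmfs` above records, for every site, the (start, stop)
row ranges that its GMVs occupy in the growing gmf_data/data dataset; the
running offset turns the per-task local indices into absolute row numbers. A
minimal sketch of that bookkeeping, detached from HDF5 (the fake task results
are invented for illustration):

import collections

indices = collections.defaultdict(list)  # sid -> [(start, stop), ...]
offset = 0

# each fake task result: total rows appended and local (start, stop) per site
fake_results = [(5, {0: (0, 3), 1: (3, 5)}),
                (4, {0: (0, 4)})]
for nrows, local_indices in fake_results:
    for sid, (start, stop) in local_indices.items():
        indices[sid].append((start + offset, stop + offset))
    offset += nrows

print(dict(indices))  # {0: [(0, 3), (5, 9)], 1: [(3, 5)]}
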
Example #5
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    accept_precalc = ['event_based', 'ebrisk', 'event_based_risk']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        if hasattr(self.oqparam, 'maximum_distance'):
            self.srcfilter = self.src_filter(self.datastore.tempname)
        else:
            self.srcfilter = nofilter
        if not self.datastore.parent:
            self.datastore.create_dset('ruptures', rupture_dt)
            self.datastore.create_dset('rupgeoms', hdf5.vfloat32)

    def acc0(self):
        """
        Initial accumulator, a dictionary rlz -> ProbabilityMap
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        return zd

    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        logging.info('Building ruptures')
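        # heuristic task size: the total weight of the source groups is
        # split roughly evenly across the available concurrent tasks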
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(
            sample_ruptures, allargs, h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        self.nruptures = 0
        for dic in smap:
            rup_array = dic['rup_array']
            if len(rup_array) == 0:
                continue
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            with mon:
                n = len(rup_array)
                rup_array['id'] = numpy.arange(
                    self.nruptures, self.nruptures + n)
                self.nruptures += n
                hdf5.extend(self.datastore['ruptures'], rup_array)
                hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
        if len(self.datastore['ruptures']) == 0:
            if os.environ.get('OQ_SAMPLE_SOURCES'):
                raise SystemExit(0)  # success even with no ruptures
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # must be called before storing the events
        self.store_rlz_info(eff_ruptures)  # store full_lt
        with self.monitor('store source_info'):
            self.store_source_info(calc_times)
        imp = calc.RuptureImporter(self.datastore)
        with self.monitor('saving ruptures and events'):
            imp.import_rups(self.datastore.getitem('ruptures')[()])

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data):
                times = result.pop('times')
                rupids = list(times['rup_id'])
                self.datastore['gmf_data/time_by_rup'][rupids] = times
                hdf5.extend(self.datastore['gmf_data/data'], data)
                sig_eps = result.pop('sig_eps')
                hdf5.extend(self.datastore['gmf_data/sigma_epsilon'], sig_eps)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
        if self.offset >= TWO32:
            raise RuntimeError(
                'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
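                # combine the PoEs coming from different tasks: the
                # probability of not exceeding a level is the product of
                # the per-task non-exceedance probabilities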
                array[:] = 1. - (1. - array) * (1. - poes)
        self.datastore.flush()
        return acc

    def set_param(self, **kw):
        oq = self.oqparam
        # set the minimum_intensity
        if hasattr(self, 'crmodel') and not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.crmodel.min_iml
        min_iml = oq.min_iml
        if oq.ground_motion_fields and min_iml.sum() == 0:
            logging.warning('The GMFs are not filtered: '
                            'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        self.param.update(
            oqparam=oq,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            imtls=oq.imtls, filter_distance=oq.filter_distance,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw)

    def _read_scenario_ruptures(self):
        oq = self.oqparam
        if oq.inputs['rupture_model'].endswith(('.xml', '.toml', '.txt')):
            self.gsims = readinput.get_gsims(oq)
            self.cmaker = ContextMaker(
                '*', self.gsims,
                {'maximum_distance': oq.maximum_distance,
                 'filter_distance': oq.filter_distance})
            n_occ = numpy.array([oq.number_of_ground_motion_fields])
            rup = readinput.get_rupture(oq)
            ebr = EBRupture(rup, 0, 0, n_occ)
            ebr.e0 = 0
            rup_array = get_rup_array([ebr], self.srcfilter).array
            mesh = surface_to_array(rup.surface).transpose(1, 2, 0).flatten()
            hdf5.extend(self.datastore['rupgeoms'],
                        numpy.array([mesh], object))
        elif oq.inputs['rupture_model'].endswith('.csv'):
            aw = readinput.get_ruptures(oq.inputs['rupture_model'])
            rup_array = aw.array
            hdf5.extend(self.datastore['rupgeoms'], aw.geom)

        if len(rup_array) == 0:
            raise RuntimeError(
                'There are no sites within the maximum_distance'
                ' of %s km from the rupture' % oq.maximum_distance(
                    rup.tectonic_region_type, rup.mag))

        gsim_lt = readinput.get_gsim_lt(self.oqparam)
        # check the number of branchsets
        branchsets = len(gsim_lt._ltnode)
        if len(rup_array) == 1 and branchsets > 1:
            raise InvalidFile(
                '%s for a scenario calculation must contain a single '
                'branchset, found %d!' % (oq.inputs['job_ini'], branchsets))

        fake = logictree.FullLogicTree.fake(gsim_lt)
        self.realizations = fake.get_realizations()
        self.datastore['full_lt'] = fake
        self.store_rlz_info({})  # store weights
        self.save_params()
        calc.RuptureImporter(self.datastore).import_rups(rup_array)

    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        self.indices = AccumDict(accum=[])  # sid, idx -> indices
        if oq.hazard_calculation_id:  # from ruptures
            self.datastore.parent = util.read(oq.hazard_calculation_id)
        elif hasattr(self, 'csm'):  # from sources
            self.build_events_from_sources()
            if (oq.ground_motion_fields is False and
                    oq.hazard_curves_from_gmfs is False):
                return {}
        elif 'rupture_model' not in oq.inputs:
            logging.warning(
                'There is no rupture_model, the calculator will just '
                'import data without performing any calculation')
            return {}
        else:  # scenario
            self._read_scenario_ruptures()
            if (oq.ground_motion_fields is False and
                    oq.hazard_curves_from_gmfs is False):
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        N = len(self.sitecol.complete)
        if oq.ground_motion_fields:
            nrups = len(self.datastore['ruptures'])
            self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
            self.datastore.create_dset('gmf_data/sigma_epsilon',
                                       sig_eps_dt(oq.imtls))
            self.datastore.create_dset(
                'gmf_data/indices', hdf5.vuint32, shape=(N, 2), fillvalue=None)
            self.datastore.create_dset('gmf_data/events_by_sid', U32, (N,))
            self.datastore.create_dset('gmf_data/time_by_rup',
                                       time_dt, (nrups,), fillvalue=None)
        if oq.hazard_curves_from_gmfs:
            self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

        # compute_gmfs in parallel
        nr = len(self.datastore['ruptures'])
        self.datastore.swmr_on()
        logging.info('Reading %d ruptures', nr)
        iterargs = ((rgetter, self.srcfilter, self.param)
                    for rgetter in gen_rupture_getters(
                            self.datastore, self.srcfilter,
                            oq.concurrent_tasks))
        acc = parallel.Starmap(
            self.core_task.__func__, iterargs, h5=self.datastore.hdf5,
            num_cores=oq.num_cores
        ).reduce(self.agg_dicts, self.acc0())

        if self.indices:
            dset = self.datastore['gmf_data/indices']
            num_evs = self.datastore['gmf_data/events_by_sid']
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
            avg_events_by_sid = num_evs[()].sum() / N
            logging.info('Found ~%d GMVs per site', avg_events_by_sid)
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs:
            return
        N = len(self.sitecol.complete)
        M = len(oq.imtls)  # 0 in scenario
        L = len(oq.imtls.array)
        L1 = L // (M or 1)
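        # L is the total number of intensity levels over all IMTs, so each
        # hazard curve is stored as an array of shape (M, L1) with L1
        # levels per IMT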
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.datastore['full_lt'].get_realizations()
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save to individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
                self.datastore.set_shape_attrs(
                    'hcurves-rlzs', site_id=N, rlz_id=R,
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-rlzs', F32, (N, R, M, P))
                    self.datastore.set_shape_attrs(
                        'hmaps-rlzs', site_id=N, rlz_id=R,
                        imt=list(oq.imtls), poe=oq.poes)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array

            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32, (N, S, M, L1))
                self.datastore.set_shape_attrs(
                    'hcurves-stats', site_id=N, stat=list(hstats),
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-stats', F32, (N, S, M, P))
                    self.datastore.set_shape_attrs(
                        'hmaps-stats', site_id=N, stat=list(hstats),
                        imt=list(oq.imtls), poes=oq.poes)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(
                        pmaps, [hstats[stat]], weights, oq.imtls)
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array

        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            job_id = logs.init('job')
            oq.calculation_mode = 'classical'
            self.cl = ClassicalCalculator(oq, job_id)
            # TODO: perhaps it is possible to avoid reprocessing the
            # source model; however, this is usually quite fast and does
            # not dominate the computation
            self.cl.run()
            engine.expose_outputs(self.cl.datastore)
            for imt in oq.imtls:
                cl_mean_curves = get_mean_curves(self.cl.datastore, imt)
                eb_mean_curves = get_mean_curves(self.datastore, imt)
                self.rdiff, index = util.max_rel_diff_index(
                    cl_mean_curves, eb_mean_curves)
                logging.warning('Relative difference with the classical '
                                'mean curves: %d%% at site index %d, imt=%s',
                                self.rdiff * 100, index, imt)
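As an aside, for the mean statistic compute_pmap_stats used above reduces to a weighted average of the per-realization curves; a minimal numpy sketch under that assumption (illustrative only, the engine works on ProbabilityMap objects and supports other statistics such as quantiles):

import numpy

# hypothetical PoE curves for R=3 realizations, N=2 sites, L=4 levels
curves = numpy.array([
    [[0.90, 0.50, 0.10, 0.01], [0.80, 0.40, 0.05, 0.00]],
    [[0.85, 0.45, 0.12, 0.02], [0.75, 0.35, 0.04, 0.00]],
    [[0.95, 0.55, 0.08, 0.01], [0.82, 0.42, 0.06, 0.00]],
])
weights = numpy.array([0.5, 0.25, 0.25])  # realization weights, summing to 1

mean_curves = numpy.einsum('r,rnl->nl', weights, curves)
print(mean_curves.shape)  # (2, 4): one mean curve per site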
Example #6
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    accept_precalc = ['event_based', 'ebrisk', 'event_based_risk']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        if hasattr(self.oqparam, 'maximum_distance'):
            self.srcfilter = self.src_filter()
        else:
            self.srcfilter = nofilter
        if not self.datastore.parent:
            self.datastore.create_dset('ruptures', rupture_dt)
            self.datastore.create_dset('rupgeoms', hdf5.vfloat32)

    def acc0(self):
        """
        Initial accumulator, a dictionary rlz -> ProbabilityMap
        """
        self.L = self.oqparam.imtls.size
        return {r: ProbabilityMap(self.L) for r in range(self.R)}

    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        sources = self.csm.get_sources()
        # weighting the heavy sources
        nrups = parallel.Starmap(
            count_ruptures, [(src,) for src in sources if src.code in b'AMC'],
            progress=logging.debug
        ).reduce()
        for src in sources:
            src.nsites = 1  # avoid 0 weight
            try:
                src.num_ruptures = nrups[src.source_id]
            except KeyError:
                src.num_ruptures = src.count_ruptures()
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        logging.info('Building ruptures')
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(
            sample_ruptures, allargs, h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        self.nruptures = 0
        for dic in smap:
            # NB: dic should be a dictionary, but when the calculation dies
            # from an OOM error it can become None, giving a very confusing error
            if dic is None:
                raise MemoryError('You ran out of memory!')
            rup_array = dic['rup_array']
            if len(rup_array) == 0:
                continue
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            with mon:
                n = len(rup_array)
                rup_array['id'] = numpy.arange(
                    self.nruptures, self.nruptures + n)
                self.nruptures += n
                hdf5.extend(self.datastore['ruptures'], rup_array)
                hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
        if len(self.datastore['ruptures']) == 0:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # must be called before storing the events
        self.store_rlz_info(eff_ruptures)  # store full_lt
        self.store_source_info(calc_times)
        imp = calc.RuptureImporter(self.datastore)
        with self.monitor('saving ruptures and events'):
            imp.import_rups_events(
                self.datastore.getitem('ruptures')[()], get_rupture_getters)

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        primary = self.oqparam.get_primary_imtls()
        sec_imts = self.oqparam.get_sec_imts()
        with sav_mon:
            df = result.pop('gmfdata')
            if len(df):
                dset = self.datastore['gmf_data/sid']
                times = result.pop('times')
                [task_no] = numpy.unique(times['task_no'])
                rupids = list(times['rup_id'])
                self.datastore['gmf_data/time_by_rup'][rupids] = times
                hdf5.extend(dset, df.sid.to_numpy())
                hdf5.extend(self.datastore['gmf_data/eid'], df.eid.to_numpy())
                for m in range(len(primary)):
                    hdf5.extend(self.datastore[f'gmf_data/gmv_{m}'],
                                df[f'gmv_{m}'])
                for sec_imt in sec_imts:
                    hdf5.extend(self.datastore[f'gmf_data/{sec_imt}'],
                                df[sec_imt])
                sig_eps = result.pop('sig_eps')
                hdf5.extend(self.datastore['gmf_data/sigma_epsilon'], sig_eps)
                self.offset += len(df)
        if self.offset >= TWO32:
            raise RuntimeError(
                'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        self.datastore.flush()
        return acc

    def set_param(self, **kw):
        oq = self.oqparam
        if oq.ground_motion_fields and oq.min_iml.sum() == 0:
            logging.warning('The GMFs are not filtered: '
                            'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        self.param.update(
            oqparam=oq,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            imtls=oq.imtls,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw)

    def _read_scenario_ruptures(self):
        oq = self.oqparam
        gsim_lt = readinput.get_gsim_lt(self.oqparam)
        G = gsim_lt.get_num_paths()
        if oq.calculation_mode.startswith('scenario'):
            ngmfs = oq.number_of_ground_motion_fields
        if oq.inputs['rupture_model'].endswith('.xml'):
            self.gsims = [gsim_rlz.value[0] for gsim_rlz in gsim_lt]
            self.cmaker = ContextMaker(
                '*', self.gsims, {'maximum_distance': oq.maximum_distance,
                                  'imtls': oq.imtls})
            rup = readinput.get_rupture(oq)
            if self.N > oq.max_sites_disagg:  # many sites, split rupture
                ebrs = [EBRupture(copyobj(rup, rup_id=rup.rup_id + i),
                                  0, 0, G, e0=i * G) for i in range(ngmfs)]
            else:  # keep a single rupture with a big occupation number
                ebrs = [EBRupture(rup, 0, 0, G * ngmfs, rup.rup_id)]
            aw = get_rup_array(ebrs, self.srcfilter)
            if len(aw) == 0:
                raise RuntimeError(
                    'The rupture is too far from the sites! Please check the '
                    'maximum_distance and the position of the rupture')
        elif oq.inputs['rupture_model'].endswith('.csv'):
            aw = readinput.get_ruptures(oq.inputs['rupture_model'])
            num_gsims = numpy.array(
                [len(gsim_lt.values[trt]) for trt in gsim_lt.values], U32)
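            # in scenario mode each rupture occurrence must be repeated once
            # per requested GMF and once per GSIM of its tectonic region type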
            if oq.calculation_mode.startswith('scenario'):
                # rescale n_occ
                aw['n_occ'] *= ngmfs * num_gsims[aw['trt_smr']]
        rup_array = aw.array
        hdf5.extend(self.datastore['rupgeoms'], aw.geom)

        if len(rup_array) == 0:
            raise RuntimeError(
                'There are no sites within the maximum_distance'
                ' of %s km from the rupture' % oq.maximum_distance(
                    rup.tectonic_region_type, rup.mag))

        # check the number of branchsets
        branchsets = len(gsim_lt._ltnode)
        if len(rup_array) == 1 and branchsets > 1:
            raise InvalidFile(
                '%s for a scenario calculation must contain a single '
                'branchset, found %d!' % (oq.inputs['job_ini'], branchsets))

        fake = logictree.FullLogicTree.fake(gsim_lt)
        self.realizations = fake.get_realizations()
        self.datastore['full_lt'] = fake
        self.store_rlz_info({})  # store weights
        self.save_params()
        imp = calc.RuptureImporter(self.datastore)
        imp.import_rups_events(rup_array, get_rupture_getters)

    def execute(self):
        oq = self.oqparam
        dstore = self.datastore
        self.set_param()
        self.offset = 0
        if oq.hazard_calculation_id:  # from ruptures
            dstore.parent = datastore.read(oq.hazard_calculation_id)
        elif hasattr(self, 'csm'):  # from sources
            self.build_events_from_sources()
            if (oq.ground_motion_fields is False and
                    oq.hazard_curves_from_gmfs is False):
                return {}
        elif 'rupture_model' not in oq.inputs:
            logging.warning(
                'There is no rupture_model, the calculator will just '
                'import data without performing any calculation')
            fake = logictree.FullLogicTree.fake()
            dstore['full_lt'] = fake  # needed to expose the outputs
            dstore['weights'] = [1.]
            return {}
        else:  # scenario
            self._read_scenario_ruptures()
            if (oq.ground_motion_fields is False and
                    oq.hazard_curves_from_gmfs is False):
                return {}

        if oq.ground_motion_fields:
            imts = oq.get_primary_imtls()
            nrups = len(dstore['ruptures'])
            base.create_gmf_data(dstore, imts, oq.get_sec_imts())
            dstore.create_dset('gmf_data/sigma_epsilon', sig_eps_dt(oq.imtls))
            dstore.create_dset('gmf_data/time_by_rup',
                               time_dt, (nrups,), fillvalue=None)

        # compute_gmfs in parallel
        nr = len(dstore['ruptures'])
        logging.info('Reading {:_d} ruptures'.format(nr))
        rgetters = get_rupture_getters(dstore, oq.concurrent_tasks * 1.25,
                                       srcfilter=self.srcfilter)
        allargs = [(rgetter, self.param) for rgetter in rgetters]
        dstore.swmr_on()
        smap = parallel.Starmap(
            self.core_task.__func__, allargs, h5=dstore.hdf5)
        smap.monitor.save('srcfilter', self.srcfilter)
        acc = smap.reduce(self.agg_dicts, self.acc0())
        if 'gmf_data' not in dstore:
            return acc
        if oq.ground_motion_fields:
            with self.monitor('saving avg_gmf', measuremem=True):
                self.save_avg_gmf()
        return acc

    def save_avg_gmf(self):
        """
        Compute and save avg_gmf, unless there are too many GMFs
        """
        size = self.datastore.getsize('gmf_data')
        logging.info(f'Stored {humansize(size)} of GMFs')
        if size > 100 * 1024**2:
            logging.warning(
                'There are more than 100 MB of GMFs, not computing avg_gmf')
            return numpy.unique(self.datastore['gmf_data/eid'][:])

        rlzs = self.datastore['events']['rlz_id']
        self.weights = self.datastore['weights'][:][rlzs]
        gmf_df = self.datastore.read_df('gmf_data', 'sid')
        for sec_imt in self.oqparam.get_sec_imts():  # ignore secondary perils
            del gmf_df[sec_imt]
        rel_events = gmf_df.eid.unique()
        e = len(rel_events)
        if e == 0:
            raise RuntimeError(
                'No GMFs were generated, perhaps they were '
                'all below the minimum_intensity threshold')
        elif e < len(self.datastore['events']):
            self.datastore['relevant_events'] = rel_events
            logging.info('Stored {:_d} relevant event IDs'.format(e))

        # really compute and store the avg_gmf
        M = len(self.oqparam.min_iml)
        avg_gmf = numpy.zeros((2, self.N, M), F32)
        for sid, avgstd in compute_avg_gmf(
                gmf_df, self.weights, self.oqparam.min_iml).items():
            avg_gmf[:, sid] = avgstd
        self.datastore['avg_gmf'] = avg_gmf
        return rel_events

    def post_execute(self, result):
        oq = self.oqparam
        if (not result or not oq.ground_motion_fields and not
                oq.hazard_curves_from_gmfs):
            return
        N = len(self.sitecol.complete)
        M = len(oq.imtls)  # 0 in scenario
        L = oq.imtls.size
        L1 = L // (M or 1)
        # check seed dependency unless the number of GMFs is huge
        if ('gmf_data' in self.datastore and
                self.datastore.getsize('gmf_data') < 1E9):
            logging.info('Checking seed dependency')
            err = views.view('gmf_error', self.datastore)
            if err > .05:
                logging.warning('Your results are expected to have a large '
                                'dependency from ses_seed')
        if oq.hazard_curves_from_gmfs:
            rlzs = self.datastore['full_lt'].get_realizations()
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save to individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
                self.datastore.set_shape_descr(
                    'hcurves-rlzs', site_id=N, rlz_id=R,
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-rlzs', F32, (N, R, M, P))
                    self.datastore.set_shape_descr(
                        'hmaps-rlzs', site_id=N, rlz_id=R,
                        imt=list(oq.imtls), poe=oq.poes)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array

            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32, (N, S, M, L1))
                self.datastore.set_shape_descr(
                    'hcurves-stats', site_id=N, stat=list(hstats),
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-stats', F32, (N, S, M, P))
                    self.datastore.set_shape_descr(
                        'hmaps-stats', site_id=N, stat=list(hstats),
                        imt=list(oq.imtls), poes=oq.poes)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(
                        pmaps, [hstats[stat]], weights, oq.imtls)
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            oq.calculation_mode = 'classical'
            with logs.init('job', vars(oq)) as log:
                self.cl = ClassicalCalculator(oq, log.calc_id)
                # TODO: perhaps it is possible to avoid reprocessing the
                # source model; however, this is usually quite fast and does
                # not dominate the computation
                self.cl.run()
                engine.expose_outputs(self.cl.datastore)
                for imt in oq.imtls:
                    cl_mean_curves = get_mean_curves(self.cl.datastore, imt)
                    eb_mean_curves = get_mean_curves(self.datastore, imt)
                    self.rdiff, index = util.max_rel_diff_index(
                        cl_mean_curves, eb_mean_curves)
                    logging.warning(
                        'Relative difference with the classical '
                        'mean curves: %d%% at site index %d, imt=%s',
                        self.rdiff * 100, index, imt)
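All the execute methods above follow the same map-reduce pattern: the task results are folded one by one into the initial accumulator via agg_dicts. A toy sketch of that pattern with functools.reduce (the real Starmap runs the tasks in parallel and streams the results back as they arrive):

import functools

def acc0():
    # initial accumulator: one PoE value per realization
    # (a scalar stand-in for a ProbabilityMap)
    return {r: 0.0 for r in range(2)}

def agg(acc, result):
    # fold one task result into the accumulator with the
    # non-exceedance product rule
    for r, poe in result.items():
        acc[r] = 1. - (1. - acc[r]) * (1. - poe)
    return acc

task_results = [{0: 0.1, 1: 0.2}, {0: 0.3, 1: 0.0}]
print(functools.reduce(agg, task_results, acc0()))  # ~{0: 0.37, 1: 0.2}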
Example #7
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    accept_precalc = ['event_based', 'event_based_risk', 'ucerf_hazard']
    build_ruptures = sample_ruptures

    @cached_property
    def csm_info(self):
        """
        :returns: a cached CompositionInfo object
        """
        try:
            return self.csm.info
        except AttributeError:
            return self.datastore.parent['csm_info']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        self.rupser = calc.RuptureSerializer(self.datastore)

    def init_logic_tree(self, csm_info):
        self.trt_by_grp = csm_info.grp_by("trt")
        self.rlzs_assoc = csm_info.get_rlzs_assoc()
        self.rlzs_by_gsim_grp = csm_info.get_rlzs_by_gsim_grp()
        self.samples_by_grp = csm_info.get_samples_by_grp()
        self.num_rlzs_by_grp = {
            grp_id:
            sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp
        }

    def acc0(self):
        """
        Initial accumulator, a dictionary rlz -> ProbabilityMap
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        return zd

    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values
        logging.info('Building ruptures')
        smap = parallel.Starmap(self.build_ruptures.__func__,
                                hdf5path=self.datastore.filename)
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(2, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                if sg.atomic:  # do not split the group
                    smap.submit(sg, self.src_filter, par)
                else:  # traditional groups
                    for block in self.block_splitter(sg.sources, key=by_grp):
                        if 'ucerf' in oq.calculation_mode:
                            for i in range(oq.ses_per_logic_tree_path):
                                par['ses_seeds'] = [(ses_idx,
                                                     oq.ses_seed + i + 1)]
                                smap.submit(block, self.src_filter, par)
                                ses_idx += 1
                        else:
                            smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        with self.monitor('saving events'):
            self.save_events(sorted_ruptures)

    def gen_rupture_getters(self):
        """
        :returns: a list of RuptureGetters
        """
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        hdf5cache = dstore.hdf5cache()
        mode = 'r+' if os.path.exists(hdf5cache) else 'w'
        with hdf5.File(hdf5cache, mode) as cache:
            if 'ruptures' not in cache:
                dstore.hdf5.copy('ruptures', cache)
            if 'rupgeoms' not in cache:
                dstore.hdf5.copy('rupgeoms', cache)
        yield from gen_rupture_getters(
            dstore,
            concurrent_tasks=self.oqparam.concurrent_tasks or 1,
            hdf5cache=hdf5cache)
        if self.datastore.parent:
            self.datastore.parent.close()

    @cached_property
    def eid2idx(self):
        """
        :returns: a dict eid -> index in the events table
        """
        return dict(zip(self.datastore['events']['id'], range(self.E)))

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data):
                idxs = base.get_idxs(data, self.eid2idx)  # this has to be fast
                data['eid'] = idxs  # replace eid with idx
                self.datastore.extend('gmf_data/data', data)
                sig_eps = result.pop('sig_eps')
                sig_eps['eid'] = base.get_idxs(sig_eps, self.eid2idx)
                self.datastore.extend('gmf_data/sigma_epsilon', sig_eps)
                # it is important to save the number of bytes while the
                # computation is going, to see the progress
                update_nbytes(self.datastore, 'gmf_data/data', data)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
                if self.offset >= TWO32:
                    raise RuntimeError(
                        'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        self.datastore.flush()
        return acc

    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()

        # build the associations eid -> rlz sequentially or in parallel
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info('Building associations event_id -> rlz_id for %d events',
                     len(events))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            it = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                  ((rgetter, ) for rgetter in rgetters),
                                  progress=logging.debug,
                                  hdf5path=self.datastore.filename)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='id')  # fast too
        n_unique_events = len(numpy.unique(events['id']))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        self.datastore['events'] = events

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites or of events is larger
        than 4,294,967,296 or the number of IMTs is larger than 256. The
        limits are due to the numpy dtype used to store the GMFs (gmv_dt).
        They could be relaxed in the future.
        """
        oq = self.oqparam
        max_ = dict(sites=TWO32, events=TWO32, imts=2**8)
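        # TWO32 = 2 ** 32, i.e. the number of distinct values representable
        # by the 32 bit fields mentioned in the docstring above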
        num_ = dict(events=self.E, imts=len(self.oqparam.imtls))
        if self.sitecol:
            num_['sites'] = n = len(self.sitecol)
            if (oq.calculation_mode == 'event_based'
                    and oq.ground_motion_fields and n > oq.max_sites_per_gmf):
                raise ValueError(
                    'You cannot compute the GMFs for %d > %d sites' %
                    (n, oq.max_sites_per_gmf))
        for var in num_:
            if num_[var] > max_[var]:
                raise ValueError(
                    'The %s calculator is restricted to %d %s, got %d' %
                    (oq.calculation_mode, max_[var], var, num_[var]))

    def set_param(self, **kw):
        oq = self.oqparam
        # set the minimum_intensity
        if hasattr(self, 'crmodel') and not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.crmodel.min_iml
        min_iml = oq.min_iml
        if min_iml.sum() == 0:
            logging.warning('The GMFs are not filtered: '
                            'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        self.param.update(oqparam=oq,
                          gmf=oq.ground_motion_fields,
                          truncation_level=oq.truncation_level,
                          ruptures_per_block=oq.ruptures_per_block,
                          imtls=oq.imtls,
                          filter_distance=oq.filter_distance,
                          ses_per_logic_tree_path=oq.ses_per_logic_tree_path,
                          **kw)

    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        if oq.hazard_calculation_id and 'ruptures' in self.datastore:
            # from ruptures
            self.datastore.parent = util.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.csm_info)
        else:
            # from sources
            self.build_events_from_sources()
            if (oq.ground_motion_fields is False
                    and oq.hazard_curves_from_gmfs is False):
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        iterargs = ((rgetter, self.src_filter, self.param)
                    for rgetter in self.gen_rupture_getters())
        # call compute_gmfs in parallel
        acc = parallel.Starmap(self.core_task.__func__,
                               iterargs,
                               hdf5path=self.datastore.filename).reduce(
                                   self.agg_dicts, self.acc0())

        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices',
                              measuremem=True,
                              autoflush=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                dset = self.datastore.create_dset('gmf_data/indices',
                                                  hdf5.vuint32,
                                                  shape=(N, 2),
                                                  fillvalue=None)
                num_evs = self.datastore.create_dset('gmf_data/events_by_sid',
                                                     U32, (N, ))
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
                num_evs = num_evs[()]
                avg_events_by_sid = num_evs.sum() / N
                logging.info('Found ~%d GMVs per site', avg_events_by_sid)
                self.datastore.set_attrs('gmf_data',
                                         avg_events_by_sid=avg_events_by_sid,
                                         max_events_by_sid=num_evs.max())
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs:
            return
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save to individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, L))
                self.datastore.set_attrs('hcurves-rlzs', nbytes=N * R * L * 4)
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset('hmaps-rlzs', F32,
                                                    (N, R, M, P))
                    self.datastore.set_attrs('hmaps-rlzs',
                                             nbytes=N * R * P * M * 4)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array

            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32, (N, S, L))
                self.datastore.set_attrs('hcurves-stats', nbytes=N * S * L * 4)
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset('hmaps-stats', F32,
                                                    (N, S, M, P))
                    self.datastore.set_attrs('hmaps-stats',
                                             nbytes=N * S * P * M * 4)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(pmaps, [hstats[stat]], weights,
                                              oq.imtls)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array

        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            job_id = logs.init('job')
            oq.calculation_mode = 'classical'
            self.cl = ClassicalCalculator(oq, job_id)
            # TODO: perhaps it is possible to avoid reprocessing the
            # source model; however, this is usually quite fast and does
            # not dominate the computation
            self.cl.run(close=False)
            engine.expose_outputs(self.cl.datastore)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            self.rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warning(
                'Relative difference with the classical '
                'mean curves: %d%% at site index %d', self.rdiff * 100, index)
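For context, the hmaps-* datasets above are filled by calc.make_hmap: conceptually, a hazard map value is the intensity level at which the hazard curve crosses a target PoE. A rough sketch of that idea with plain interpolation (illustrative only; the engine's implementation interpolates differently and handles edge cases):

import numpy

imls = numpy.array([0.05, 0.1, 0.2, 0.4, 0.8])        # intensity levels
poes_curve = numpy.array([0.9, 0.6, 0.3, 0.1, 0.02])  # hazard curve (decreasing)
target_poes = [0.1, 0.02]

# numpy.interp wants increasing x values, so reverse the curve
hmap = numpy.interp(target_poes, poes_curve[::-1], imls[::-1])
print(hmap)  # [0.4 0.8]: levels exceeded with 10% and 2% probability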
Example #8
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    accept_precalc = ['event_based', 'event_based_risk', 'ucerf_hazard']
    build_ruptures = sample_ruptures

    @cached_property
    def csm_info(self):
        """
        :returns: a cached CompositionInfo object
        """
        try:
            return self.csm.info
        except AttributeError:
            return self.datastore.parent['csm_info']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        self.rupser = calc.RuptureSerializer(self.datastore)

    def init_logic_tree(self, csm_info):
        self.trt_by_grp = csm_info.grp_by("trt")
        self.rlzs_assoc = csm_info.get_rlzs_assoc()
        self.rlzs_by_gsim_grp = csm_info.get_rlzs_by_gsim_grp()
        self.samples_by_grp = csm_info.get_samples_by_grp()
        self.num_rlzs_by_grp = {
            grp_id:
            sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp}

    def acc0(self):
        """
        Initial accumulator, a dictionary rlz -> ProbabilityMap
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        return zd

    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values

        def weight_src(src):
            return src.num_ruptures

        logging.info('Building ruptures')
        smap = parallel.Starmap(
            self.build_ruptures.__func__, monitor=self.monitor())
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(2, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                if sg.atomic:  # do not split the group
                    smap.submit(sg, self.src_filter, par)
                else:  # traditional groups
                    for block in self.block_splitter(
                            sg.sources, weight_src, by_grp):
                        if 'ucerf' in oq.calculation_mode:
                            for i in range(oq.ses_per_logic_tree_path):
                                par['ses_seeds'] = [
                                    (ses_idx, oq.ses_seed + i + 1)]
                                smap.submit(block, self.src_filter, par)
                                ses_idx += 1
                        else:
                            smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        store_rlzs_by_grp(self.datastore)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        self.save_events(sorted_ruptures)
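
The reordering step above assumes that, once sorted, the ruptures of each
group occupy a contiguous slice of rows, so the per-group (start, stop)
indices can be recovered from the grp_id column alone. A minimal standalone
sketch of that index computation (get_group_slices is a made-up name, not
the engine's get_indices helper):

import numpy

def get_group_slices(grp_ids):
    # return a dict grp_id -> (start, stop), assuming each group
    # occupies a contiguous block of rows
    indices = {}
    if len(grp_ids) == 0:
        return indices
    changes = numpy.flatnonzero(numpy.diff(grp_ids)) + 1  # where grp_id changes
    starts = numpy.concatenate([[0], changes])
    stops = numpy.concatenate([changes, [len(grp_ids)]])
    for start, stop in zip(starts, stops):
        indices[int(grp_ids[start])] = (int(start), int(stop))
    return indices

print(get_group_slices(numpy.array([0, 0, 1, 1, 1, 2])))
# {0: (0, 2), 1: (2, 5), 2: (5, 6)}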

    def gen_rupture_getters(self):
        """
        :yields: RuptureGetters
        """
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        hdf5cache = dstore.hdf5cache()
        mode = 'r+' if os.path.exists(hdf5cache) else 'w'
        with hdf5.File(hdf5cache, mode) as cache:
            if 'ruptures' not in cache:
                dstore.hdf5.copy('ruptures', cache)
            if 'rupgeoms' not in cache:
                dstore.hdf5.copy('rupgeoms', cache)
        yield from gen_rupture_getters(
            dstore, concurrent_tasks=self.oqparam.concurrent_tasks or 1,
            hdf5cache=hdf5cache)
        if self.datastore.parent:
            self.datastore.parent.close()
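
The caching logic above copies the 'ruptures' and 'rupgeoms' datasets into a
side HDF5 file the first time they are needed, so that worker tasks can read
them without hitting the main datastore. A rough equivalent with plain h5py
(paths, names and the helper below are placeholders, not the engine's API):

import os
import h5py

def ensure_cached(source_path, cache_path, names=('ruptures', 'rupgeoms')):
    # copy the given datasets into cache_path, creating the file if needed
    mode = 'r+' if os.path.exists(cache_path) else 'w'
    with h5py.File(source_path, 'r') as src, h5py.File(cache_path, mode) as dst:
        for name in names:
            if name in src and name not in dst:
                src.copy(name, dst)  # h5py Group.copy duplicates the dataset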

    @cached_property
    def eid2idx(self):
        """
        :returns: a dict eid -> index in the events table
        """
        return dict(zip(self.datastore['events']['id'], range(self.E)))
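
eid2idx translates the 64-bit event IDs carried by the GMF rows into compact
row indices in the events table; get_idxs presumably performs that lookup for
a whole array at once. A tiny illustrative version of such a translation
(eids_to_indices is a made-up helper, not the engine's get_idxs):

import numpy

def eids_to_indices(eids, eid2idx):
    # map an array of event IDs to their row indices via a plain dict;
    # the real helper has to be fast, so it is presumably vectorized
    return numpy.array([eid2idx[eid] for eid in eids], dtype=numpy.uint32)

eid2idx = {10: 0, 42: 1, 99: 2}
print(eids_to_indices(numpy.array([42, 42, 10, 99]), eid2idx))  # [1 1 0 2]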

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data) == 0:
                return acc
            idxs = base.get_idxs(data, self.eid2idx)  # this has to be fast
            data['eid'] = idxs  # replace eid with idx
            self.datastore.extend('gmf_data/data', data)
            sig_eps = result.pop('sig_eps')
            sig_eps['eid'] = base.get_idxs(sig_eps, self.eid2idx)
            self.datastore.extend('gmf_data/sigma_epsilon', sig_eps)
            # it is important to save the number of bytes while the
            # computation is running, to track the progress
            update_nbytes(self.datastore, 'gmf_data/data', data)
            for sid, start, stop in result['indices']:
                self.indices[sid, 0].append(start + self.offset)
                self.indices[sid, 1].append(stop + self.offset)
            self.offset += len(data)
            if self.offset >= TWO32:
                raise RuntimeError(
                    'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc
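
The update array[:] = 1. - (1. - array) * (1. - poes) merges probabilities of
exceedance coming from different tasks under the usual independence
assumption: the probability of at least one exceedance is one minus the
product of the probabilities of no exceedance. A quick numeric check:

import numpy

acc = numpy.array([0.10, 0.00, 0.50])   # PoEs accumulated so far
new = numpy.array([0.20, 0.30, 0.50])   # PoEs from a new task
print(1. - (1. - acc) * (1. - new))     # [0.28 0.3  0.75]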

    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with a grp_id field
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(
            rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()

        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter,) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        i = 0
        for eid_rlz in smap:  # 30 million events associated in 1 minute!
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='id')  # fast too
        n_unique_events = len(numpy.unique(events['id']))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        self.datastore['events'] = events

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites or the number of events is
        larger than 4,294,967,296 or if the number of IMTs is larger than 256.
        The limits are due to the numpy dtype used to store the GMFs (gmv_dt).
        They could be relaxed in the future.
        """
        max_ = dict(sites=TWO32, events=TWO32, imts=2**8)
        num_ = dict(events=self.E, imts=len(self.oqparam.imtls))
        if self.sitecol:
            num_['sites'] = len(self.sitecol)
        for var in num_:
            if num_[var] > max_[var]:
                raise ValueError(
                    'The event based calculator is restricted to '
                    '%d %s, got %d' % (max_[var], var, num_[var]))
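
The limits quoted in check_overflow follow directly from the integer widths
used in the GMF rows: a 32-bit unsigned site or event ID can address at most
2**32 distinct values and an 8-bit IMT index at most 256. A quick sanity
check of those bounds (the field names below are an assumption, not the
actual gmv_dt):

import numpy

gmv_dt = numpy.dtype([('sid', numpy.uint32), ('eid', numpy.uint32),
                      ('imti', numpy.uint8), ('gmv', numpy.float32)])
print(2 ** (8 * gmv_dt['sid'].itemsize))   # 4294967296 sites (and events)
print(2 ** (8 * gmv_dt['imti'].itemsize))  # 256 IMTs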

    def set_param(self, **kw):
        oq = self.oqparam
        # set the minimum_intensity
        if hasattr(self, 'riskmodel') and not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.riskmodel.min_iml
        min_iml = oq.min_iml
        if min_iml.sum() == 0:
            logging.warning('The GMFs are not filtered: '
                            'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        self.param.update(
            oqparam=oq,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            ruptures_per_block=oq.ruptures_per_block,
            imtls=oq.imtls, filter_distance=oq.filter_distance,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw)

    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        if oq.hazard_calculation_id and 'ruptures' in self.datastore:
            # from ruptures
            self.datastore.parent = datastore.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.csm_info)
        else:
            # from sources
            self.build_events_from_sources()
            if oq.ground_motion_fields is False:
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        iterargs = ((rgetter, self.src_filter, self.param)
                    for rgetter in self.gen_rupture_getters())
        # call compute_gmfs in parallel
        acc = parallel.Starmap(
            self.core_task.__func__, iterargs, self.monitor()
        ).reduce(self.agg_dicts, self.acc0())

        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True,
                              autoflush=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                dset = self.datastore.create_dset(
                    'gmf_data/indices', hdf5.vuint32,
                    shape=(N, 2), fillvalue=None)
                num_evs = self.datastore.create_dset(
                    'gmf_data/events_by_sid', U32, (N,))
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
                num_evs = num_evs[()]
                avg_events_by_sid = num_evs.sum() / N
                logging.info('Found ~%d GMVs per site', avg_events_by_sid)
                self.datastore.set_attrs(
                    'gmf_data', avg_events_by_sid=avg_events_by_sid,
                    max_events_by_sid=num_evs.max())
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc
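
The indices saved above record, for every site, the start/stop slices of its
rows in gmf_data/data, so the number of ground motion values per site is just
the sum of the slice lengths. In miniature, with made-up values:

import numpy

start = numpy.array([0, 120, 400])   # slice starts for one site, across tasks
stop = numpy.array([80, 250, 430])   # matching slice stops
print((stop - start).sum())          # 240 GMVs for this site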

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields:
            return
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # compute and save statistics; this is done in-process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: the individual hazard curves are saved only if
            # oq.individual_curves is set; the statistical curves are
            # saved whenever hazard stats are requested
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, L))
                self.datastore.set_attrs('hcurves-rlzs', nbytes=N * R * L * 4)
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-rlzs', F32, (N, R, M, P))
                    self.datastore.set_attrs(
                        'hmaps-rlzs', nbytes=N * R * P * M * 4)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array

            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32, (N, S, L))
                self.datastore.set_attrs('hcurves-stats', nbytes=N * S * L * 4)
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-stats', F32, (N, S, M, P))
                    self.datastore.set_attrs(
                        'hmaps-stats', nbytes=N * S * P * M * 4)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(
                        pmaps, [hstats[stat]], weights, oq.imtls)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array

        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            job_id = logs.init('job')
            self.cl = ClassicalCalculator(oq, job_id)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, this is usually quite fast and does not dominate
            # the computation
            self.cl.run(close=False)
            engine.expose_outputs(self.cl.datastore)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            self.rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warning('Relative difference with the classical '
                            'mean curves: %d%% at site index %d',
                            self.rdiff * 100, index)
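
compute_pmap_stats above reduces the per-realization probability maps into
statistical curves; for the mean statistic this amounts to a weighted average
of the curves, site by site and level by level. A minimal sketch of that mean
(the real helper also supports quantiles and works on ProbabilityMaps, which
is not shown here):

import numpy

def mean_curve(curves, weights):
    # weighted mean of per-realization hazard curves of shape (R, L)
    weights = numpy.array(weights, float)
    weights /= weights.sum()  # normalize in case the weights do not sum to 1
    return numpy.einsum('r,rl->l', weights, numpy.array(curves, float))

curves = [[0.10, 0.01], [0.20, 0.02]]   # two realizations, two levels
print(mean_curve(curves, [0.3, 0.7]))   # [0.17  0.017]
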
Example No. 9
0
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_func = compute_gmfs_and_curves
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). Any pre-existing files will be erased.
        """
        super(EventBasedCalculator, self).pre_execute()
        self.sesruptures = []
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        self.datasets = {}
        for col_id, sescol in enumerate(self.datastore['sescollection']):
            gmf_dt = gsim_imt_dt(gsims_by_col[col_id], self.oqparam.imtls)
            for tag, sesrup in sorted(sescol.items()):
                sesrup = sescol[tag]
                self.sesruptures.append(sesrup)
            if self.oqparam.ground_motion_fields and sescol:
                self.datasets[col_id] = self.datastore.create_dset(
                    'gmfs/col%02d' % col_id, gmf_dt)

    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary trt_id, gsim -> gmf_array or curves_by_imt
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        save_gmfs = self.oqparam.ground_motion_fields
        for trt_id, gsim_or_col in res:
            if isinstance(gsim_or_col, int) and save_gmfs:
                with sav_mon:
                    gmfa = res[trt_id, gsim_or_col]
                    dataset = self.datasets[gsim_or_col]
                    dataset.attrs['trt_model_id'] = trt_id
                    dataset.extend(gmfa)
                    self.nbytes += gmfa.nbytes
                    self.datastore.hdf5.flush()
            elif isinstance(gsim_or_col, str):  # aggregate hcurves
                with agg_mon:
                    curves_by_imt = res[trt_id, gsim_or_col]
                    acc = agg_dicts(
                        acc, AccumDict({(trt_id, gsim_or_col): curves_by_imt}))
        sav_mon.flush()
        agg_mon.flush()
        return acc

    def execute(self):
        """
        Run in parallel `core_func(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = oq
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        self.nbytes = 0
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_func.__func__,
            (self.sesruptures, self.sitecol, self.rlzs_assoc, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            acc=zerodict,
            agg=self.combine_curves_and_save_gmfs,
            key=operator.attrgetter('col_id'))
        if oq.ground_motion_fields:
            # sanity check on the saved gmfs size
            expected_nbytes = self.datastore['counts_per_rlz'].attrs[
                'gmfs_nbytes']
            self.datastore['gmfs'].attrs['nbytes'] = self.nbytes
            assert self.nbytes == expected_nbytes, (self.nbytes,
                                                    expected_nbytes)
        return curves_by_trt_gsim

    def post_execute(self, result):
        """
        :param result:
            a dictionary (trt_model_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        if oq.hazard_curves_from_gmfs:
            ClassicalCalculator.post_execute.__func__(self, result)
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # use a different datastore
            self.cl = ClassicalCalculator(oq, self.monitor)
            self.cl.datastore.parent = self.datastore
            result = self.cl.run(pre_execute=False, clean_up=False)
            for imt in self.mean_curves.dtype.fields:
                rdiff, index = max_rel_diff_index(self.cl.mean_curves[imt],
                                                  self.mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d', imt,
                    rdiff * 100, index)
Example No. 10
0
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). Any pre-existing files will be erased.
        """
        super(EventBasedCalculator, self).pre_execute()
        rlzs_by_tr_id = self.rlzs_assoc.get_rlzs_by_grp_id()
        num_rlzs = {t: len(rlzs) for t, rlzs in rlzs_by_tr_id.items()}
        self.sesruptures = []
        for serial in self.datastore['sescollection']:
            sr = self.datastore['sescollection/' + serial]
            sr.set_weight(num_rlzs, {})
            self.sesruptures.append(sr)
        self.sesruptures.sort(key=operator.attrgetter('serial'))
        if self.oqparam.ground_motion_fields:
            calc.check_overflow(self)
            for rlz in self.rlzs_assoc.realizations:
                self.datastore.create_dset(
                    'gmf_data/%04d' % rlz.ordinal, calc.gmv_dt)

    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        for rlzi in res:
            gmfa, curves = res[rlzi]
            if gmfa is not None:
                with sav_mon:
                    hdf5.extend(self.datastore['gmf_data/%04d' % rlzi], gmfa)
            if curves is not None:  # aggregate hcurves
                with agg_mon:
                    self.agg_dicts(acc, {rlzi: curves})
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        monitor = self.monitor(self.core_task.__name__)
        monitor.oqparam = oq
        min_iml = calc.fix_minimum_intensity(
            oq.minimum_intensity, oq.imtls)
        acc = parallel.apply_reduce(
            self.core_task.__func__,
            (self.sesruptures, self.sitecol, oq.imtls, self.rlzs_assoc,
             min_iml, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.combine_curves_and_save_gmfs,
            acc=ProbabilityMap(),
            key=operator.attrgetter('grp_id'),
            weight=operator.attrgetter('weight'))
        if oq.ground_motion_fields:
            self.datastore.set_nbytes('gmf_data')
        return acc
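
apply_reduce above splits the ruptures into tasks by weight, using grp_id as
a grouping key so that a block never mixes ruptures from different groups. A
generic sketch of that kind of weight- and key-aware splitting
(split_by_weight is an illustrative helper, not the engine's splitter):

import operator

def split_by_weight(items, max_weight, weight, key):
    # yield blocks whose total weight stays below max_weight,
    # never mixing items with different keys in the same block
    block, tot, curkey = [], 0, None
    for item in sorted(items, key=key):
        k, w = key(item), weight(item)
        if block and (tot + w > max_weight or k != curkey):
            yield block
            block, tot = [], 0
        block.append(item)
        tot += w
        curkey = k
    if block:
        yield block

rups = [dict(grp_id=0, weight=3), dict(grp_id=0, weight=4),
        dict(grp_id=1, weight=2), dict(grp_id=1, weight=2)]
blocks = list(split_by_weight(rups, 5, operator.itemgetter('weight'),
                              operator.itemgetter('grp_id')))
print([len(b) for b in blocks])  # [1, 1, 2]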

    def post_execute(self, result):
        """
        :param result:
            a dictionary (src_group_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        elif oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            dic = {}
            for rlzi in result:
                dic[rlzs[rlzi]] = array_of_curves(
                    result[rlzi], len(self.sitecol), oq.imtls)
            self.save_curves(dic)
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq, self.monitor)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, this is usually quite fast and does not dominate
            # the computation
            self.cl.run()
            for imt in self.mean_curves.dtype.fields:
                rdiff, index = max_rel_diff_index(
                    self.cl.mean_curves[imt], self.mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d',
                    imt, rdiff * 100, index)
Example No. 11
0
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def combine_pmaps_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        self.gmdata += res['gmdata']
        data = res['gmfdata']
        if data is not None:
            with sav_mon:
                hdf5.extend3(self.datastore.hdf5path, 'gmf_data/data', data)
                for sid, start, stop in res['indices']:
                    self.indices[sid].append(
                        (start + self.offset, stop + self.offset))
                self.offset += len(data)
        slicedic = self.oqparam.imtls.slicedic
        with agg_mon:
            for key, poes in res['hcurves'].items():
                rlzi, sid, imt = str2rsi(key)
                array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        if 'ruptures' in res:
            vars(EventBasedRuptureCalculator)['save_ruptures'](self,
                                                               res['ruptures'])
        return acc

    def gen_args(self, ruptures_by_grp):
        """
        :param ruptures_by_grp: a dictionary of EBRupture objects
        :yields: the arguments for compute_gmfs_and_curves
        """
        oq = self.oqparam
        monitor = self.monitor(self.core_task.__name__)
        imts = list(oq.imtls)
        min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts)
        correl_model = oq.get_correl_model()
        for grp_id in ruptures_by_grp:
            ruptures = ruptures_by_grp[grp_id]
            if not ruptures:
                continue
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
            for block in block_splitter(ruptures, oq.ruptures_per_block):
                samples = self.rlzs_assoc.samples[grp_id]
                getter = GmfGetter(grp_id, rlzs_by_gsim, block, self.sitecol,
                                   imts, min_iml, oq.truncation_level,
                                   correl_model, samples)
                yield getter, oq, monitor
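
block_splitter here cuts the list of ruptures into chunks of at most
oq.ruptures_per_block elements, so that each GmfGetter receives a bounded
amount of work. A generic fixed-size splitter with the same flavour
(split_in_blocks_of is a made-up name, not the engine's helper):

def split_in_blocks_of(items, block_size):
    # yield consecutive blocks of at most block_size items
    block = []
    for item in items:
        block.append(item)
        if len(block) == block_size:
            yield block
            block = []
    if block:
        yield block

print(list(split_in_blocks_of(range(7), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]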

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        if self.oqparam.ground_motion_fields:
            calc.check_overflow(self)

        with self.monitor('reading ruptures', autoflush=True):
            ruptures_by_grp = (self.precalc.result if self.precalc else
                               get_ruptures_by_grp(self.datastore.parent))

        self.csm_info = self.datastore['csm_info']
        self.sm_id = {
            tuple(sm.path): sm.ordinal
            for sm in self.csm_info.source_models
        }
        L = len(oq.imtls.array)
        rlzs = self.rlzs_assoc.realizations
        allargs = list(self.gen_args(ruptures_by_grp))
        res = parallel.Starmap(self.core_task.__func__, allargs).submit_all()
        self.gmdata = {}
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid -> indices
        acc = res.reduce(self.combine_pmaps_and_save_gmfs,
                         {rlz.ordinal: ProbabilityMap(L)
                          for rlz in rlzs})
        save_gmdata(self, len(rlzs))
        if self.indices:
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices',
                              measuremem=True,
                              autoflush=True):
                self.datastore.save_vlen('gmf_data/indices', [
                    numpy.array(self.indices[sid], indices_dt)
                    for sid in self.sitecol.complete.sids
                ])
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def post_execute(self, result):
        """
        :param result:
            a dictionary (src_group_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        elif oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # save individual curves
            for i in sorted(result):
                key = 'hcurves/rlz-%03d' % i
                if result[i]:
                    self.datastore[key] = result[i]
                else:
                    self.datastore[key] = ProbabilityMap(oq.imtls.array.size)
                    logging.info('Zero curves for %s', key)
            # compute and save statistics; this is done in-process:
            # we don't need to parallelize, since event based calculations
            # involve a "small" number of sites (<= 65,536)
            weights = [rlz.weight for rlz in rlzs]
            hstats = self.oqparam.hazard_stats()
            if len(hstats) and len(rlzs) > 1:
                for kind, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat], weights)
                    self.datastore['hcurves/' + kind] = pmap
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq, self.monitor('classical'))
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, this is usually quite fast and does not dominate
            # the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            for imt in eb_mean_curves.dtype.names:
                rdiff, index = util.max_rel_diff_index(cl_mean_curves[imt],
                                                       eb_mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d', imt,
                    rdiff * 100, index)
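
util.max_rel_diff_index above reports the worst relative discrepancy between
the event based and the classical mean curves and the site where it occurs.
A plausible standalone version of such a comparison (the handling of zeros in
the reference curves is an assumption, not the engine's implementation):

import numpy

def worst_relative_difference(reference, other, min_value=1e-12):
    # return (max relative difference, site index) between two curve arrays
    ref = numpy.asarray(reference, float)
    oth = numpy.asarray(other, float)
    denom = numpy.maximum(numpy.abs(ref), min_value)  # avoid division by zero
    rel = (numpy.abs(oth - ref) / denom).reshape(len(ref), -1)
    per_site = rel.max(axis=1)  # worst level for each site
    idx = per_site.argmax()
    return per_site[idx], idx

cl = numpy.array([[0.10, 0.01], [0.20, 0.02]])
eb = numpy.array([[0.11, 0.01], [0.20, 0.03]])
print(worst_relative_difference(cl, eb))  # about 50% off at site index 1
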
Example No. 12
0
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). Any pre-existing files will be erased.
        """
        super(EventBasedCalculator, self).pre_execute()
        rlzs_by_tr_id = self.rlzs_assoc.get_rlzs_by_trt_id()
        num_rlzs = {t: len(rlzs) for t, rlzs in rlzs_by_tr_id.items()}
        self.sesruptures = []
        for serial in self.datastore['sescollection']:
            sr = self.datastore['sescollection/' + serial]
            sr.set_weight(num_rlzs, {})
            self.sesruptures.append(sr)
        self.sesruptures.sort(key=operator.attrgetter('serial'))
        if self.oqparam.ground_motion_fields:
            for rlz in self.rlzs_assoc.realizations:
                self.datastore.create_dset('gmf_data/%04d' % rlz.ordinal,
                                           gmv_dt)

    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        for rlzi in res:
            gmfa, curves = res[rlzi]
            if gmfa is not None:
                with sav_mon:
                    hdf5.extend(self.datastore['gmf_data/%04d' % rlzi], gmfa)
            if curves is not None:  # aggregate hcurves
                with agg_mon:
                    self.agg_dicts(acc, {rlzi: curves})
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        monitor = self.monitor(self.core_task.__name__)
        monitor.oqparam = oq
        min_iml = fix_minimum_intensity(oq.minimum_intensity, oq.imtls)
        acc = parallel.apply_reduce(
            self.core_task.__func__, (self.sesruptures, self.sitecol, oq.imtls,
                                      self.rlzs_assoc, min_iml, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            agg=self.combine_curves_and_save_gmfs,
            acc=ProbabilityMap(),
            key=operator.attrgetter('trt_id'),
            weight=operator.attrgetter('weight'))
        if oq.ground_motion_fields:
            self.datastore.set_nbytes('gmf_data')
        return acc

    def post_execute(self, result):
        """
        :param result:
            a dictionary (trt_model_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        elif oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            dic = {}
            for rlzi in result:
                dic[rlzs[rlzi]] = array_of_curves(result[rlzi],
                                                  len(self.sitecol), oq.imtls)
            self.save_curves(dic)
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq, self.monitor)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, this is usually quite fast and does not dominate
            # the computation
            self.cl.run(hazard_calculation_id=self.datastore.calc_id)
            for imt in self.mean_curves.dtype.fields:
                rdiff, index = max_rel_diff_index(self.cl.mean_curves[imt],
                                                  self.mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d', imt,
                    rdiff * 100, index)
Example No. 13
0
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_func = compute_gmfs_and_curves
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). Any pre-existing files will be erased.
        """
        super(EventBasedCalculator, self).pre_execute()
        self.sesruptures = []
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        self.datasets = {}
        for col_id, sescol in enumerate(self.datastore['sescollection']):
            gmf_dt = gsim_imt_dt(gsims_by_col[col_id], self.oqparam.imtls)
            for tag, sesrup in sorted(sescol.items()):
                sesrup = sescol[tag]
                self.sesruptures.append(sesrup)
            if self.oqparam.ground_motion_fields and sescol:
                self.datasets[col_id] = self.datastore.create_dset(
                    'gmfs/col%02d' % col_id, gmf_dt)

    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary trt_id, gsim -> gmf_array or curves_by_imt
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        save_gmfs = self.oqparam.ground_motion_fields
        for trt_id, gsim_or_col in res:
            if isinstance(gsim_or_col, int) and save_gmfs:
                with sav_mon:
                    gmfa = res[trt_id, gsim_or_col]
                    dataset = self.datasets[gsim_or_col]
                    dataset.attrs['trt_model_id'] = trt_id
                    dataset.extend(gmfa)
                    self.nbytes += gmfa.nbytes
                    self.datastore.hdf5.flush()
            elif isinstance(gsim_or_col, str):  # aggregate hcurves
                with agg_mon:
                    curves_by_imt = res[trt_id, gsim_or_col]
                    acc = agg_dicts(
                        acc, AccumDict({(trt_id, gsim_or_col): curves_by_imt}))
        sav_mon.flush()
        agg_mon.flush()
        return acc

    def execute(self):
        """
        Run in parallel `core_func(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = oq
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        self.nbytes = 0
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_func.__func__,
            (self.sesruptures, self.sitecol, self.rlzs_assoc, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            acc=zerodict, agg=self.combine_curves_and_save_gmfs,
            key=operator.attrgetter('col_id'))
        if oq.ground_motion_fields:
            # sanity check on the saved gmfs size
            expected_nbytes = self.datastore[
                'counts_per_rlz'].attrs['gmfs_nbytes']
            self.datastore['gmfs'].attrs['nbytes'] = self.nbytes
            assert self.nbytes == expected_nbytes, (
                self.nbytes, expected_nbytes)
        return curves_by_trt_gsim

    def post_execute(self, result):
        """
        :param result:
            a dictionary (trt_model_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        if oq.hazard_curves_from_gmfs:
            ClassicalCalculator.post_execute.__func__(self, result)
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # use a different datastore
            self.cl = ClassicalCalculator(oq, self.monitor)
            self.cl.datastore.parent = self.datastore
            result = self.cl.run(pre_execute=False, clean_up=False)
            for imt in self.mean_curves.dtype.fields:
                rdiff, index = max_rel_diff_index(
                    self.cl.mean_curves[imt], self.mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d',
                    imt, rdiff * 100, index)
Example No. 14
0
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def combine_pmaps_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        if res['gmfcoll'] is not None:
            with sav_mon:
                for rlz, array in res['gmfcoll'].items():
                    if len(array):
                        key = 'gmf_data/%04d' % rlz.ordinal
                        self.datastore.extend(key, array)
        slicedic = self.oqparam.imtls.slicedic
        with agg_mon:
            for key, poes in res['hcurves'].items():
                rlzi, sid, imt = str2rsi(key)
                array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        if 'ruptures' in res:
            vars(EventBasedRuptureCalculator)['save_ruptures'](
                self, res['ruptures'])
        return acc

    def gen_args(self, ebruptures):
        """
        :param ebruptures: a list of EBRupture objects to be split
        :yields: the arguments for compute_gmfs_and_curves
        """
        oq = self.oqparam
        monitor = self.monitor(self.core_task.__name__)
        monitor.oqparam = oq
        imts = list(oq.imtls)
        min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts)
        grp_trt = {sg.id: sg.trt for sm in self.csm.info.source_models
                   for sg in sm.src_groups}
        rlzs_by_grp = self.rlzs_assoc.get_rlzs_by_grp_id()
        correl_model = oq.get_correl_model()
        for block in split_in_blocks(
                ebruptures, oq.concurrent_tasks or 1,
                key=operator.attrgetter('grp_id')):
            grp_id = block[0].grp_id
            trt = grp_trt[grp_id]
            gsims = [dic[trt] for dic in self.rlzs_assoc.gsim_by_trt]
            samples = self.rlzs_assoc.samples[grp_id]
            getter = GmfGetter(gsims, block, self.sitecol,
                               imts, min_iml, oq.truncation_level,
                               correl_model, samples)
            yield getter, rlzs_by_grp[grp_id], monitor

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        self.sesruptures = []
        if self.precalc:  # the ruptures are already in memory
            for grp_id, sesruptures in self.precalc.result.items():
                for sr in sesruptures:
                    self.sesruptures.append(sr)
        else:  # read the ruptures from the datastore
            for serial in self.datastore['sescollection']:
                sr = self.datastore['sescollection/' + serial]
                self.sesruptures.append(sr)
        self.sesruptures.sort(key=operator.attrgetter('serial'))
        if self.oqparam.ground_motion_fields:
            calc.check_overflow(self)

        L = len(oq.imtls.array)
        res = parallel.starmap(
            self.core_task.__func__, self.gen_args(self.sesruptures)
        ).submit_all()
        acc = functools.reduce(self.combine_pmaps_and_save_gmfs, res, {
            rlz.ordinal: ProbabilityMap(L, 1)
            for rlz in self.rlzs_assoc.realizations})
        self.save_data_transfer(res)
        return acc

    def post_execute(self, result):
        """
        :param result:
            a dictionary (src_group_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        elif oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # save individual curves
            if self.oqparam.individual_curves:
                for i in sorted(result):
                    key = 'hcurves/rlz-%03d' % i
                    if result[i]:
                        self.datastore[key] = result[i]
                    else:
                        logging.info('Zero curves for %s', key)
            # compute and save statistics; this is done in-process:
            # we don't need to parallelize, since event based calculations
            # involve a "small" number of sites (<= 65,536)
            weights = (None if self.oqparam.number_of_logic_tree_samples
                       else [rlz.weight for rlz in rlzs])
            pstats = PmapStats(self.oqparam.quantile_hazard_curves, weights)
            for kind, stat in pstats.compute(
                    self.sitecol.sids, list(result.values())):
                if kind == 'mean' and not self.oqparam.mean_hazard_curves:
                    continue
                self.datastore['hcurves/' + kind] = stat

        if ('gmf_data' in self.datastore and 'nbytes' not
                in self.datastore['gmf_data'].attrs):
            self.datastore.set_nbytes('gmf_data')

        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq, self.monitor)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, this is usually quite fast and does not dominate
            # the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            for imt in eb_mean_curves.dtype.names:
                rdiff, index = max_rel_diff_index(
                    cl_mean_curves[imt], eb_mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d',
                    imt, rdiff * 100, index)
Example No. 15
0
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). Any pre-existing files will be erased.
        """
        super(EventBasedCalculator, self).pre_execute()
        self.sesruptures = []
        for serial in self.datastore['sescollection']:
            self.sesruptures.append(self.datastore['sescollection/' + serial])
        self.sesruptures.sort(key=operator.attrgetter('serial'))

    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary trt_id, gsim -> gmf_array or curves_by_imt
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        save_gmfs = self.oqparam.ground_motion_fields
        for trt_id in res:
            if isinstance(trt_id, int) and save_gmfs:
                with sav_mon:
                    gmfa_sids_etags = res[trt_id]
                    for serial in sorted(gmfa_sids_etags):
                        gst = gmfa_sids_etags[serial]
                        self.datastore['gmf_data/%s' % serial] = gst.gmfa
                        self.datastore['sid_data/%s' % serial] = gst.sids
                        self.datastore.set_attrs('gmf_data/%s' % serial,
                                                 trt_id=trt_id,
                                                 etags=gst.etags)
                    self.datastore.hdf5.flush()
            elif isinstance(trt_id, tuple):  # aggregate hcurves
                with agg_mon:
                    self.agg_dicts(acc, {trt_id: res[trt_id]})
        sav_mon.flush()
        agg_mon.flush()
        return acc

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        monitor = self.monitor(self.core_task.__name__)
        monitor.oqparam = oq
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_task.__func__,
            (self.sesruptures, self.sitecol, self.rlzs_assoc, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            acc=zerodict, agg=self.combine_curves_and_save_gmfs,
            key=operator.attrgetter('trt_id'),
            weight=operator.attrgetter('multiplicity'))
        if oq.ground_motion_fields:
            self.datastore.set_nbytes('gmf_data')
            self.datastore.set_nbytes('sid_data')
        return curves_by_trt_gsim

    def post_execute(self, result):
        """
        :param result:
            a dictionary (trt_model_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        if oq.hazard_curves_from_gmfs:
            ClassicalCalculator.post_execute.__func__(self, result)
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # use a different datastore
            self.cl = ClassicalCalculator(oq, self.monitor)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, this is usually quite fast and does not dominate
            # the computation
            self.cl.run(hazard_calculation_id=self.datastore.calc_id)
            for imt in self.mean_curves.dtype.fields:
                rdiff, index = max_rel_diff_index(
                    self.cl.mean_curves[imt], self.mean_curves[imt])
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d',
                    imt, rdiff * 100, index)
Example No. 16
0
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    build_ruptures = sample_ruptures

    @cached_property
    def csm_info(self):
        """
        :returns: a cached CompositionInfo object
        """
        try:
            return self.csm.info
        except AttributeError:
            return self.datastore.parent['csm_info']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        self.rupser = calc.RuptureSerializer(self.datastore)

    def init_logic_tree(self, csm_info):
        self.grp_trt = csm_info.grp_by("trt")
        self.rlzs_assoc = csm_info.get_rlzs_assoc()
        self.rlzs_by_gsim_grp = csm_info.get_rlzs_by_gsim_grp()
        self.samples_by_grp = csm_info.get_samples_by_grp()
        self.num_rlzs_by_grp = {
            grp_id:
            sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp
        }
        self.R = len(self.rlzs_assoc.realizations)

    def zerodict(self):
        """
        Initial accumulator: a dictionary rlz_ordinal -> ProbabilityMap
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        self.E = len(self.datastore['events'])
        return zd

    def from_sources(self, par):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values

        def weight_src(src):
            return src.num_ruptures

        logging.info('Building ruptures')
        smap = parallel.Starmap(self.build_ruptures.__func__,
                                monitor=self.monitor())
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par['gsims'] = gsims_by_trt[sg.trt]
                for block in self.block_splitter(sg.sources, weight_src):
                    if 'ucerf' in oq.calculation_mode:
                        for i in range(oq.ses_per_logic_tree_path):
                            par['ses_seeds'] = [(ses_idx, oq.ses_seed + i + 1)]
                            smap.submit(block, self.src_filter, par)
                            ses_idx += 1
                    else:
                        smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()

        # logic tree reduction, must be called before storing the events
        self.store_csm_info(eff_ruptures)
        store_rlzs_by_grp(self.datastore)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)

        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures').value
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', **attrs)
        rgetters = self.save_events(sorted_ruptures)
        return ((rgetter, self.sitecol, par) for rgetter in rgetters)

    def get_rupture_getters(self):
        """
        :returns: a list of RuptureGetters
        """
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        hdf5cache = dstore.hdf5cache()
        with hdf5.File(hdf5cache, 'r+') as cache:
            if 'rupgeoms' not in cache:
                dstore.hdf5.copy('rupgeoms', cache)
        rgetters = get_rupture_getters(dstore,
                                       split=self.oqparam.concurrent_tasks,
                                       hdf5cache=hdf5cache)
        num_events = self.E if hasattr(self, 'E') else len(dstore['events'])
        num_ruptures = len(dstore['ruptures'])
        logging.info('Found {:,d} ruptures and {:,d} events'.format(
            num_ruptures, num_events))
        if self.datastore.parent:
            self.datastore.parent.close()
        return rgetters

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        try:
            eid2idx = self.eid2idx
        except AttributeError:  # first call
            eid2idx = self.eid2idx = dict(
                zip(self.datastore['events']['eid'], range(self.E)))
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data) == 0:
                return acc
            idxs = get_idxs(data, eid2idx)  # this has to be fast
            data['eid'] = idxs  # replace eid with idx
            self.datastore.extend('gmf_data/data', data)
            # it is important to save the number of bytes while the
            # computation is running, so that the progress can be monitored
            update_nbytes(self.datastore, 'gmf_data/data', data)
            for sid, start, stop in result['indices']:
                self.indices[sid, 0].append(start + self.offset)
                self.indices[sid, 1].append(stop + self.offset)
            self.offset += len(data)
            if self.offset >= TWO32:
                raise RuntimeError('The gmf_data table has more than %d rows' %
                                   TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
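                # aggregate the PoEs assuming independence:
                # P = 1 - (1 - P_old) * (1 - P_new)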
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with a grp_id field
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(rup_array, self.samples_by_grp,
                                self.num_rlzs_by_grp)
        self.E = len(eids)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        events['eid'] = eids
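        # cache the eid -> row index mapping; agg_dicts reuses it when saving the GMFs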
        self.eid2idx = eid2idx = dict(zip(events['eid'], range(self.E)))
        rgetters = self.get_rupture_getters()

        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter, ) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        for eid_rlz in smap:
            # fast: 30 million events associated in 1 minute
            for eid, rlz in eid_rlz:
                events[eid2idx[eid]]['rlz'] = rlz
        self.datastore['events'] = events  # fast too
        return rgetters

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than max_num_sites,
        the number of IMTs is larger than 256, or the number of events is
        larger than 4,294,967,296. The limits are due to the numpy dtype used to
        store the GMFs (gmv_dt). They could be relaxed in the future.
        """
        max_ = dict(events=TWO32, imts=2**8)
        E = getattr(self, 'E', 0)  # 0 for non event based
        num_ = dict(events=E, imts=len(self.oqparam.imtls))
        if self.sitecol:
            max_['sites'] = min(self.oqparam.max_num_sites, TWO32)
            num_['sites'] = len(self.sitecol)
        for var in max_:
            if num_[var] > max_[var]:
                raise ValueError('The event based calculator is restricted to '
                                 '%d %s, got %d' % (max_[var], var, num_[var]))

    def execute(self):
        oq = self.oqparam
        self.offset = 0
        self.indices = collections.defaultdict(list)  # (sid, 0) -> starts, (sid, 1) -> stops
        self.min_iml = self.get_min_iml(oq)
        param = self.param.copy()
        param.update(oqparam=oq,
                     min_iml=self.min_iml,
                     gmf=oq.ground_motion_fields,
                     truncation_level=oq.truncation_level,
                     ruptures_per_block=oq.ruptures_per_block,
                     imtls=oq.imtls,
                     filter_distance=oq.filter_distance,
                     ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
        if oq.hazard_calculation_id:  # from ruptures
            assert oq.ground_motion_fields, 'must be True!'
            self.datastore.parent = datastore.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.csm_info)
            iterargs = ((rgetter, self.sitecol, param)
                        for rgetter in self.get_rupture_getters())
        else:  # from sources
            iterargs = self.from_sources(param)
            if oq.ground_motion_fields is False:
                return {}
        # call compute_gmfs in parallel
        acc = parallel.Starmap(self.core_task.__func__, iterargs,
                               self.monitor()).reduce(self.agg_dicts,
                                                      self.zerodict())

        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices',
                              measuremem=True,
                              autoflush=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                dset = self.datastore.create_dset('gmf_data/indices',
                                                  hdf5.vuint32,
                                                  shape=(N, 2),
                                                  fillvalue=None)
                num_evs = self.datastore.create_dset('gmf_data/events_by_sid',
                                                     U32, (N, ))
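                # for each site store the start/stop slices of its rows in
                # gmf_data/data and the total number of ground motion values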
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
                avg_events_by_sid = num_evs.value.sum() / N
                logging.info('Found ~%d GMVs per site', avg_events_by_sid)
                self.datastore.set_attrs('gmf_data',
                                         avg_events_by_sid=avg_events_by_sid,
                                         max_events_by_sid=num_evs.value.max())
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields:
            return
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # compute and save the statistics; this is done in the master
            # process and can be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save the individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save only the statistical curves
            hstats = oq.hazard_stats()
            pmaps = list(result.values())
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                if len(weights) != len(pmaps):
                    # this should never happen, unless I break the
                    # logic tree reduction mechanism during refactoring
                    raise AssertionError('Expected %d pmaps, got %d' %
                                         (len(weights), len(pmaps)))
                for statname, stat in hstats:
                    pmap = compute_pmap_stats(pmaps, [stat], weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + statname] = arr
                    if oq.poes:
                        P = len(oq.poes)
                        I = len(oq.imtls)
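                        # one value per (poe, imt) pair, hence P * I columns per site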
                        self.datastore.create_dset('hmaps/' + statname, F32,
                                                   (N, P * I))
                        self.datastore.set_attrs('hmaps/' + statname,
                                                 nbytes=N * P * I * 4)
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        ds = self.datastore['hmaps/' + statname]
                        for sid in hmap:
                            ds[sid] = hmap[sid].array[:, 0]

        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, this is usually quite fast and does not dominate
            # the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(cl_mean_curves,
                                                   eb_mean_curves)
            logging.warning(
                'Relative difference with the classical '
                'mean curves: %d%% at site index %d', rdiff * 100, index)