Example #1
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values

        def weight_src(src):
            return src.num_ruptures

        logging.info('Building ruptures')
        smap = parallel.Starmap(
            self.build_ruptures.__func__, monitor=self.monitor())
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                for block in self.block_splitter(
                        sg.sources, weight_src, by_grp):
                    if 'ucerf' in oq.calculation_mode:
                        for i in range(oq.ses_per_logic_tree_path):
                            par['ses_seeds'] = [(ses_idx, oq.ses_seed + i + 1)]
                            smap.submit(block, self.src_filter, par)
                            ses_idx += 1
                    else:
                        smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        store_rlzs_by_grp(self.datastore)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures').value
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        self.save_events(sorted_ruptures)
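
The examples on this page share the submit/accumulate pattern used above: build argument tuples, run them through parallel.Starmap, and merge the per-task dictionaries into an AccumDict. The sketch below isolates that pattern; the count_by_trt task and its toy inputs are invented for illustration, and only the Starmap/AccumDict usage mirrors the code above.

from openquake.baselib import parallel
from openquake.baselib.general import AccumDict


def count_by_trt(sources):
    """Toy task: count ruptures per tectonic region type."""
    acc = AccumDict(accum=0)  # missing keys start from 0
    for src in sources:
        acc[src.tectonic_region_type] += src.num_ruptures
    return acc


def count_all(blocks):
    # one task per block of sources; the dictionaries returned by the
    # tasks are summed key by key, like eff_ruptures and calc_times above
    allargs = [(block,) for block in blocks]
    return parallel.Starmap(count_by_trt, allargs).reduce()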
Example #2
    def full_disaggregation(self, curves):
        """
        Run the disaggregation phase.

        :param curves: a list of hazard curves, one per site

        The curves can be all None if iml_disagg is set in the job.ini
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
        csm = self.csm
        if not csm.get_sources():
            raise RuntimeError('All sources were filtered away!')

        R = len(self.rlzs_assoc.realizations)
        I = len(oq.imtls)
        P = len(oq.poes_disagg) or 1
        if R * I * P > 10:
            logging.warn(
                'You have %d realizations, %d IMTs and %d poes_disagg: the '
                'disaggregation will be heavy and memory consuming', R, I, P)
        iml4 = disagg.make_iml4(
            R, oq.iml_disagg, oq.imtls, oq.poes_disagg or (None,), curves)
        if oq.disagg_by_src:
            if R == 1:
                self.build_disagg_by_src(iml4)
            else:
                logging.warn('disagg_by_src works only with 1 realization, '
                             'you have %d', R)

        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
        self.bin_edges = {}

        # build trt_edges
        trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                                for sg in smodel.src_groups)))
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        min_mag = min(sg.min_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        max_mag = max(sg.max_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        self.save_bin_edges()

        # build all_args
        all_args = []
        maxweight = csm.get_maxweight(weight, oq.concurrent_tasks)
        mon = self.monitor('disaggregation')
        R = iml4.shape[1]
        self.imldict = {}  # sid, rlzi, poe, imt -> iml
        for s in self.sitecol.sids:
            for r in range(R):
                for p, poe in enumerate(oq.poes_disagg or [None]):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, r, poe, imt] = iml4[s, r, m, p]

        for smodel in csm.source_models:
            sm_id = smodel.ordinal
            for trt, groups in groupby(
                    smodel.src_groups, operator.attrgetter('trt')).items():
                trti = trt_num[trt]
                sources = sum([grp.sources for grp in groups], [])
                rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
                cmaker = ContextMaker(
                    rlzs_by_gsim, src_filter.integration_distance,
                    {'filter_distance': oq.filter_distance})
                for block in block_splitter(sources, maxweight, weight):
                    all_args.append(
                        (src_filter, block, cmaker, iml4, trti, self.bin_edges,
                         oq, mon))

        self.num_ruptures = [0] * len(self.trts)
        self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
        results = parallel.Starmap(
            compute_disagg, all_args, self.monitor()
        ).reduce(self.agg_result, AccumDict(accum={}))

        # set eff_ruptures
        trti = csm.info.trt2i()
        for smodel in csm.info.source_models:
            for sg in smodel.src_groups:
                sg.eff_ruptures = self.num_ruptures[trti[sg.trt]]
        self.datastore['csm_info'] = csm.info

        ops, hits, num_zeros = self.cache_info
        logging.info('Cache speedup %s', ops / (ops - hits))
        logging.info('Discarded zero matrices: %d', num_zeros)
        return results
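
The bin-edge expressions above are easy to check by hand. With hypothetical values (chosen only for illustration) mag_bin_width=0.5, min_mag=5.0, max_mag=7.3, distance_bin_width=10 and maxdist=95, the same formulas give:

import numpy

mag_bin_width, min_mag, max_mag = 0.5, 5.0, 7.3
mag_edges = mag_bin_width * numpy.arange(
    int(numpy.floor(min_mag / mag_bin_width)),
    int(numpy.ceil(max_mag / mag_bin_width) + 1))
print(mag_edges)   # [5.  5.5 6.  6.5 7.  7.5] -> 5 magnitude bins

distance_bin_width, maxdist = 10.0, 95.0
dist_edges = distance_bin_width * numpy.arange(
    0, int(numpy.ceil(maxdist / distance_bin_width) + 1))
print(dist_edges)  # [0. 10. 20. ... 90. 100.] -> 10 distance bins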
Example #3
    def execute(self):
        """
        Run `core_task(sources, sitecol, monitor)` in parallel, splitting
        the sources into tasks according to their weight and tectonic
        region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        srcfilter = self.src_filter()
        srcs = self.csm.get_sources()
        if oq.is_ucerf():
            logging.info('Prefiltering UCERFSources')
            for src in srcs:
                if hasattr(src, 'start'):
                    src.src_filter = srcfilter  # hack for .iter_ruptures
                    src.all_ridx = src.get_ridx()
        calc_times = parallel.Starmap.apply(
            preclassical,
            (srcs, SourceFilter(self.sitecol, oq.maximum_distance)),
            concurrent_tasks=oq.concurrent_tasks or 1,
            num_cores=oq.num_cores,
            h5=self.datastore.hdf5).reduce()
        if oq.calculation_mode == 'preclassical':
            self.store_source_info(calc_times, nsites=True)
            self.datastore['full_lt'] = self.csm.full_lt
            self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
            return

        self.update_source_info(calc_times, nsites=True)
        # if OQ_SAMPLE_SOURCES is set, extract one source per group
        ss = os.environ.get('OQ_SAMPLE_SOURCES')
        if ss:
            logging.info('Reducing the number of sources')
            for sg in self.csm.src_groups:
                if not sg.atomic:
                    src = max(sg,
                              key=operator.attrgetter('nsites', 'source_id'))
                    sg.sources = [src]

        mags = self.datastore['source_mags']  # by TRT
        if len(mags) == 0:  # everything was discarded
            raise RuntimeError('All sources were discarded!?')
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = mags[trt][()]
        psd = oq.pointsource_distance
        if psd is not None:
            psd.interp(mags_by_trt)
            for trt, dic in psd.ddic.items():
                # the sum is zero for {'default': [(1, 0), (10, 0)]}
                if sum(dic.values()):
                    it = list(dic.items())
                    md = '%s->%d ... %s->%d' % (it[0] + it[-1])
                    logging.info('ps_dist %s: %s', trt, md)
        imts_with_period = [
            imt for imt in oq.imtls if imt == 'PGA' or imt.startswith('SA')
        ]
        imts_ok = len(imts_with_period) == len(oq.imtls)
        if (imts_ok and psd and psd.suggested()) or (imts_ok
                                                     and oq.minimum_intensity):
            aw = get_effect(mags_by_trt, self.sitecol.one(), gsims_by_trt, oq)
            if psd:
                dic = {
                    trt: [(float(mag), int(dst))
                          for mag, dst in psd.ddic[trt].items()]
                    for trt in psd.ddic if trt != 'default'
                }
                logging.info('pointsource_distance=\n%s', pprint.pformat(dic))
            if len(vars(aw)) > 1:  # more than _extra
                self.datastore['effect_by_mag_dst'] = aw
        smap = parallel.Starmap(classical,
                                h5=self.datastore.hdf5,
                                num_cores=oq.num_cores)
        smap.monitor.save('srcfilter', srcfilter)
        self.submit_tasks(smap)
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            with self.monitor('store source_info'):
                self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = max(self.by_task) + 1,  # 1-tuple used as shape
                er = self.datastore.create_dset('by_task/eff_ruptures', U32,
                                                num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites', U32,
                                                num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr,
                                                num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(srcids)
                self.by_task.clear()
        self.numrups = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
            int(self.numrups), self.totrups))
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
        if psd:
            psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
            if psdist and self.maxradius >= psdist / 2:
                logging.warning(
                    'The pointsource_distance of %d km is too '
                    'small compared to a maxradius of %d km', psdist,
                    self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
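
The line md = '%s->%d ... %s->%d' % (it[0] + it[-1]) above relies on tuple concatenation to feed four placeholders from the first and last (magnitude, distance) pairs. A hypothetical interpolated pointsource_distance dictionary makes it concrete:

# hypothetical mapping magnitude -> pointsource distance in km
dic = {5.0: 20, 6.0: 45, 7.0: 110}
it = list(dic.items())
print('%s->%d ... %s->%d' % (it[0] + it[-1]))  # 5.0->20 ... 7.0->110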
Example #4
    def execute(self):
        oq = self.oqparam
        if oq.return_periods != [0]:
            # setting return_periods = 0 disables loss curves
            eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
            if eff_time < 2:
                logging.warning(
                    'eff_time=%s is too small to compute loss curves',
                    eff_time)
                return
        if 'source_info' in self.datastore:  # missing for gmf_ebrisk
            logging.info('Building src_loss_table')
            source_ids, losses = get_src_loss_table(self.datastore, self.L)
            self.datastore['src_loss_table'] = losses
            self.datastore.set_shape_attrs('src_loss_table',
                                           source=source_ids,
                                           loss_type=oq.loss_names)
        shp = self.get_shape(self.L)  # (L, T...)
        text = ' x '.join('%d(%s)' % (n, t)
                          for t, n in zip(oq.aggregate_by, shp[1:]))
        logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
        builder = get_loss_builder(self.datastore)
        if oq.aggregate_by:
            self.build_datasets(builder, oq.aggregate_by, 'agg_')
        self.build_datasets(builder, [], 'app_')
        self.build_datasets(builder, [], 'tot_')
        parent = self.datastore.parent
        full_aggregate_by = (parent['oqparam'].aggregate_by if parent else
                             ()) or oq.aggregate_by
        if oq.aggregate_by:
            aggkeys = build_aggkeys(oq.aggregate_by, self.tagcol,
                                    full_aggregate_by)
            if parent and 'event_loss_table' in parent:
                ds = parent
            else:
                ds = self.datastore
                ds.swmr_on()
            smap = parallel.Starmap(post_ebrisk,
                                    [(ds, aggkey) for aggkey in aggkeys],
                                    h5=self.datastore.hdf5)
        else:
            smap = ()
        # do everything in process since it is really fast
        ds = self.datastore
        for res in smap:
            if not res:
                continue
            for r, dic in res.items():
                if oq.aggregate_by:
                    ds['agg_curves-rlzs'][(slice(None), r, slice(None)) +
                                          dic['idx']  # PRLT..
                                          ] = dic['agg_curves']
                    ds['agg_losses-rlzs'][(slice(None), r) +
                                          dic['idx']  # LRT...
                                          ] = dic['agg_losses']
                    ds['app_curves-rlzs'][:, r] += dic['agg_curves']  # PL

        lbe = ds['losses_by_event'][()]
        rlz_ids = ds['events']['rlz_id'][lbe['event_id']]
        dic = dict(enumerate(lbe['loss'].T))  # lti -> losses
        df = pandas.DataFrame(dic, rlz_ids)
        for r, losses_df in df.groupby(rlz_ids):
            losses = numpy.array(losses_df)
            curves = builder.build_curves(losses, r),
            ds['tot_curves-rlzs'][:, r] = curves  # PL
            ds['tot_losses-rlzs'][:, r] = losses.sum(axis=0) * oq.ses_ratio
        units = self.datastore['cost_calculator'].get_units(oq.loss_names)
        aggby = {
            tagname: encode(getattr(self.tagcol, tagname)[1:])
            for tagname in oq.aggregate_by
        }
        set_rlzs_stats(self.datastore,
                       'app_curves',
                       return_periods=builder.return_periods,
                       loss_types=oq.loss_names,
                       **aggby,
                       units=units)
        set_rlzs_stats(self.datastore,
                       'tot_curves',
                       return_periods=builder.return_periods,
                       loss_types=oq.loss_names,
                       **aggby,
                       units=units)
        set_rlzs_stats(self.datastore,
                       'tot_losses',
                       loss_types=oq.loss_names,
                       **aggby,
                       units=units)
        if oq.aggregate_by:
            set_rlzs_stats(self.datastore,
                           'agg_curves',
                           return_periods=builder.return_periods,
                           loss_types=oq.loss_names,
                           **aggby,
                           units=units)
            set_rlzs_stats(self.datastore,
                           'agg_losses',
                           loss_types=oq.loss_names,
                           **aggby,
                           units=units)
        return 1
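
The per-realization totals near the end of the example come from a fancy-indexing step (events['rlz_id'][lbe['event_id']]) followed by a pandas groupby. A self-contained sketch with toy arrays, shaped like the 'events' and 'losses_by_event' datasets but invented here, shows what it produces:

import numpy
import pandas

events = numpy.array([(0, 0), (1, 0), (2, 1)],
                     dtype=[('id', 'u4'), ('rlz_id', 'u2')])
lbe = numpy.array([(0, [10., 1.]), (1, [20., 2.]), (2, [5., .5])],
                  dtype=[('event_id', 'u4'), ('loss', 'f4', (2,))])

rlz_ids = events['rlz_id'][lbe['event_id']]  # realization of each loss row
df = pandas.DataFrame(dict(enumerate(lbe['loss'].T)), rlz_ids)
print(df.groupby(rlz_ids).sum())  # total loss per realization and loss type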
Example #5
    def test_countletters(self):
        data = [('hello', 'world'), ('ciao', 'mondo')]
        smap = parallel.Starmap(countletters, data)
        self.assertEqual(smap.reduce(), {'n': 19})
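
The countletters task itself is not shown on this page; a minimal implementation consistent with the assertion above (10 letters from the first pair plus 9 from the second) could be:

def countletters(word1, word2):
    # each task returns a dict; Starmap.reduce() sums the values key by key,
    # so ('hello', 'world') -> 10 and ('ciao', 'mondo') -> 9 add up to 19
    return {'n': len(word1) + len(word2)}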
Example #6
def get_csm(oq, full_lt, h5=None):
    """
    Build source models from the logic tree and store
    them inside the `source_full_lt` dataset.
    """
    converter = sourceconverter.SourceConverter(
        oq.investigation_time, oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
        oq.area_source_discretization, oq.minimum_magnitude,
        oq.source_id, discard_trts=oq.discard_trts)
    classical = not oq.is_event_based()
    full_lt.ses_seed = oq.ses_seed
    if oq.is_ucerf():
        [grp] = nrml.to_python(oq.inputs["source_model"], converter)
        src_groups = []
        for grp_id, sm_rlz in enumerate(full_lt.sm_rlzs):
            sg = copy.copy(grp)
            src_groups.append(sg)
            src = sg[0].new(sm_rlz.ordinal, sm_rlz.value[0])  # one source
            src.checksum = src.grp_id = src.trt_smr = grp_id
            src.samples = sm_rlz.samples
            logging.info('Reading sections and rupture planes for %s', src)
            planes = src.get_planes()
            if classical:
                src.ruptures_per_block = oq.ruptures_per_block
                sg.sources = list(src)
                for s in sg:
                    s.planes = planes
                    s.sections = s.get_sections()
                # add background point sources
                sg = copy.copy(grp)
                src_groups.append(sg)
                sg.sources = src.get_background_sources()
            else:  # event_based, use one source
                sg.sources = [src]
                src.planes = planes
                src.sections = src.get_sections()
        return CompositeSourceModel(full_lt, src_groups)

    logging.info('Reading the source model(s) in parallel')

    # NB: the source model files are often NOT in the shared directory
    # (for instance in oq-engine/demos) so the processpool must be used
    dist = ('no' if os.environ.get('OQ_DISTRIBUTE') == 'no'
            else 'processpool')
    # NB: h5 is None in logictree_test.py
    allargs = []
    for fname in full_lt.source_model_lt.info.smpaths:
        allargs.append((fname, converter))
    smdict = parallel.Starmap(read_source_model, allargs, distribute=dist,
                              h5=h5 if h5 else None).reduce()
    if len(smdict) > 1:  # really parallel
        parallel.Starmap.shutdown()  # save memory
    fix_geometry_sections(smdict)
    groups = _build_groups(full_lt, smdict)

    # checking the changes
    changes = sum(sg.changes for sg in groups)
    if changes:
        logging.info('Applied {:_d} changes to the composite source model'.
                     format(changes))
    return _get_csm(full_lt, groups)
Example #7
    def compute(self):
        """
        Submit disaggregation tasks and return the results
        """
        oq = self.oqparam
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        magi = numpy.searchsorted(self.bin_edges[0], dstore['rup/mag'][:]) - 1
        magi[magi == -1] = 0  # when the magnitude is on the edge
        totrups = len(magi)
        logging.info('Reading {:_d} ruptures'.format(totrups))
        rdt = [('grp_id', U16), ('magi', U8), ('nsites', U16), ('idx', U32)]
        rdata = numpy.zeros(totrups, rdt)
        rdata['magi'] = magi
        rdata['idx'] = numpy.arange(totrups)
        rdata['grp_id'] = dstore['rup/grp_id'][:]
        rdata['nsites'] = dstore['rup/nsites'][:]
        totweight = rdata['nsites'].sum()
        et_ids = dstore['et_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids)
        G = max(len(rbg) for rbg in rlzs_by_gsim)
        maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
        maxweight = min(
            numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        U = 0
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        # ABSURDLY IMPORTANT!! we rely on the fact that the classical part
        # of the calculation stores the ruptures in chunks of constant
        # grp_id, therefore it is possible to build (start, stop) slices;
        # we are NOT grouping by operator.itemgetter('grp_id', 'magi'):
        # that would break the ordering of the indices causing an incredibly
        # worse performance, but visible only in extra-large calculations!
        for block in block_splitter(rdata, maxweight,
                                    operator.itemgetter('nsites'),
                                    operator.itemgetter('grp_id')):
            grp_id = block[0]['grp_id']
            trti = et_ids[grp_id][0] // num_eff_rlzs
            trt = self.trts[trti]
            cmaker = ContextMaker(
                trt, rlzs_by_gsim[grp_id],
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': oq.maximum_distance,
                 'collapse_level': oq.collapse_level,
                 'num_epsilon_bins': oq.num_epsilon_bins,
                 'investigation_time': oq.investigation_time,
                 'imtls': oq.imtls})
            U = max(U, block.weight)
            slc = slice(block[0]['idx'], block[-1]['idx'] + 1)
            smap.submit((dstore, slc, cmaker, self.hmap4, trti, magi[slc],
                         self.bin_edges))
            task_inputs.append((trti, slc.stop-slc.start))

        nbytes, msg = get_nbytes_msg(dict(M=self.M, G=G, U=U, F=2))
        logging.info('Maximum mean_std per task:\n%s', msg)

        s = self.shapedic
        sd = dict(N=s['N'], M=s['M'], P=s['P'], Z=s['Z'], D=s['dist'],
                  E=s['eps'], Lo=s['lon'], La=s['lat'])
        sd['tasks'] = numpy.ceil(len(task_inputs))
        nbytes, msg = get_nbytes_msg(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)

        sd.pop('tasks')
        dt = numpy.dtype([('trti', U8), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array
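
The comment about constant grp_id chunks matters because every block is converted into a contiguous (start, stop) slice over the stored ruptures. A simplified stand-in for general.block_splitter (just the idea, not the library implementation) shows how the weight and key arguments interact:

def split_blocks(records, max_weight, weight, key):
    """Yield runs of consecutive records sharing the same key, each run
    with total weight roughly bounded by max_weight."""
    block, tot = [], 0
    for rec in records:
        if block and (key(rec) != key(block[-1])
                      or tot + weight(rec) > max_weight):
            yield block
            block, tot = [], 0
        block.append(rec)
        tot += weight(rec)
    if block:
        yield block

# toy records in storage order: grp_id is constant within each chunk,
# so every block maps onto a contiguous range of 'idx' values
recs = ([{'grp_id': 0, 'nsites': 5, 'idx': i} for i in range(4)] +
        [{'grp_id': 1, 'nsites': 8, 'idx': i} for i in range(4, 7)])
for blk in split_blocks(recs, 10, lambda r: r['nsites'], lambda r: r['grp_id']):
    print(blk[0]['grp_id'], slice(blk[0]['idx'], blk[-1]['idx'] + 1))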
Example #8
    def execute(self):
        oq = self.oqparam
        if oq.return_periods != [0]:
            # setting return_periods = 0 disables loss curves
            eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
            if eff_time < 2:
                logging.warning(
                    'eff_time=%s is too small to compute loss curves',
                    eff_time)
                return
        if 'source_info' in self.datastore:  # missing for gmf_ebrisk
            logging.info('Building src_loss_table')
            source_ids, losses = get_src_loss_table(self.datastore, self.L)
            self.datastore['src_loss_table'] = losses
            self.datastore.set_shape_attrs('src_loss_table',
                                           source=source_ids,
                                           loss_type=oq.loss_names)
        shp = self.get_shape(self.L)  # (L, T...)
        text = ' x '.join('%d(%s)' % (n, t)
                          for t, n in zip(oq.aggregate_by, shp[1:]))
        logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
        builder = get_loss_builder(self.datastore)
        if oq.aggregate_by:
            self.build_datasets(builder, oq.aggregate_by, 'agg_')
        self.build_datasets(builder, [], 'app_')
        self.build_datasets(builder, [], 'tot_')
        ds = (self.datastore.parent
              if oq.hazard_calculation_id else self.datastore)
        if oq.aggregate_by:
            aggkeys = sorted(ds['event_loss_table'])
            aggkeys = build_aggkeys(oq.aggregate_by, self.tagcol,
                                    ds['oqparam'].aggregate_by)
            if not oq.hazard_calculation_id:  # no parent
                ds.swmr_on()
            smap = parallel.Starmap(post_ebrisk,
                                    [(ds, aggkey) for aggkey in aggkeys],
                                    h5=self.datastore.hdf5)
        else:
            smap = ()
        # do everything in process since it is really fast
        ds = self.datastore
        for res in smap:
            if not res:
                continue
            for r, dic in res.items():
                if oq.aggregate_by:
                    ds['agg_curves-rlzs'][(slice(None), r, slice(None)) +
                                          dic['idx']  # PRLT..
                                          ] = dic['agg_curves']
                    ds['agg_losses-rlzs'][(slice(None), r) +
                                          dic['idx']  # LRT...
                                          ] = dic['agg_losses']
                    ds['app_curves-rlzs'][:, r] += dic['agg_curves']  # PL

        elt = ds.read_df('losses_by_event', ['event_id', 'rlzi'])
        for r, curves, losses in builder.gen_curves_by_rlz(elt, oq.ses_ratio):
            ds['tot_curves-rlzs'][:, r] = curves  # PL
            ds['tot_losses-rlzs'][:, r] = losses  # L
        units = self.datastore['cost_calculator'].get_units(oq.loss_names)
        aggby = {
            tagname: encode(getattr(self.tagcol, tagname)[1:])
            for tagname in oq.aggregate_by
        }
        set_rlzs_stats(self.datastore,
                       'app_curves',
                       return_periods=builder.return_periods,
                       loss_types=oq.loss_names,
                       **aggby,
                       units=units)
        set_rlzs_stats(self.datastore,
                       'tot_curves',
                       return_periods=builder.return_periods,
                       loss_types=oq.loss_names,
                       **aggby,
                       units=units)
        set_rlzs_stats(self.datastore,
                       'tot_losses',
                       loss_types=oq.loss_names,
                       **aggby,
                       units=units)
        if oq.aggregate_by:
            set_rlzs_stats(self.datastore,
                           'agg_curves',
                           return_periods=builder.return_periods,
                           loss_types=oq.loss_names,
                           **aggby,
                           units=units)
            set_rlzs_stats(self.datastore,
                           'agg_losses',
                           loss_types=oq.loss_names,
                           **aggby,
                           units=units)
        return 1
Example #9
 def execute(self):
     """
     Compute the conditional spectrum
     """
     oq = self.oqparam
     self.full_lt = self.datastore['full_lt']
     self.trts = list(self.full_lt.gsim_lt.values)
     self.imts = list(oq.imtls)
     imti = self.imts.index(oq.imt_ref)
     self.M = M = len(self.imts)
     dstore = (self.datastore.parent
               if self.datastore.parent else self.datastore)
     totrups = len(dstore['rup/mag'])
     logging.info('Reading {:_d} ruptures'.format(totrups))
     rdt = [('grp_id', U16), ('nsites', U16), ('idx', U32)]
     rdata = numpy.zeros(totrups, rdt)
     rdata['idx'] = numpy.arange(totrups)
     rdata['grp_id'] = dstore['rup/grp_id'][:]
     rdata['nsites'] = [len(sids) for sids in dstore['rup/sids_']]
     totweight = rdata['nsites'].sum()
     trt_smrs = dstore['trt_smrs'][:]
     rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(trt_smrs)
     _G = sum(len(rbg) for rbg in rlzs_by_gsim)
     self.periods = [from_string(imt).period for imt in self.imts]
     if oq.imls_ref:
         self.imls = oq.imls_ref
     else:  # extract imls from the "mean" hazard map
         curve = self.datastore.sel('hcurves-stats', stat='mean')[0, 0,
                                                                  imti]
         [self.imls] = compute_hazard_maps(curve, oq.imtls[oq.imt_ref],
                                           oq.poes)  # there is 1 site
     self.P = P = len(self.imls)
     self.datastore.create_dset('cs-rlzs', float,
                                (self.R, M, self.N, 2, self.P))
     self.datastore.set_shape_descr('cs-rlzs',
                                    rlz_id=self.R,
                                    period=self.periods,
                                    sid=self.N,
                                    cs=2,
                                    poe_id=P)
     self.datastore.create_dset('cs-stats', float, (1, M, self.N, 2, P))
     self.datastore.set_shape_descr('cs-stats',
                                    stat='mean',
                                    period=self.periods,
                                    sid=self.N,
                                    cs=['spec', 'std'],
                                    poe_id=P)
     self.datastore.create_dset('_c', float, (_G, M, self.N, 2, P))
     self.datastore.create_dset('_s', float, (_G, self.N, P))
     G = max(len(rbg) for rbg in rlzs_by_gsim)
     maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
     maxweight = min(numpy.ceil(totweight / (oq.concurrent_tasks or 1)),
                     maxw)
     U = 0
     Ta = 0
     self.cmakers = read_cmakers(self.datastore)
     self.datastore.swmr_on()
     smap = parallel.Starmap(conditional_spectrum, h5=self.datastore.hdf5)
     # IMPORTANT!! we rely on the fact that the classical part
     # of the calculation stores the ruptures in chunks of constant
     # grp_id, therefore it is possible to build (start, stop) slices
     for block in general.block_splitter(rdata, maxweight,
                                         operator.itemgetter('nsites'),
                                         operator.itemgetter('grp_id')):
         Ta += 1
         grp_id = block[0]['grp_id']
         G = len(rlzs_by_gsim[grp_id])
         cmaker = self.cmakers[grp_id]
         U = max(U, block.weight)
         slc = slice(block[0]['idx'], block[-1]['idx'] + 1)
         smap.submit((dstore, slc, cmaker, imti, self.imls))
     return smap.reduce()
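
The maxw bound keeps the per-task arrays under about 2 GB; the factor 16 presumably accounts for two float64 values (mean and std) per gsim, IMT and weight unit. With hypothetical values G=4 gsims and M=2 IMTs the budget works out to:

G, M = 4, 2                        # hypothetical: 4 gsims, 2 IMTs
maxw = 2 * 1024**3 / (16 * G * M)
print(maxw)                        # 16777216.0 weight units per task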
Example #10
def get_csm(oq, full_lt, h5=None):
    """
    Build source models from the logic tree and store
    them inside the `source_full_lt` dataset.
    """
    if oq.pointsource_distance is None:
        spinning_off = False
    else:
        spinning_off = sum(oq.pointsource_distance.max().values()) == 0
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    converter = sourceconverter.SourceConverter(
        oq.investigation_time, oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
        oq.area_source_discretization, oq.minimum_magnitude,
        not spinning_off, oq.source_id, discard_trts=oq.discard_trts)
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    classical = not oq.is_event_based()

    if oq.is_ucerf():
        sample = .001 if os.environ.get('OQ_SAMPLE_SOURCES') else None
        [grp] = nrml.to_python(oq.inputs["source_model"], converter)
        src_groups = []
        for grp_id, sm_rlz in enumerate(full_lt.sm_rlzs):
            sg = copy.copy(grp)
            src_groups.append(sg)
            src = sg[0].new(sm_rlz.ordinal, sm_rlz.value)  # one source
            sg.mags = numpy.unique(numpy.round(src.mags))
            del src.__dict__['mags']  # remove cache
            src.checksum = src.grp_id = src.id = grp_id
            src.samples = sm_rlz.samples
            if classical:
                src.ruptures_per_block = oq.ruptures_per_block
                if sample:
                    sg.sources = [list(src)[0]]  # take the first source
                else:
                    sg.sources = list(src)
                # add background point sources
                sg.sources.extend(src.get_background_sources(sample))
            else:  # event_based, use one source
                sg.sources = [src]
        return CompositeSourceModel(full_lt, src_groups)

    logging.info('Reading the source model(s) in parallel')
    if 'OQ_SAMPLE_SOURCES' in os.environ and h5:
        srcfilter = calc.filters.SourceFilter(
            h5['sitecol'], h5['oqparam'].maximum_distance)
    else:
        srcfilter = None

    # NB: the source model files are often NOT in the shared directory
    # (for instance in oq-engine/demos) so the processpool must be used
    dist = ('no' if os.environ.get('OQ_DISTRIBUTE') == 'no'
            else 'processpool')
    # NB: h5 is None in logictree_test.py
    allargs = []
    for fname in full_lt.source_model_lt.info.smpaths:
        allargs.append((fname, converter, srcfilter))
    smdict = parallel.Starmap(read_source_model, allargs, distribute=dist,
                              h5=h5 if h5 else None).reduce()
    if len(smdict) > 1:  # really parallel
        parallel.Starmap.shutdown()  # save memory
    groups = _build_groups(full_lt, smdict)

    # checking the changes
    changes = sum(sg.changes for sg in groups)
    if changes:
        logging.info('Applied %d changes to the composite source model',
                     changes)

    #res = _get_csm(full_lt, groups)

    # #########################################################################
    # logging.info(f"Working for EarthQuake Ruptures Forecast")
    # srcs = [src for sg in res.src_groups for src in sg]
    # ruptures = [rup for src in srcs for rup in src.iter_ruptures()]
    #
    # rup_df = rup2df(ruptures[0])
    #
    # for i in tqdm.tqdm(range(1, len(ruptures))):
    #     rup = ruptures[i]
    #     try:
    #         df = rup2df(rup)
    #         rup_df = pd.concat([rup_df, df], axis=0, ignore_index=True)
    #         rup_df.to_csv("ruptures_from_source_tmp.csv", index=None)
    #         rup_df.to_csv("ruptures_from_source.csv", index=None)
    #     except Exception as e:
    #         logging.error(f"Error in iteration {i}: {e}")
    #         break
    # #########################################################################

    return _get_csm(full_lt, groups)
Example #11
    def full_disaggregation(self):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        bb_dict = self.datastore['bb_dict']
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info('%d epsilon bins from %s to %s',
                     len(eps_edges) - 1, min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.src_groups)
            max_mag = max(mod.max_mag for mod in smodel.src_groups)
            min_mag = min(mod.min_mag for mod in smodel.src_groups)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)),
                int(numpy.ceil(max_mag / mag_bin_width) + 1))
            logging.info('%d mag bins from %s to %s',
                         len(mag_edges) - 1, min_mag, max_mag)
            for src_group in smodel.src_groups:
                if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                    continue  # the group has been filtered away
                for sid, site in zip(sitecol.sids, sitecol):
                    curves = curves_dict[sid]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = bb_dict[sm_id, sid]
                    if not bb:
                        logging.info(
                            'location %s was too far, skipping disaggregation',
                            site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(
                        oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info('%d dist bins from %s to %s',
                                 len(dist_edges) - 1, min(dist_edges),
                                 max(dist_edges))
                    logging.info('%d lon bins from %s to %s',
                                 len(lon_edges) - 1, bb.west, bb.east)
                    logging.info('%d lat bins from %s to %s',
                                 len(lat_edges) - 1, bb.south, bb.north)

                    self.bin_edges[sm_id,
                                   sid] = (mag_edges, dist_edges, lon_edges,
                                           lat_edges, eps_edges)

                bin_edges = {}
                for sid, site in zip(sitecol.sids, sitecol):
                    if (sm_id, sid) in self.bin_edges:
                        bin_edges[sid] = self.bin_edges[sm_id, sid]

                src_filter = SourceFilter(sitecol, oq.maximum_distance)
                split_sources = []
                for src in src_group:
                    for split, _sites in src_filter(
                            sourceconverter.split_source(src), sitecol):
                        split_sources.append(split)
                mon = self.monitor('disaggregation')
                for srcs in split_in_blocks(split_sources, nblocks):
                    all_args.append(
                        (src_filter, srcs, src_group.id, self.rlzs_assoc,
                         trt_names, curves_dict, bin_edges, oq, mon))

        results = parallel.Starmap(compute_disagg,
                                   all_args).reduce(self.agg_result)
        self.save_disagg_results(results)
Example #12
    def compute(self):
        """
        Submit disaggregation tasks and return the results
        """
        logging.info('Reading ruptures')
        oq = self.oqparam
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        mags = set()
        for trt, dset in self.datastore['source_mags'].items():
            mags.update(dset[:])
        mags = sorted(mags)
        allargs = []
        totweight = sum(d['rctx']['nsites'].sum() for n, d in dstore.items()
                        if n.startswith('mag_') and len(d['rctx']))
        et_ids = dstore['et_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids)
        G = max(len(rbg) for rbg in rlzs_by_gsim)
        maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
        maxweight = min(numpy.ceil(totweight / (oq.concurrent_tasks or 1)),
                        maxw)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        U = 0
        totrups = 0
        for mag in mags:
            rctx = dstore['mag_%s/rctx' % mag][:]
            totrups += len(rctx)
            for grp_id, gids in enumerate(et_ids):
                idxs, = numpy.where(rctx['grp_id'] == grp_id)
                if len(idxs) == 0:
                    continue
                trti = gids[0] // num_eff_rlzs
                trt = self.trts[trti]
                cmaker = ContextMaker(
                    trt, rlzs_by_gsim[grp_id], {
                        'truncation_level': oq.truncation_level,
                        'maximum_distance': oq.maximum_distance,
                        'collapse_level': oq.collapse_level,
                        'imtls': oq.imtls
                    })
                for blk in block_splitter(rctx[idxs], maxweight, nsites):
                    nr = len(blk)
                    U = max(U, blk.weight)
                    allargs.append((dstore, numpy.array(blk), cmaker,
                                    self.hmap4, trti, self.bin_edges, oq))
                    task_inputs.append((trti, mag, nr))
        logging.info('Found {:_d} ruptures'.format(totrups))
        nbytes, msg = get_array_nbytes(dict(M=self.M, G=G, U=U, F=2))
        logging.info('Maximum mean_std per task:\n%s', msg)

        s = self.shapedic
        sd = dict(N=s['N'],
                  M=s['M'],
                  P=s['P'],
                  Z=s['Z'],
                  D=s['dist'],
                  E=s['eps'],
                  Lo=s['lon'],
                  La=s['lat'])
        sd['tasks'] = numpy.ceil(len(allargs))
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)

        sd.pop('tasks')
        sd['mags_trt'] = sum(
            len(mags) for mags in self.datastore['source_mags'].values())
        nbytes, msg = get_array_nbytes(sd)
        logging.info('Estimated memory on the master:\n%s', msg)

        dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg,
                                allargs,
                                h5=self.datastore.hdf5)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array
Example #13
    def full_disaggregation(self):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        sitecol = self.sitecol
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)

        self.bin_edges = {}
        curves = [self.get_curves(sid) for sid in sitecol.sids]
        # determine the number of effective source groups
        sg_data = self.datastore['csm_info/sg_data']
        num_grps = sum(1 for effrup in sg_data['effrup'] if effrup > 0)
        nblocks = math.ceil(oq.concurrent_tasks / num_grps)
        src_filter = SourceFilter(sitecol, oq.maximum_distance)
        R = len(self.rlzs_assoc.realizations)
        max_poe = numpy.zeros(R, oq.imt_dt())

        # build trt_edges
        trts = tuple(
            sorted(
                set(sg.trt for smodel in self.csm.source_models
                    for sg in smodel.src_groups)))

        # build mag_edges
        min_mag = min(sg.min_mag for smodel in self.csm.source_models
                      for sg in smodel.src_groups)
        max_mag = max(sg.max_mag for smodel in self.csm.source_models
                      for sg in smodel.src_groups)
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
        logging.info('dist = %s...%s', min(dist_edges), max(dist_edges))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges, lat_edges = disagg.lon_lat_bins(bb,
                                                       oq.coordinate_bin_width)
            logging.info('site %d, lon = %s...%s', sid, min(lon_edges),
                         max(lon_edges))
            logging.info('site %d, lat = %s...%s', sid, min(lat_edges),
                         max(lat_edges))
            self.bin_edges[sid] = bs = (mag_edges, dist_edges, lon_edges,
                                        lat_edges, eps_edges)
            shape = [len(edges) - 1 for edges in bs] + [len(trts)]
            logging.info('%s for sid %d', shape, sid)

        # check poes
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            for i, site in enumerate(sitecol):
                sid = sitecol.sids[i]
                curve = curves[i]
                # populate max_poe array
                for rlzi, poes in curve.items():
                    for imt in oq.imtls:
                        max_poe[rlzi][imt] = max(max_poe[rlzi][imt],
                                                 poes[imt].max())
                if not curve:
                    continue  # skip zero-valued hazard curves

            # check for too big poes_disagg
            for poe in oq.poes_disagg:
                for rlz in self.rlzs_assoc.rlzs_by_smodel[sm_id]:
                    rlzi = rlz.ordinal
                    for imt in oq.imtls:
                        min_poe = max_poe[rlzi][imt]
                        if poe > min_poe:
                            raise ValueError(
                                self.POE_TOO_BIG %
                                (poe, sm_id, smodel.name, min_poe, rlzi, imt))

        # build all_args
        all_args = []
        for smodel in self.csm.source_models:
            for sg in smodel.src_groups:
                split_sources = []
                for src in sg:
                    for split, _sites in src_filter(
                            sourceconverter.split_source(src), sitecol):
                        split_sources.append(split)
                if not split_sources:
                    continue
                mon = self.monitor('disaggregation')
                rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(
                    sg.trt, smodel.ordinal)
                cmaker = ContextMaker(rlzs_by_gsim,
                                      src_filter.integration_distance)
                imls = [
                    disagg.make_imldict(rlzs_by_gsim, oq.imtls, oq.iml_disagg,
                                        oq.poes_disagg, curve)
                    for curve in curves
                ]
                for srcs in split_in_blocks(split_sources, nblocks):
                    all_args.append((src_filter, srcs, cmaker, imls, trts,
                                     self.bin_edges, oq, mon))

        self.cache_info = numpy.zeros(2)  # operations, cache_hits
        results = parallel.Starmap(compute_disagg,
                                   all_args).reduce(self.agg_result)
        ops, hits = self.cache_info
        logging.info('Cache speedup %s', ops / (ops - hits))
        self.save_disagg_results(results)
Example #14
    def post_execute(self, times):
        """
        Compute and store average losses from the losses_by_event dataset,
        and then loss curves and maps.
        """
        if len(times):
            try:
                dset = self.datastore['task_info/start_ebrisk']
            except KeyError:
                # can happen for mysterious race conditions on some machines
                pass
            else:
                # store the time information plus the events_per_sid info
                dset.attrs['times'] = times
                dset.attrs['events_per_sid'] = numpy.mean(self.events_per_sid)

        oq = self.oqparam
        shp = self.get_shape(self.L)  # (L, T...)
        text = ' x '.join('%d(%s)' % (n, t)
                          for t, n in zip(oq.aggregate_by, shp[1:]))
        logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
        builder = get_loss_builder(self.datastore)
        self.build_datasets(builder)
        self.datastore.close()
        if 'losses_by_event' in self.datastore.parent:
            dstore = self.datastore.parent
        else:
            dstore = self.datastore
        allargs = [(dstore.filename, builder, rlzi) for rlzi in range(self.R)]
        h5 = hdf5.File(self.datastore.hdf5cache())
        acc = list(
            parallel.Starmap(compute_loss_curves_maps,
                             allargs,
                             hdf5path=h5.filename))
        # copy performance information from the cache to the datastore
        pd = h5['performance_data'][()]
        hdf5.extend3(self.datastore.filename, 'performance_data', pd)
        self.datastore.open('r+')  # reopen
        self.datastore['task_info/compute_loss_curves_and_maps'] = (
            h5['task_info/compute_loss_curves_maps'][()])
        self.datastore.open('r+')
        with self.monitor('saving loss_curves and maps', autoflush=True):
            for r, (curves, maps) in acc:
                if len(curves):  # some realization can give zero contribution
                    self.datastore['agg_curves-rlzs'][:, r] = curves
                if len(maps):  # conditional_loss_poes can be empty
                    self.datastore['agg_maps-rlzs'][:, r] = maps
        if self.R > 1:
            logging.info('Computing aggregate loss curves statistics')
            set_rlzs_stats(self.datastore, 'agg_curves')
            self.datastore.set_attrs('agg_curves-stats',
                                     return_periods=builder.return_periods,
                                     loss_types=' '.join(
                                         self.crmodel.loss_types))
            if oq.conditional_loss_poes:
                logging.info('Computing aggregate loss maps statistics')
                set_rlzs_stats(self.datastore, 'agg_maps')

        # sanity check with the asset_loss_table
        if oq.asset_loss_table and len(oq.aggregate_by) == 1:
            alt = self.datastore['asset_loss_table'][()]
            if alt.sum() == 0:  # nothing was saved
                return
            logging.info('Checking the loss curves')
            tags = getattr(self.assetcol.tagcol, oq.aggregate_by[0])[1:]
            T = len(tags)
            P = len(builder.return_periods)
            # sanity check on the loss curves for simple tag aggregation
            arr = self.assetcol.aggregate_by(oq.aggregate_by, alt)
            # shape (T, E, L)
            rlzs = self.datastore['events']['rlz']
            curves = numpy.zeros((P, self.R, self.L, T))
            for t in range(T):
                for r in range(self.R):
                    for l in range(self.L):
                        curves[:, r, l,
                               t] = losses_by_period(arr[t, rlzs == r, l],
                                                     builder.return_periods,
                                                     builder.num_events[r],
                                                     builder.eff_time)
            numpy.testing.assert_allclose(
                curves, self.datastore['agg_curves-rlzs'][()])
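
The sanity check above rebuilds the aggregate curves with losses_by_period. As a rough, simplified stand-in (written here for illustration, not the library code): each observed loss gets an empirical return period eff_time/rank, and the curve is interpolated at the requested return periods.

import numpy

def simple_losses_by_period(losses, return_periods, num_events, eff_time):
    # pad with zeros for the events that produced no loss record, rank the
    # losses, attach empirical return periods eff_time/rank and interpolate
    losses = numpy.sort(losses)
    losses = numpy.concatenate([numpy.zeros(num_events - len(losses)), losses])
    periods = eff_time / numpy.arange(num_events, 0., -1)
    return numpy.interp(return_periods, periods, losses)

print(simple_losses_by_period([3., 10., 1.], return_periods=[10, 50, 100],
                              num_events=5, eff_time=100.))  # [ 0.  3. 10.]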
Example #15
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        params = dict(imtls=oq.imtls,
                      ses_per_logic_tree_path=oq.ses_per_logic_tree_path,
                      ses_seed=oq.ses_seed)
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        sources = self.csm.get_sources()
        # weighting the heavy sources
        nrups = parallel.Starmap(count_ruptures,
                                 [(src, )
                                  for src in sources if src.code in b'AMC'],
                                 progress=logging.debug).reduce()
        for src in sources:
            try:
                src.num_ruptures = nrups[src.source_id]
            except KeyError:
                src.num_ruptures = src.count_ruptures()
            src.weight = src.num_ruptures
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        source_data = AccumDict(accum=[])
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        logging.info('Building ruptures')
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            cmaker = ContextMaker(sg.trt, gsims_by_trt[sg.trt], oq)
            for src_group in sg.split(maxweight):
                allargs.append((src_group, cmaker, srcfilter.sitecol))
        smap = parallel.Starmap(sample_ruptures,
                                allargs,
                                h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        self.nruptures = 0  # estimated classical ruptures within maxdist
        for dic in smap:
            # NB: dic should be a dictionary, but when the calculation dies
            # for an OOM it can become None, thus giving a very confusing error
            if dic is None:
                raise MemoryError('You ran out of memory!')
            rup_array = dic['rup_array']
            if len(rup_array) == 0:
                continue
            if dic['source_data']:
                source_data += dic['source_data']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            with mon:
                n = len(rup_array)
                rup_array['id'] = numpy.arange(self.nruptures,
                                               self.nruptures + n)
                self.nruptures += n
                hdf5.extend(self.datastore['ruptures'], rup_array)
                hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
        if len(self.datastore['ruptures']) == 0:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # don't change the order of the 3 things below!
        self.store_source_info(source_data)
        self.store_rlz_info(eff_ruptures)
        imp = calc.RuptureImporter(self.datastore)
        with self.monitor('saving ruptures and events'):
            imp.import_rups_events(
                self.datastore.getitem('ruptures')[()], get_rupture_getters)
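The method above weights every source by its rupture count and then splits each source group into chunks of roughly `maxweight`, one chunk per task. A minimal standalone sketch of that splitting logic, using a hypothetical `Source` namedtuple instead of the engine's own source objects and `sg.split`:

from collections import namedtuple

Source = namedtuple('Source', 'source_id num_ruptures')

def split_by_weight(sources, maxweight):
    """Yield lists of sources whose total weight is close to maxweight."""
    block, weight = [], 0
    for src in sources:
        block.append(src)
        weight += src.num_ruptures
        if weight >= maxweight:
            yield block
            block, weight = [], 0
    if block:
        yield block

sources = [Source('A', 50), Source('B', 120), Source('C', 30), Source('D', 200)]
total = sum(s.num_ruptures for s in sources)
maxweight = total / 2  # as if concurrent_tasks == 2
for i, blk in enumerate(split_by_weight(sources, maxweight)):
    print(i, [s.source_id for s in blk], sum(s.num_ruptures for s in blk))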
Example #16
    def full_disaggregation(self, curves):
        """
        Run the disaggregation phase.

        :param curves: a list of hazard curves, one per site

        The curves can be all None if iml_disagg is set in the job.ini
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = SourceFilter(self.sitecol,
                                  oq.maximum_distance,
                                  use_rtree=False)
        csm = self.csm.filter(src_filter)  # fine filtering
        self.datastore['csm_info'] = csm.info
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
        self.bin_edges = {}

        # build trt_edges
        trts = tuple(
            sorted(
                set(sg.trt for smodel in csm.source_models
                    for sg in smodel.src_groups)))
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        min_mag = min(sg.min_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        max_mag = max(sg.max_mag for smodel in csm.source_models
                      for sg in smodel.src_groups)
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        self.save_bin_edges()

        # build all_args
        all_args = []
        maxweight = csm.get_maxweight(oq.concurrent_tasks)
        mon = self.monitor('disaggregation')
        R = len(self.rlzs_assoc.realizations)
        iml4 = disagg.make_iml4(R, oq.imtls, oq.iml_disagg, oq.poes_disagg
                                or (None, ), curves)
        self.imldict = {}  # sid, rlzi, poe, imt -> iml
        for s in self.sitecol.sids:
            for r in range(R):
                for p, poe in enumerate(oq.poes_disagg or [None]):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, r, poe, imt] = iml4[s, r, m, p]

        for smodel in csm.source_models:
            sm_id = smodel.ordinal
            for trt, groups in groupby(smodel.src_groups,
                                       operator.attrgetter('trt')).items():
                trti = trt_num[trt]
                sources = sum([grp.sources for grp in groups], [])
                rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
                cmaker = ContextMaker(rlzs_by_gsim,
                                      src_filter.integration_distance)
                for block in csm.split_in_blocks(maxweight, sources):
                    all_args.append((src_filter, block, cmaker, iml4, trti,
                                     self.bin_edges, oq, mon))

        self.num_ruptures = [0] * len(self.trts)
        self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
        results = parallel.Starmap(compute_disagg,
                                   all_args).reduce(self.agg_result,
                                                    AccumDict(accum={}))
        ops, hits, num_zeros = self.cache_info
        logging.info('Cache speedup %s', ops / (ops - hits))
        logging.info('Discarded zero matrices: %d', num_zeros)
        return results
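The magnitude and distance edges built above are simply multiples of the configured bin width covering the observed range. A self-contained numpy sketch of the same construction, with made-up values:

import numpy

def make_edges(vmin, vmax, width):
    """Bin edges as multiples of `width` covering [vmin, vmax]."""
    return width * numpy.arange(int(numpy.floor(vmin / width)),
                                int(numpy.ceil(vmax / width) + 1))

mag_edges = make_edges(4.7, 7.9, 0.5)   # -> 4.5, 5.0, ..., 8.0
dist_edges = 10.0 * numpy.arange(0, int(numpy.ceil(230.0 / 10.0) + 1))  # 0..230
print(mag_edges)
print(dist_edges)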
Example #17
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        logging.info('Building ruptures')
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(sample_ruptures,
                                allargs,
                                h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            if os.environ.get('OQ_SAMPLE_SOURCES'):
                raise SystemExit(0)  # success even with no ruptures
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        self.init_logic_tree(self.csm.full_lt)
        with self.monitor('store source_info'):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial number
        sorted_ruptures.sort(order='serial')
        nr = len(sorted_ruptures)
        assert len(numpy.unique(sorted_ruptures['serial'])) == nr  # sanity
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore['ruptures']['id'] = numpy.arange(nr)
        with self.monitor('saving events'):
            self.save_events(sorted_ruptures)
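Both `calc_times` and `eff_ruptures` are accumulator dictionaries: every task returns a small dict and the master sums the values key by key, which is what `AccumDict(accum=0)` does for the counters. A plain-Python equivalent of that merge, using invented task outputs:

from collections import Counter

def merge_counts(acc, dic):
    """Add the counters in `dic` into the accumulator `acc`."""
    for key, val in dic.items():
        acc[key] += val
    return acc

task_outputs = [{'Active Shallow Crust': 12}, {'Stable Continental': 3},
                {'Active Shallow Crust': 7}]
eff_ruptures = Counter()
for dic in task_outputs:
    merge_counts(eff_ruptures, dic)
print(dict(eff_ruptures))  # {'Active Shallow Crust': 19, 'Stable Continental': 3}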
Example #18
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, self.ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, self.ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                self.datastore['best_rlzs'] = rlzs
        else:
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build trt_edges
        trts = tuple(self.full_lt.trts)
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        mags = [float(mag) for mag in self.datastore['source_mags']]
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min(mags) / oq.mag_bin_width)),
            int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max(mags))
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        shapedic = self.save_bin_edges()
        del shapedic['trt']
        shapedic['N'] = self.N
        shapedic['M'] = len(oq.imtls)
        shapedic['P'] = len(oq.poes_disagg)
        shapedic['Z'] = Z
        shapedic['concurrent_tasks'] = oq.concurrent_tasks
        nbytes, msg = get_array_nbytes(shapedic)
        if nbytes > oq.max_data_transfer:
            raise ValueError('Estimated data transfer too big\n%s' % msg)
        logging.info('Estimated data transfer: %s', msg)
        self.imldict = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

        # submit #groups disaggregation tasks
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        indices = get_indices(dstore, oq.concurrent_tasks or 1)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        for grp_id, trt in self.full_lt.trt_by_grp.items():
            logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
            trti = trt_num[trt]
            cmaker = ContextMaker(
                trt, self.full_lt.get_rlzs_by_gsim(grp_id),
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': src_filter.integration_distance,
                 'filter_distance': oq.filter_distance, 'imtls': oq.imtls})
            for idxs in indices[grp_id]:
                smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                             self.bin_edges))
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 8D array
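When `rlz_index` is not given, the code keeps for each site the Z realizations whose hazard curves are closest to the weighted mean curve (via `util.closest_to_ref`). A simplified stand-in for that selection, assuming a plain Euclidean distance (the engine's actual metric may differ):

import numpy

def closest_to_mean(curves, weights, Z):
    """Return the indices of the Z curves closest to the weighted mean."""
    mean = numpy.average(curves, axis=0, weights=weights)
    dists = numpy.sqrt(((curves - mean) ** 2).sum(axis=1))
    return numpy.argsort(dists)[:Z]

curves = numpy.array([[.9, .5, .1], [.8, .4, .05], [.95, .7, .3]])  # R=3, L=3
weights = [.5, .3, .2]
print(closest_to_mean(curves, weights, Z=2))  # indices of the 2 closest rlzs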
Example #19
    def get_models(self):
        """
        :yields: :class:`openquake.commonlib.logictree.LtSourceModel` tuples
        """
        oq = self.oqparam
        spinning_off = self.oqparam.pointsource_distance == {'default': 0.0}
        if spinning_off:
            logging.info('Removing nodal plane and hypocenter distributions')
        dist = ('no'
                if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool')
        smlt_dir = os.path.dirname(self.source_model_lt.filename)
        converter = sourceconverter.SourceConverter(
            oq.investigation_time, oq.rupture_mesh_spacing,
            oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
            oq.area_source_discretization, oq.minimum_magnitude,
            not spinning_off, oq.source_id)
        if oq.calculation_mode.startswith('ucerf'):
            [grp] = nrml.to_python(oq.inputs["source_model"], converter)
            dic = {'ucerf': grp}
        elif self.in_memory:
            logging.info('Reading the source model(s) in parallel')
            smap = parallel.Starmap(
                nrml.read_source_models,
                distribute=dist,
                hdf5path=self.hdf5.filename if self.hdf5 else None)
            for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
                for name in sm.names.split():
                    fname = os.path.abspath(os.path.join(smlt_dir, name))
                    smap.submit([fname], converter)
            dic = {sm.fname: sm for sm in smap}
        else:
            dic = {}
        # consider only the effective realizations
        idx = 0
        if self.hdf5:
            sources = hdf5.create(self.hdf5, 'source_info', source_info_dt)
            hdf5.create(self.hdf5, 'source_geom', point3d)
            hdf5.create(self.hdf5, 'source_mfds', hdf5.vstr)
        grp_id = 0
        for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
            if 'ucerf' in dic:
                sg = copy.copy(dic['ucerf'])
                sm.src_groups = [sg]
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                src.src_group_id = grp_id
                src.id = idx
                if oq.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                idx += 1
                grp_id += 1
                data = [(sg.id, src.source_id, src.code, 0, 0, -1,
                         src.num_ruptures, 0, 0, 0, idx)]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            else:
                self.apply_uncertainties(sm, idx, dic)
            yield sm
        if self.mags and self.hdf5 and 'site_model' not in oq.inputs:
            mags_by_trt = {trt: sorted(ms) for trt, ms in self.mags.items()}
            idist = self.gsim_lt.get_integration_distance(mags_by_trt, oq)
            self.hdf5['integration_distance'] = idist
            self.hdf5.set_nbytes('integration_distance')
            hdf5.extend(self.hdf5['source_mfds'],
                        numpy.array(list(self.mfds), hdf5.vstr))

        # log if some source file is being used more than once
        dupl = 0
        for fname, hits in self.fname_hits.items():
            if hits > 1:
                logging.info('%s has been considered %d times', fname, hits)
                if not self.changes:
                    dupl += hits
        if self.changes:
            logging.info('Applied %d changes to the composite source model',
                         self.changes)
Example #20
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        assert oq.max_sites_per_tile > oq.max_sites_disagg, (
            oq.max_sites_per_tile, oq.max_sites_disagg)
        psd = self.set_psd()  # must go before to set the pointsource_distance
        run_preclassical(self.csm, oq, self.datastore)

        # exit early if we want to perform only a preclassical
        if oq.calculation_mode == 'preclassical':
            recs = [tuple(row) for row in self.csm.source_info.values()]
            self.datastore['source_info'] = numpy.array(
                recs, readinput.source_info_dt)
            self.datastore['full_lt'] = self.csm.full_lt
            self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
            return

        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        smap = parallel.Starmap(classical,
                                self.get_args(acc0),
                                h5=self.datastore.hdf5)
        smap.monitor.save('srcfilter', self.src_filter())
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            source_ids = self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = (max(self.by_task) + 1,)
                er = self.datastore.create_dset('by_task/eff_ruptures', U32,
                                                num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites', U32,
                                                num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr,
                                                num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(source_ids[s] for s in srcids)
                self.by_task.clear()
        if self.calc_times:  # can be empty in case of errors
            self.numctxs = sum(arr[0] for arr in self.calc_times.values())
            numsites = sum(arr[1] for arr in self.calc_times.values())
            logging.info('Total number of contexts: {:_d}'.format(
                int(self.numctxs)))
            logging.info('Average number of sites per context: %d',
                         numsites / self.numctxs)
        if psd:
            psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
            if psdist and self.maxradius >= psdist / 2:
                logging.warning(
                    'The pointsource_distance of %d km is too '
                    'small compared to a maxradius of %d km', psdist,
                    self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
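The final logging relies on `calc_times` mapping each source to a triple of (number of contexts, number of sites, elapsed time), as the `# nr, ns, dt` comments elsewhere in these examples indicate. A tiny standalone recomputation of those totals with invented numbers:

import numpy

calc_times = {'src_a': numpy.array([120., 480., 1.2]),
              'src_b': numpy.array([30., 60., 0.4])}
numctxs = sum(arr[0] for arr in calc_times.values())
numsites = sum(arr[1] for arr in calc_times.values())
print('contexts=%d, sites per context=%.1f' % (numctxs, numsites / numctxs))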
Example #21
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        *self.bin_edges, self.trts = all_edges
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')
        elif self.datastore['source_info'].attrs['atomic']:
            raise NotImplementedError('Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(self.datastore, ws,
                                          self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(curves, oq.imtls,
                                                    stats.mean_curve, ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
        else:
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
            self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [
                self.get_curve(sid, rlzs[sid]) for sid in self.sitecol.sids
            ]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg,
                          curves)
        self.datastore['iml4'] = self.iml4
        self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

        self.save_bin_edges()
        tot = get_outputs_size(self.shapedic, oq.disagg_outputs)
        logging.info('Total output size: %s', humansize(sum(tot.values())))
        self.imldic = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            iml3 = self.iml4[s]
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldic[s, rlz, poe, imt] = iml3[m, p, z]

        # submit disaggregation tasks
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        mag_edges = self.bin_edges[0]
        indices = get_indices_by_gidx_mag(dstore, mag_edges)
        allargs = []
        totweight = sum(sum(ri.weight for ri in indices[gm]) for gm in indices)
        maxweight = int(numpy.ceil(totweight / (oq.concurrent_tasks or 1)))
        grp_ids = dstore['grp_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        G, U = 0, 0
        for gidx, magi in indices:
            trti = grp_ids[gidx][0] // num_eff_rlzs
            trt = self.trts[trti]
            cmaker = ContextMaker(
                trt, rlzs_by_gsim[gidx], {
                    'truncation_level': oq.truncation_level,
                    'maximum_distance': oq.maximum_distance,
                    'collapse_level': oq.collapse_level,
                    'imtls': oq.imtls
                })
            G = max(G, len(cmaker.gsims))
            for rupidxs in block_splitter(indices[gidx, magi], maxweight,
                                          weight):
                idxs = numpy.array([ri.index for ri in rupidxs])
                U = max(U, len(idxs))
                allargs.append((dstore, idxs, cmaker, self.iml4, trti, magi,
                                self.bin_edges[1:], oq))
                task_inputs.append((trti, magi, len(idxs)))

        nbytes, msg = get_array_nbytes(dict(N=self.N, M=self.M, G=G, U=U))
        logging.info('Maximum mean_std per task:\n%s', msg)
        sd = self.shapedic.copy()
        sd.pop('trt')
        sd.pop('mag')
        sd['tasks'] = numpy.ceil(len(allargs))
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)
        dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg,
                                allargs,
                                h5=self.datastore.hdf5)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array
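The `get_array_nbytes` check presumably estimates the size of the result arrays from the product of their dimensions; a rough standalone equivalent with invented shapes and an assumed 8 bytes per value (the engine's helper and `max_data_transfer` default may differ):

import math

def estimate_nbytes(shapedic, itemsize=8):
    """Return (nbytes, readable message) for a dict of array dimensions."""
    nbytes = itemsize * math.prod(shapedic.values())
    msg = ' * '.join('%s=%d' % kv for kv in shapedic.items())
    return nbytes, msg

shapedic = dict(N=10, M=2, P=4, Z=1, dist=20, eps=6)
nbytes, msg = estimate_nbytes(shapedic)
max_data_transfer = 2 * 1024 ** 3  # hypothetical 2 GB cap
if nbytes > max_data_transfer:
    raise ValueError('Estimated data transfer too big\n%s' % msg)
print('Estimated data transfer: %s -> %d bytes' % (msg, nbytes))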
Example #22
def run_preclassical(csm, oqparam, h5):
    """
    :param csm: a CompositeSourceModel with attribute .srcfilter
    :param oqparam: the parameters of the job.ini file
    :param h5: a DataStore instance
    """
    logging.info('Sending %s', csm.sitecol)

    # do nothing for atomic sources except counting the ruptures
    for src in csm.get_sources(atomic=True):
        src.num_ruptures = src.count_ruptures()
        src.nsites = len(csm.sitecol)

    # run preclassical for non-atomic sources
    sources_by_grp = groupby(csm.get_sources(atomic=False), lambda src:
                             (src.grp_id, msr_name(src)))
    param = dict(maximum_distance=oqparam.maximum_distance,
                 pointsource_distance=oqparam.pointsource_distance,
                 ps_grid_spacing=oqparam.ps_grid_spacing,
                 split_sources=oqparam.split_sources)
    srcfilter = SourceFilter(
        csm.sitecol.reduce(10000) if csm.sitecol else None,
        oqparam.maximum_distance)
    res = parallel.Starmap(
        preclassical,
        ((srcs, srcfilter, param) for srcs in sources_by_grp.values()),
        h5=h5,
        distribute=None if len(sources_by_grp) > 1 else 'no').reduce()

    if res and res['before'] != res['after']:
        logging.info(
            'Reduced the number of sources from {:_d} -> {:_d}'.format(
                res['before'], res['after']))

    if res and h5:
        csm.update_source_info(res['calc_times'], nsites=True)

    for grp_id, srcs in res.items():
        # srcs can be empty if the minimum_magnitude filter is on
        if srcs and not isinstance(grp_id, str):
            newsg = SourceGroup(srcs[0].tectonic_region_type)
            newsg.sources = srcs
            csm.src_groups[grp_id] = newsg

    # sanity check
    for sg in csm.src_groups:
        for src in sg:
            assert src.num_ruptures
            assert src.nsites

    # store ps_grid data, if any
    for key, sources in res.items():
        if isinstance(key, str) and key.startswith('ps_grid/'):
            arrays = []
            for ps in sources:
                if hasattr(ps, 'location'):
                    lonlats = [ps.location.x, ps.location.y]
                    for src in getattr(ps, 'pointsources', []):
                        lonlats.extend([src.location.x, src.location.y])
                    arrays.append(F32(lonlats))
            h5[key] = arrays
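`groupby` here returns a dictionary of lists keyed by (grp_id, msr_name); a plain-Python version of that helper, written as an illustration (the engine's `openquake.baselib.general.groupby` may differ in details):

def groupby(items, key):
    """Group items into a dict of lists keyed by key(item)."""
    out = {}
    for item in items:
        out.setdefault(key(item), []).append(item)
    return out

sources = [('src1', 0), ('src2', 0), ('src3', 1)]  # (source_id, grp_id) pairs
by_grp = groupby(sources, key=lambda s: s[1])
print({grp: [sid for sid, _ in srcs] for grp, srcs in by_grp.items()})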
Example #23
    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        srcfilter = self.src_filter(self.datastore.tempname)
        self.indices = AccumDict(accum=[])  # sid, idx -> indices
        if oq.hazard_calculation_id:  # from ruptures
            self.datastore.parent = util.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.datastore.parent['full_lt'])
        else:  # from sources
            self.build_events_from_sources(srcfilter)
            if (oq.ground_motion_fields is False
                    and oq.hazard_curves_from_gmfs is False):
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        N = len(self.sitecol.complete)
        if oq.ground_motion_fields:
            nrups = len(self.datastore['ruptures'])
            self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
            self.datastore.create_dset('gmf_data/sigma_epsilon',
                                       sig_eps_dt(oq.imtls))
            self.datastore.create_dset('gmf_data/indices',
                                       hdf5.vuint32,
                                       shape=(N, 2),
                                       fillvalue=None)
            self.datastore.create_dset('gmf_data/events_by_sid', U32, (N, ))
            self.datastore.create_dset('gmf_data/time_by_rup',
                                       time_dt, (nrups, ),
                                       fillvalue=None)
        if oq.hazard_curves_from_gmfs:
            self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

        # compute_gmfs in parallel
        self.datastore.swmr_on()
        logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
        iterargs = (
            (rgetter, srcfilter, self.param)
            for rgetter in gen_rupture_getters(self.datastore, srcfilter))
        acc = parallel.Starmap(self.core_task.__func__,
                               iterargs,
                               h5=self.datastore.hdf5,
                               num_cores=oq.num_cores).reduce(
                                   self.agg_dicts, self.acc0())

        if self.indices:
            dset = self.datastore['gmf_data/indices']
            num_evs = self.datastore['gmf_data/events_by_sid']
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
            avg_events_by_sid = num_evs[()].sum() / N
            logging.info('Found ~%d GMVs per site', avg_events_by_sid)
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc
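The `indices` accumulator keeps, for every site, parallel lists of start/stop row ranges into `gmf_data/data`; the number of GMVs for a site is the sum of the range lengths. A small standalone illustration of that bookkeeping with invented ranges:

import numpy

indices = {(0, 0): [0, 10], (0, 1): [5, 25],   # site 0: rows 0-5 and 10-25
           (1, 0): [25], (1, 1): [40]}         # site 1: rows 25-40
for sid in (0, 1):
    start = numpy.array(indices[sid, 0])
    stop = numpy.array(indices[sid, 1])
    print('site %d: %d GMVs' % (sid, (stop - start).sum()))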
Example #24
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.csm_info = parent['csm_info']
            self.calc_stats()  # post-processing
            return {}

        mags = self.datastore['source_mags'][()]
        if len(mags) == 0:  # everything was discarded
            raise RuntimeError('All sources were discarded!?')
        gsims_by_trt = self.csm_info.get_gsims_by_trt()
        dist_bins = {
            trt: oq.maximum_distance.get_dist_bins(trt)
            for trt in gsims_by_trt
        }
        if oq.pointsource_distance:
            logging.info('Computing effect of the ruptures')
            mon = self.monitor('rupture effect')
            effect = parallel.Starmap.apply(
                get_effect_by_mag,
                (mags, self.sitecol.one(), gsims_by_trt, oq.maximum_distance,
                 oq.imtls, mon)).reduce()
            self.datastore['effect'] = effect
            self.datastore.set_attrs('effect', **dist_bins)
            self.effect = {
                trt: Effect({mag: effect[mag][:, t]
                             for mag in effect}, dist_bins[trt],
                            getdefault(oq.pointsource_distance, trt))
                for t, trt in enumerate(gsims_by_trt)
            }
            for trt, eff in self.effect.items():
                oq.maximum_distance.magdist[trt] = eff.dist_by_mag()
                oq.pointsource_distance[trt] = eff.dist_by_mag(
                    eff.collapse_value)
        else:
            self.effect = {}
        smap = parallel.Starmap(self.core_task.__func__,
                                h5=self.datastore.hdf5,
                                num_cores=oq.num_cores)
        smap.task_queue = list(self.gen_task_queue())  # really fast
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.get_results().reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            with self.monitor('store source_info'):
                self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = (max(self.by_task) + 1,)
                er = self.datastore.create_dset('by_task/eff_ruptures', U32,
                                                num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites', U32,
                                                num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vuint32,
                                                num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = srcids
                self.by_task.clear()
        numrups = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Effective number of ruptures: %d/%d', numrups,
                     self.totrups)
        logging.info('Effective number of sites per rupture: %d',
                     numsites / numrups)
        self.calc_times.clear()  # save a bit of memory
        return acc
Example #25
    def build_events_from_sources(self, srcfilter):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.info.get_gsims_by_trt()
        logging.info('Building ruptures')
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        ses_idx = 0
        allargs = []
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                if sg.atomic:  # do not split the group
                    allargs.append((sg, srcfilter, par))
                else:  # traditional groups
                    for block in self.block_splitter(sg.sources, key=by_grp):
                        if 'ucerf' in oq.calculation_mode:
                            for i in range(oq.ses_per_logic_tree_path):
                                par = par.copy()  # avoid mutating the dict
                                par['ses_seeds'] = [
                                    (ses_idx, oq.ses_seed + i + 1)]
                                allargs.append((block, srcfilter, par))
                                ses_idx += 1
                        else:
                            allargs.append((block, srcfilter, par))
        smap = parallel.Starmap(
            self.build_ruptures.__func__, allargs, h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info'):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial number
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore['ruptures']['id'] = numpy.arange(len(sorted_ruptures))
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs)
        with self.monitor('saving events'):
            self.save_events(sorted_ruptures)
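The reordering step sorts the rupture records by their serial number and then overwrites the `id` column with a fresh consecutive range. A self-contained numpy sketch of that idea, using a reduced, hypothetical rupture dtype:

import numpy

rup_dt = numpy.dtype([('id', numpy.uint32), ('serial', numpy.uint32),
                      ('grp_id', numpy.uint16)])
ruptures = numpy.array([(0, 42, 1), (1, 7, 0), (2, 99, 1)], rup_dt)
ruptures.sort(order='serial')
ruptures['id'] = numpy.arange(len(ruptures))
print(ruptures)  # serials 7, 42, 99 with ids 0, 1, 2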
Example #26
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        mags = self.datastore['source_mags']  # by TRT
        if len(mags) == 0:  # everything was discarded
            raise RuntimeError('All sources were discarded!?')
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        if oq.pointsource_distance is not None:
            for trt in gsims_by_trt:
                oq.pointsource_distance[trt] = getdefault(
                    oq.pointsource_distance, trt)
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = mags[trt][()]
        imts_with_period = [
            imt for imt in oq.imtls if imt == 'PGA' or imt.startswith('SA')
        ]
        imts_ok = len(imts_with_period) == len(oq.imtls)
        if (imts_ok and oq.pointsource_distance
                and oq.pointsource_distance.suggested()) or (
                    imts_ok and oq.minimum_intensity):
            aw, self.psd = get_effect(mags_by_trt, self.sitecol.one(),
                                      gsims_by_trt, oq)
            if len(vars(aw)) > 1:  # more than _extra
                self.datastore['effect_by_mag_dst'] = aw
        elif oq.pointsource_distance:
            self.psd = oq.pointsource_distance.interp(mags_by_trt)
        else:
            self.psd = {}
        smap = parallel.Starmap(classical,
                                h5=self.datastore.hdf5,
                                num_cores=oq.num_cores)
        self.submit_tasks(smap)
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            with self.monitor('store source_info'):
                self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = (max(self.by_task) + 1,)
                er = self.datastore.create_dset('by_task/eff_ruptures', U32,
                                                num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites', U32,
                                                num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr,
                                                num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(srcids)
                self.by_task.clear()
        self.numrups = sum(arr[0] for arr in self.calc_times.values())
        numsites = sum(arr[1] for arr in self.calc_times.values())
        logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
            int(self.numrups), self.totrups))
        logging.info('Effective number of sites per rupture: %d',
                     numsites / self.numrups)
        if self.psd:
            psdist = max(max(self.psd[trt].values()) for trt in self.psd)
            if psdist != -1 and self.maxradius >= psdist / 2:
                logging.warning(
                    'The pointsource_distance of %d km is too '
                    'small compared to a maxradius of %d km', psdist,
                    self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
Example #27
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.csm_info = parent['csm_info']
            self.calc_stats()  # post-processing
            return {}

        mags = self.datastore['source_mags'][()]
        gsims_by_trt = self.csm_info.get_gsims_by_trt()
        dist_bins = {
            trt: oq.maximum_distance.get_dist_bins(trt)
            for trt in gsims_by_trt
        }
        if oq.minimum_intensity and len(self.sitecol) == 1 and len(mags):
            logging.info('Computing effect of the ruptures')
            mon = self.monitor('rupture effect')
            effect = parallel.Starmap.apply(
                get_effect, (mags, self.sitecol, gsims_by_trt,
                             oq.maximum_distance, oq.imtls, mon)).reduce()
            self.datastore['effect'] = effect
            self.datastore.set_attrs('effect', **dist_bins)
            threshold = getdefault(oq.minimum_intensity, list(oq.imtls)[-1])
            self.effect = {
                trt: Effect({mag: effect[mag][:, t]
                             for mag in effect},
                            dists=dist_bins[trt],
                            threshold=threshold)
                for t, trt in enumerate(gsims_by_trt)
            }
        else:
            self.effect = {}
        if oq.calculation_mode == 'preclassical' and self.N == 1:
            mags = sorted(set('%.3f' % mag for mag in mags))
            smap = parallel.Starmap(ruptures_by_mag_dist)
            for func, args in self.gen_task_queue():
                smap.submit(args)
            counts = smap.reduce()
            ndists = oq.maximum_distance.get_dist_bins.__defaults__[0]
            for mag in mags:
                arr = numpy.zeros((ndists, len(gsims_by_trt)), U32)
                for trti, trt in enumerate(gsims_by_trt):
                    try:
                        arr[:, trti] = counts[trt][mag]
                    except KeyError:
                        pass
                self.datastore['rups_by_mag_dist/' + mag] = arr
            self.datastore.set_attrs('rups_by_mag_dist', **dist_bins)
            self.datastore['csm_info'] = self.csm_info
            return {}
        smap = parallel.Starmap(self.core_task.__func__)
        smap.task_queue = list(self.gen_task_queue())  # really fast
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        self.maxdists = []
        try:
            acc = smap.get_results().reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            if self.maxdists:
                maxdist = numpy.mean(self.maxdists)
                logging.info(
                    'Using effective maximum distance for '
                    'point sources %d km', maxdist)
            with self.monitor('store source_info'):
                self.store_source_info(self.calc_times)
            if self.sources_by_task:
                num_tasks = max(self.sources_by_task) + 1
                sbt = numpy.zeros(num_tasks, [('eff_ruptures', U32),
                                              ('eff_sites', U32),
                                              ('srcids', hdf5.vuint32)])
                for task_no in range(num_tasks):
                    sbt[task_no] = self.sources_by_task.get(
                        task_no, (0, 0, U32([])))
                self.datastore['sources_by_task'] = sbt
                self.sources_by_task.clear()
        numrups = sum(arr[0] for arr in self.calc_times.values())
        if self.totrups != numrups:
            logging.info('Considered %d/%d ruptures', numrups, self.totrups)
        self.calc_times.clear()  # save a bit of memory
        return acc
Example #28
 def calc_stats(self):
     oq = self.oqparam
     hstats = oq.hazard_stats()
     # initialize datasets
     imls = oq.imtls.array
     N = len(self.sitecol.complete)
     P = len(oq.poes)
     M = self.M = len(oq.imtls)
     imts = list(oq.imtls)
     if oq.soil_intensities is not None:
         L = M * len(oq.soil_intensities)
     else:
         L = len(imls)
     L1 = self.L1 = L // M
     R = len(self.realizations)
     S = len(hstats)
      if (R > 1 and oq.individual_curves) or not hstats:
         self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
         self.datastore.set_shape_attrs('hcurves-rlzs',
                                        site_id=N,
                                        rlz_id=R,
                                        imt=imts,
                                        lvl=L1)
         if oq.poes:
             self.datastore.create_dset('hmaps-rlzs', F32, (N, R, M, P))
             self.datastore.set_shape_attrs('hmaps-rlzs',
                                            site_id=N,
                                            rlz_id=R,
                                            imt=list(oq.imtls),
                                            poe=oq.poes)
     if hstats:
         self.datastore.create_dset('hcurves-stats', F32, (N, S, M, L1))
         self.datastore.set_shape_attrs('hcurves-stats',
                                        site_id=N,
                                        stat=list(hstats),
                                        imt=imts,
                                        lvl=numpy.arange(L1))
         if oq.poes:
             self.datastore.create_dset('hmaps-stats', F32, (N, S, M, P))
             self.datastore.set_shape_attrs('hmaps-stats',
                                            site_id=N,
                                            stat=list(hstats),
                                            imt=list(oq.imtls),
                                            poe=oq.poes)
     ct = oq.concurrent_tasks or 1
     logging.info('Building hazard statistics')
     self.weights = [rlz.weight for rlz in self.realizations]
     allargs = [  # this list is very fast to generate
         (getters.PmapGetter(self.datastore, self.weights, t.sids,
                             oq.poes), N, hstats, oq.individual_curves,
          oq.max_sites_disagg, self.amplifier)
         for t in self.sitecol.split_in_tiles(ct)
     ]
     if self.few_sites:
         dist = 'no'
     else:
         dist = None  # parallelize as usual
         self.datastore.swmr_on()
     parallel.Starmap(build_hazard,
                      allargs,
                      distribute=dist,
                      h5=self.datastore.hdf5).reduce(self.save_hazard)
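The `hcurves-stats` dataset has shape (N, S, M, L1); for the mean statistic each task essentially computes a weighted average of the per-realization curves over the R axis. A minimal numpy illustration of that reduction (the engine's `stats.mean_curve` may handle the probabilities differently):

import numpy

N, R, M, L1 = 3, 4, 2, 5
curves_rlzs = numpy.random.random((N, R, M, L1))  # fake per-rlz curves
weights = numpy.array([.4, .3, .2, .1])           # realization weights
mean_curves = numpy.einsum('nrml,r->nml', curves_rlzs, weights)
print(mean_curves.shape)  # (N, M, L1)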
Example #29
 def calc_stats(self):
     oq = self.oqparam
     hstats = oq.hazard_stats()
     # initialize datasets
     imls = oq.imtls.array
     N = len(self.sitecol.complete)
     P = len(oq.poes)
     M = self.M = len(oq.imtls)
     imts = list(oq.imtls)
     if oq.soil_intensities is not None:
         L = M * len(oq.soil_intensities)
     else:
         L = len(imls)
     L1 = self.L1 = L // M
     R = len(self.realizations)
     S = len(hstats)
      if (R > 1 and oq.individual_curves) or not hstats:
         self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
         self.datastore.set_shape_attrs('hcurves-rlzs',
                                        site_id=N,
                                        rlz_id=R,
                                        imt=imts,
                                        lvl=L1)
         if oq.poes:
             self.datastore.create_dset('hmaps-rlzs', F32, (N, R, M, P))
             self.datastore.set_shape_attrs('hmaps-rlzs',
                                            site_id=N,
                                            rlz_id=R,
                                            imt=list(oq.imtls),
                                            poe=oq.poes)
     if hstats:
         self.datastore.create_dset('hcurves-stats', F32, (N, S, M, L1))
         self.datastore.set_shape_attrs('hcurves-stats',
                                        site_id=N,
                                        stat=list(hstats),
                                        imt=imts,
                                        lvl=numpy.arange(L1))
         if oq.poes:
             self.datastore.create_dset('hmaps-stats', F32, (N, S, M, P))
             self.datastore.set_shape_attrs('hmaps-stats',
                                            site_id=N,
                                            stat=list(hstats),
                                            imt=list(oq.imtls),
                                            poe=oq.poes)
     ct = oq.concurrent_tasks or 1
     logging.info('Building hazard statistics')
     self.weights = [rlz.weight for rlz in self.realizations]
     dstore = (self.datastore.parent
               if oq.hazard_calculation_id else self.datastore)
     allargs = [  # this list is very fast to generate
         (getters.PmapGetter(dstore, self.weights, t.sids, oq.imtls,
                             oq.poes), N, hstats, oq.individual_curves,
          oq.max_sites_disagg, self.amplifier)
         for t in self.sitecol.split_in_tiles(ct)
     ]
     if self.few_sites:
         dist = 'no'
     else:
         dist = None  # parallelize as usual
     parallel.Starmap(build_hazard,
                      allargs,
                      distribute=dist,
                      h5=self.datastore.hdf5).reduce(self.save_hazard)
     if 'hmaps-stats' in self.datastore:
         hmaps = self.datastore.sel('hmaps-stats', stat='mean')  # NSMP
         maxhaz = hmaps.max(axis=(0, 1, 3))
         mh = dict(zip(self.oqparam.imtls, maxhaz))
         logging.info('The maximum hazard map values are %s', mh)
         if Image is None or not self.from_engine:  # missing PIL
             return
         M, P = hmaps.shape[2:]
         logging.info('Saving %dx%d mean hazard maps', M, P)
         inv_time = oq.investigation_time
         allargs = []
         for m, imt in enumerate(self.oqparam.imtls):
             for p, poe in enumerate(self.oqparam.poes):
                 dic = dict(m=m,
                            p=p,
                            imt=imt,
                            poe=poe,
                            inv_time=inv_time,
                            calc_id=self.datastore.calc_id,
                            array=hmaps[:, 0, m, p])
                 allargs.append((dic, self.sitecol.lons, self.sitecol.lats))
         smap = parallel.Starmap(make_hmap_png, allargs)
         for dic in smap:
             self.datastore['png/hmap_%(m)d_%(p)d' % dic] = dic['img']
Example #30
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        sources = self.csm.get_sources()
        # weighting the heavy sources
        nrups = parallel.Starmap(count_ruptures,
                                 [(src, )
                                  for src in sources if src.code in b'AMC'],
                                 h5=self.datastore.hdf5).reduce()
        for src in sources:
            src.nsites = 1  # avoid 0 weight
            try:
                src.num_ruptures = nrups[src.source_id]
            except KeyError:
                src.num_ruptures = src.count_ruptures()
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        logging.info('Building ruptures')
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(sample_ruptures,
                                allargs,
                                h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        self.nruptures = 0
        for dic in smap:
            # NB: dic should be a dictionary, but when the calculation dies
            # because of an OOM it can become None, giving a very confusing error
            if dic is None:
                raise MemoryError('You ran out of memory!')
            rup_array = dic['rup_array']
            if len(rup_array) == 0:
                continue
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            with mon:
                n = len(rup_array)
                rup_array['id'] = numpy.arange(self.nruptures,
                                               self.nruptures + n)
                self.nruptures += n
                hdf5.extend(self.datastore['ruptures'], rup_array)
                hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
        if len(self.datastore['ruptures']) == 0:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # must be called before storing the events
        self.store_rlz_info(eff_ruptures)  # store full_lt
        self.store_source_info(calc_times)
        imp = calc.RuptureImporter(self.datastore)
        with self.monitor('saving ruptures and events'):
            imp.import_rups(self.datastore.getitem('ruptures')[()])
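The saving loop assigns consecutive ids to each incoming chunk and appends it to a growable dataset via `hdf5.extend`. A rough h5py-only sketch of that append pattern, assuming the helper simply resizes and writes (reduced, hypothetical rupture dtype, not the engine's own):

import h5py
import numpy

rup_dt = numpy.dtype([('id', numpy.uint32), ('serial', numpy.uint32)])

def extend(dset, array):
    """Append `array` to the resizable 1D dataset `dset`."""
    n = len(dset)
    dset.resize((n + len(array),))
    dset[n:] = array

with h5py.File('ruptures_demo.hdf5', 'w') as f:
    dset = f.create_dataset('ruptures', (0,), maxshape=(None,), dtype=rup_dt)
    nruptures = 0
    for chunk in ([(0, 11), (0, 7)], [(0, 5)]):  # fake rupture chunks
        rup_array = numpy.array(chunk, rup_dt)
        rup_array['id'] = numpy.arange(nruptures, nruptures + len(rup_array))
        nruptures += len(rup_array)
        extend(dset, rup_array)
    print(dset[()])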