Example #1
def run_preclassical(csm, oqparam, h5):
    """
    :param csm: a CompositeSourceModel with attribute .srcfilter
    :param oqparam: the parameters from the job.ini file
    :param h5: a DataStore instance
    """
    logging.info('Sending %s', csm.sitecol)

    # do nothing for atomic sources except counting the ruptures
    for src in csm.get_sources(atomic=True):
        src.num_ruptures = src.count_ruptures()
        src.nsites = len(csm.sitecol)

    # run preclassical for non-atomic sources
    sources_by_grp = groupby(
        csm.get_sources(atomic=False),
        lambda src: (src.grp_id, msr_name(src)))
    param = dict(maximum_distance=oqparam.maximum_distance,
                 pointsource_distance=oqparam.pointsource_distance,
                 ps_grid_spacing=oqparam.ps_grid_spacing,
                 split_sources=oqparam.split_sources)
    srcfilter = SourceFilter(
        csm.sitecol.reduce(10000) if csm.sitecol else None,
        oqparam.maximum_distance)
    res = parallel.Starmap(
        preclassical,
        ((srcs, srcfilter, param) for srcs in sources_by_grp.values()),
        h5=h5, distribute=None if len(sources_by_grp) > 1 else 'no').reduce()

    if res and res['before'] != res['after']:
        logging.info('Reduced the number of sources from {:_d} -> {:_d}'.
                     format(res['before'], res['after']))

    if res and h5:
        csm.update_source_info(res['calc_times'], nsites=True)

    for grp_id, srcs in res.items():
        # srcs can be empty if the minimum_magnitude filter is on
        if srcs and not isinstance(grp_id, str):
            newsg = SourceGroup(srcs[0].tectonic_region_type)
            newsg.sources = srcs
            csm.src_groups[grp_id] = newsg

    # sanity check
    for sg in csm.src_groups:
        for src in sg:
            assert src.num_ruptures
            assert src.nsites

    # store ps_grid data, if any
    for key, sources in res.items():
        if isinstance(key, str) and key.startswith('ps_grid/'):
            arrays = []
            for ps in sources:
                if hasattr(ps, 'location'):
                    lonlats = [ps.location.x, ps.location.y]
                    for src in getattr(ps, 'pointsources', []):
                        lonlats.extend([src.location.x, src.location.y])
                    arrays.append(F32(lonlats))
            h5[key] = arrays
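
The grouping step above relies on openquake's groupby helper. Below is a minimal, self-contained sketch of the same idea in plain Python; FakeSource and the local msr_name are hypothetical stand-ins, not openquake APIs.

# Minimal sketch of the grouping step above; this groupby mimics the
# contract of openquake.baselib.general.groupby, and FakeSource and
# msr_name are stand-ins, not openquake APIs.
from collections import defaultdict

class FakeSource:
    def __init__(self, grp_id, msr):
        self.grp_id = grp_id
        self.msr = msr

def msr_name(src):
    return src.msr

def groupby(items, key):
    # return a dict {key_value: [items with that key]}
    dic = defaultdict(list)
    for item in items:
        dic[key(item)].append(item)
    return dict(dic)

srcs = [FakeSource(0, 'WC1994'), FakeSource(0, 'WC1994'),
        FakeSource(1, 'PeerMSR')]
by_grp = groupby(srcs, lambda src: (src.grp_id, msr_name(src)))
print({k: len(v) for k, v in by_grp.items()})
# {(0, 'WC1994'): 2, (1, 'PeerMSR'): 1}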
Example #2
def read_source_groups(fname):
    """
    :param fname: a path to a source model XML file
    :return: a list of SourceGroup objects containing source nodes
    """
    smodel = nrml.read(fname).sourceModel
    src_groups = []
    if smodel[0].tag.endswith('sourceGroup'):  # NRML 0.5 format
        for sg_node in smodel:
            sg = SourceGroup(sg_node['tectonicRegion'])
            sg.sources = sg_node.nodes
            src_groups.append(sg)
    else:  # NRML 0.4 format: smodel is a list of source nodes
        src_groups.extend(SourceGroup.collect(smodel))
    return src_groups
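
The branch on smodel[0].tag is the whole dispatch between the two NRML formats. The following self-contained sketch reproduces it with xml.etree on an inline document; real NRML files carry an XML namespace, which is presumably why the original tests tag.endswith('sourceGroup') rather than equality.

# Self-contained sketch of the NRML 0.5 vs 0.4 dispatch above, using
# xml.etree instead of openquake.hazardlib.nrml.
import xml.etree.ElementTree as ET

NRML05 = """\
<sourceModel>
  <sourceGroup tectonicRegion="Active Shallow Crust">
    <pointSource id="1"/>
  </sourceGroup>
</sourceModel>
"""

smodel = ET.fromstring(NRML05)
if smodel[0].tag.endswith('sourceGroup'):  # NRML 0.5: grouped sources
    for sg_node in smodel:
        print(sg_node.get('tectonicRegion'), len(list(sg_node)))
else:  # NRML 0.4: the model is a flat list of source nodes
    print('ungrouped sources:', len(list(smodel)))
# prints: Active Shallow Crust 1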
Example #3
def run_preclassical(csm, oqparam, h5):
    """
    :param csm: a CompositeSourceModel with attribute .srcfilter
    :param oqparam: the parameters from the job.ini file
    :param h5: a DataStore instance
    """
    # do nothing for atomic sources except counting the ruptures
    for src in csm.get_sources(atomic=True):
        src.num_ruptures = src.count_ruptures()
        src.nsites = len(csm.sitecol) if csm.sitecol else 1

    # run preclassical for non-atomic sources
    sources_by_grp = groupby(csm.get_sources(atomic=False), lambda src:
                             (src.grp_id, msr_name(src)))
    param = dict(maximum_distance=oqparam.maximum_distance,
                 pointsource_distance=oqparam.pointsource_distance,
                 ps_grid_spacing=oqparam.ps_grid_spacing,
                 split_sources=oqparam.split_sources)
    srcfilter = SourceFilter(
        csm.sitecol.reduce(10000) if csm.sitecol else None,
        oqparam.maximum_distance)
    if csm.sitecol:
        logging.info('Sending %s', srcfilter.sitecol)
    if oqparam.ps_grid_spacing:
        # produce a preclassical task for each group
        allargs = ((srcs, srcfilter, param)
                   for srcs in sources_by_grp.values())
    else:
        # produce many preclassical tasks
        maxw = sum(len(srcs) for srcs in sources_by_grp.values()) / (
            oqparam.concurrent_tasks or 1)
        allargs = ((blk, srcfilter, param) for srcs in sources_by_grp.values()
                   for blk in block_splitter(srcs, maxw))
    res = parallel.Starmap(
        preclassical,
        allargs,
        h5=h5,
        distribute=None if len(sources_by_grp) > 1 else 'no').reduce()

    if res and res['before'] != res['after']:
        logging.info(
            'Reduced the number of sources from {:_d} -> {:_d}'.format(
                res['before'], res['after']))

    if res and h5:
        csm.update_source_info(res['calc_times'], nsites=True)

    acc = AccumDict(accum=0)
    code2cls = get_code2cls()
    for grp_id, srcs in res.items():
        # srcs can be empty if the minimum_magnitude filter is on
        if srcs and not isinstance(grp_id, str):
            newsg = SourceGroup(srcs[0].tectonic_region_type)
            newsg.sources = srcs
            csm.src_groups[grp_id] = newsg
            for src in srcs:
                acc[src.code] += int(src.num_ruptures)
    for val, key in sorted((val, key) for key, val in acc.items()):
        cls = code2cls[key].__name__
        logging.info('{} ruptures: {:_d}'.format(cls, val))

    # sanity check
    for sg in csm.src_groups:
        for src in sg:
            assert src.num_ruptures
            assert src.nsites

    # store ps_grid data, if any
    for key, sources in res.items():
        if isinstance(key, str) and key.startswith('ps_grid/'):
            arrays = []
            for ps in sources:
                if hasattr(ps, 'location'):
                    lonlats = [ps.location.x, ps.location.y]
                    for src in getattr(ps, 'pointsources', []):
                        lonlats.extend([src.location.x, src.location.y])
                    arrays.append(F32(lonlats))
            h5[key] = arrays
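
Note how Starmap(...).reduce() merges the per-task dictionaries: integer counters such as 'before' and 'after' are summed via +, and list-valued keys are concatenated the same way. Here is a plain-Python sketch of that merge; MiniAccum only mimics openquake.baselib.general.AccumDict, it is not the real class.

# Plain-Python sketch of the dict-merging behaviour that
# parallel.Starmap(...).reduce() relies on; MiniAccum is a stand-in
# for openquake.baselib.general.AccumDict.
class MiniAccum(dict):
    def __iadd__(self, other):
        for key, value in other.items():
            if key in self:
                self[key] = self[key] + value  # sums ints, extends lists
            else:
                self[key] = value
        return self

def reduce_results(task_results):
    acc = MiniAccum()
    for res in task_results:
        acc += res
    return acc

# two tasks reporting source counts before/after preclassical
r = reduce_results([{'before': 10, 'after': 4}, {'before': 7, 'after': 7}])
print(r)  # {'before': 17, 'after': 11}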
Example #4
def run_preclassical(calc):
    """
    :param csm: a CompositeSourceModel
    :param oqparam: the parameters in job.ini file
    :param h5: a DataStore instance
    """
    csm = calc.csm
    calc.datastore['trt_smrs'] = csm.get_trt_smrs()
    calc.datastore['toms'] = numpy.array(
        [sg.tom_name for sg in csm.src_groups], hdf5.vstr)
    cmakers = read_cmakers(calc.datastore, csm.full_lt)
    h5 = calc.datastore.hdf5
    calc.sitecol = sites = csm.sitecol if csm.sitecol else None
    # do nothing for atomic sources except counting the ruptures
    atomic_sources = []
    normal_sources = []
    for sg in csm.src_groups:
        grp_id = sg.sources[0].grp_id
        if sg.atomic:
            cmakers[grp_id].set_weight(sg, sites)
            atomic_sources.extend(sg)
        else:
            normal_sources.extend(sg)
    # run preclassical for non-atomic sources
    sources_by_grp = groupby(normal_sources, lambda src:
                             (src.grp_id, msr_name(src)))
    if csm.sitecol:
        logging.info('Sending %s', sites)
    smap = parallel.Starmap(preclassical, h5=h5)
    for (grp_id, msr), srcs in sources_by_grp.items():
        pointsources, pointlike, others = [], [], []
        for src in srcs:
            if hasattr(src, 'location'):
                pointsources.append(src)
            elif hasattr(src, 'nodal_plane_distribution'):
                pointlike.append(src)
            else:
                others.append(src)
        if calc.oqparam.ps_grid_spacing:
            if pointsources or pointlike:
                smap.submit((pointsources + pointlike, sites, cmakers[grp_id]))
        else:
            smap.submit_split((pointsources, sites, cmakers[grp_id]), 10, 100)
            for src in pointlike:  # area, multipoint
                smap.submit(([src], sites, cmakers[grp_id]))
        smap.submit_split((others, sites, cmakers[grp_id]), 10, 100)
    normal = smap.reduce()
    if atomic_sources:  # case_35
        n = len(atomic_sources)
        atomic = AccumDict({'before': n, 'after': n})
        for grp_id, srcs in groupby(atomic_sources,
                                    lambda src: src.grp_id).items():
            atomic[grp_id] = srcs
    else:
        atomic = AccumDict()
    res = normal + atomic
    if res['before'] != res['after']:
        logging.info(
            'Reduced the number of point sources from {:_d} -> {:_d}'.format(
                res['before'], res['after']))
    acc = AccumDict(accum=0)
    code2cls = get_code2cls()
    for grp_id, srcs in res.items():
        # srcs can be empty if the minimum_magnitude filter is on
        if srcs and not isinstance(grp_id, str) and grp_id not in atomic:
            # check if OQ_SAMPLE_SOURCES is set
            ss = os.environ.get('OQ_SAMPLE_SOURCES')
            if ss:
                logging.info('Sampled sources for group #%d', grp_id)
                srcs = general.random_filter(srcs, float(ss)) or [srcs[0]]
            newsg = SourceGroup(srcs[0].tectonic_region_type)
            newsg.sources = srcs
            csm.src_groups[grp_id] = newsg
            for src in srcs:
                assert src.weight
                assert src.num_ruptures
                acc[src.code] += int(src.num_ruptures)
    for val, key in sorted((val, key) for key, val in acc.items()):
        cls = code2cls[key].__name__
        logging.info('{} ruptures: {:_d}'.format(cls, val))

    source_data = zero_times(csm.get_sources())
    calc.store_source_info(source_data)
    # store ps_grid data, if any
    for key, sources in res.items():
        if isinstance(key, str) and key.startswith('ps_grid/'):
            arrays = []
            for ps in sources:
                if hasattr(ps, 'location'):
                    lonlats = [ps.location.x, ps.location.y]
                    for src in getattr(ps, 'pointsources', []):
                        lonlats.extend([src.location.x, src.location.y])
                    arrays.append(F32(lonlats))
            h5[key] = arrays

    h5['full_lt'] = csm.full_lt
    return res
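
The ps_grid/ datasets written near the end store, for each grid point, a flat float32 vector [lon, lat, lon1, lat1, ...]: the location of the collapsed point followed by the locations of the point sources it absorbed. A small numpy sketch of that layout; Point and GridSource are hypothetical stand-ins for the objects produced by ps_grid_spacing.

# Sketch of the flat [lon, lat, lon1, lat1, ...] layout stored under
# ps_grid/; Point and GridSource are hypothetical stand-ins.
import numpy

F32 = numpy.float32  # same alias used in the examples above

class Point:
    def __init__(self, x, y):
        self.x, self.y = x, y

class GridSource:
    def __init__(self, location, pointsources=()):
        self.location = location
        self.pointsources = list(pointsources)

ps = GridSource(Point(10.0, 45.0),
                [GridSource(Point(10.1, 45.0)), GridSource(Point(10.0, 45.1))])
lonlats = [ps.location.x, ps.location.y]
for src in getattr(ps, 'pointsources', []):
    lonlats.extend([src.location.x, src.location.y])
arr = F32(lonlats)  # grid point first, then the collapsed sources
print(arr)  # [10.  45.  10.1 45.  10.  45.1]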
Example #5
    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the sources according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if oq.hazard_calculation_id and not oq.compare_with_classical:
            with util.read(self.oqparam.hazard_calculation_id) as parent:
                self.full_lt = parent['full_lt']
            self.calc_stats()  # post-processing
            return {}

        assert oq.max_sites_per_tile > oq.max_sites_disagg, (
            oq.max_sites_per_tile, oq.max_sites_disagg)
        psd = self.set_psd()
        srcfilter = self.src_filter()
        performance.Monitor.save(self.datastore, 'srcfilter', srcfilter)
        srcs = self.csm.get_sources(atomic=False)
        if srcs:
            res = parallel.Starmap.apply(preclassical, (srcs, self.params),
                                         concurrent_tasks=oq.concurrent_tasks
                                         or 1,
                                         h5=self.datastore.hdf5).reduce()

            if oq.calculation_mode == 'preclassical':
                self.store_source_info(res['calc_times'], nsites=True)
                self.datastore['full_lt'] = self.csm.full_lt
                self.datastore.swmr_on()  # fixes HDF5 error in build_hazard
                return

            self.update_source_info(res['calc_times'], nsites=True)
            sources_by_grp = groupby(res['sources'],
                                     operator.attrgetter('grp_id'))
        else:
            for src in self.csm.get_sources(atomic=True):
                src.num_ruptures = src.count_ruptures()
                src.nsites = self.N
            sources_by_grp = {}
        self.csm.src_groups = [sg for sg in self.csm.src_groups if sg.atomic]
        if oq.ps_grid_spacing:
            smap = parallel.Starmap(
                grid_point_sources,
                h5=self.datastore.hdf5,
                distribute=None if len(sources_by_grp) > 1 else 'no')
            for grp_id, sources in sources_by_grp.items():
                smap.submit((sources, oq.ps_grid_spacing))
            dic = smap.reduce()
            before, after = 0, 0
            for grp_id, sources in sources_by_grp.items():
                before += len(sources)
                after += len(dic[grp_id])
                sg = SourceGroup(sources[0].tectonic_region_type)
                sg.sources = dic[grp_id]
                self.csm.src_groups.append(sg)
            logging.info('Reduced point sources %d->%d', before, after)
        else:
            for grp_id, sources in sources_by_grp.items():
                sg = SourceGroup(sources[0].tectonic_region_type)
                sg.sources = sources
                self.csm.src_groups.append(sg)
        smap = parallel.Starmap(classical, h5=self.datastore.hdf5)
        self.submit_tasks(smap)
        acc0 = self.acc0()  # create the rup/ datasets BEFORE swmr_on()
        self.datastore.swmr_on()
        smap.h5 = self.datastore.hdf5
        self.calc_times = AccumDict(accum=numpy.zeros(3, F32))
        try:
            acc = smap.reduce(self.agg_dicts, acc0)
            self.store_rlz_info(acc.eff_ruptures)
        finally:
            source_ids = self.store_source_info(self.calc_times)
            if self.by_task:
                logging.info('Storing by_task information')
                num_tasks = max(self.by_task) + 1,  # trailing comma: 1-tuple shape
                er = self.datastore.create_dset('by_task/eff_ruptures', U32,
                                                num_tasks)
                es = self.datastore.create_dset('by_task/eff_sites', U32,
                                                num_tasks)
                si = self.datastore.create_dset('by_task/srcids',
                                                hdf5.vstr,
                                                num_tasks,
                                                fillvalue=None)
                for task_no, rec in self.by_task.items():
                    effrups, effsites, srcids = rec
                    er[task_no] = effrups
                    es[task_no] = effsites
                    si[task_no] = ' '.join(source_ids[s] for s in srcids)
                self.by_task.clear()
        if self.calc_times:  # can be empty in case of errors
            self.numrups = sum(arr[0] for arr in self.calc_times.values())
            numsites = sum(arr[1] for arr in self.calc_times.values())
            logging.info('Effective number of ruptures: {:_d}/{:_d}'.format(
                int(self.numrups), self.totrups))
            logging.info('Effective number of sites per rupture: %d',
                         numsites / self.numrups)
        if psd:
            psdist = max(max(psd.ddic[trt].values()) for trt in psd.ddic)
            if psdist and self.maxradius >= psdist / 2:
                logging.warning(
                    'The pointsource_distance of %d km is too '
                    'small compared to a maxradius of %d km', psdist,
                    self.maxradius)
        self.calc_times.clear()  # save a bit of memory
        return acc
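
The docstring above says the sources are parallelized "according to their weight". Below is a minimal sketch of weight-based blocking, the idea behind helpers like block_splitter used in the earlier examples; it is an illustration, not openquake's implementation.

# Minimal sketch of weight-based task blocking: accumulate items into a
# block until adding the next one would exceed max_weight, then start a
# new block. Not openquake's block_splitter, just the idea.
def split_by_weight(items, max_weight, weight=lambda item: item):
    block, tot = [], 0
    for item in items:
        w = weight(item)
        if block and tot + w > max_weight:
            yield block
            block, tot = [], 0
        block.append(item)
        tot += w
    if block:
        yield block

weights = [5, 1, 1, 4, 3, 2]  # e.g. rupture counts per source
for blk in split_by_weight(weights, max_weight=6):
    print(blk, sum(blk))
# [5, 1] 6
# [1, 4] 5
# [3, 2] 5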