Esempio n. 1
0
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.

    :param srcs: a list of seismic sources
    :param srcfilter: a source filter, also used to discard far sources
    :param gsims: ground shaking intensity models for the current group
    :param params: a dictionary of calculation parameters
    :param monitor: a monitor/timing object
    :yields: computed results and/or `(task_func, *args)` subtask tuples
    """
    # first check if we are sampling the sources
    ss = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if ss:
        # sampling mode: split, keep a random subset, compute inline
        splits, _stime = split_sources(srcs)  # split time unused here
        srcs = random_filtered_sources(splits, srcfilter, ss)
        yield classical(srcs, srcfilter, gsims, params, monitor)
        return
    # NB: splitting all the sources improves the distribution significantly,
    # compared to splitting only the big source
    sources = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            splits, _stime = split_sources([src])
            sources.extend(srcfilter.filter(splits))
    if sources:
        sources.sort(key=weight)
        totsites = len(srcfilter.sitecol)
        # use smaller blocks for small (disaggregation-sized) site models
        mw = 1000 if totsites <= params['max_sites_disagg'] else 50000
        mweight = max(mw, sum(src.weight for src in sources) /
                      params['task_multiplier'])
        blocks = list(block_splitter(sources, mweight, weight))
        # yield all blocks but the last as subtask specifications;
        # the last block is computed directly in this task
        for block in blocks[:-1]:
            yield classical, block, srcfilter, gsims, params
        yield classical(blocks[-1], srcfilter, gsims, params, monitor)
Esempio n. 2
0
 def execute(self):
     """
     Run in parallel `core_task(sources, sitecol, monitor)`, by
     parallelizing on the sources according to their weight and
     tectonic region type.

     :returns: the accumulated results (a dictionary grp_id -> pmap)
     """
     # monitor used to time the core task in the workers
     monitor = self.monitor(self.core_task.__name__)
     oq = self.oqparam
     acc = self.acc0()  # initial accumulator
     self.nsites = []  # used in agg_dicts
     self.maxdists = []
     # calculation parameters forwarded to every task invocation
     param = dict(imtls=oq.imtls, truncation_level=oq.truncation_level,
                  filter_distance=oq.filter_distance,
                  max_weight=oq.max_weight,
                  max_sites_disagg=oq.max_sites_disagg)
     self.calc_times = general.AccumDict(accum=np.zeros(3, np.float32))
     # single-element unpacking: exactly one TRT -> gsims entry is expected
     [gsims] = self.csm.info.get_gsims_by_trt().values()
     # OQ_SAMPLE_SOURCES enables a strongly reduced (0.1%) source sampling
     sample = .001 if os.environ.get('OQ_SAMPLE_SOURCES') else None
     srcfilter = self.src_filter()
     for sm in self.csm.source_models:  # one branch at the time
         # each source model must contain a single group with a single source
         [grp] = sm.src_groups
         [src] = grp
         # presumably iterating the source expands it into subsources
         # — TODO confirm against the source class
         srcs = list(src)
         if sample:
             srcs = random_filtered_sources(srcs, srcfilter, 1)
         # first pass: split/filter/compute the main (foreground) sources
         acc = parallel.Starmap.apply(
             classical_split_filter,
             (srcs, srcfilter, gsims, param, monitor),
             weight=operator.attrgetter('weight'),
             concurrent_tasks=oq.concurrent_tasks,
             h5=self.datastore.hdf5
         ).reduce(self.agg_dicts, acc)
         # second pass: the background sources of the (UCERF-like) source
         ucerf = grp.sources[0].orig
         logging.info('Getting background sources from %s', ucerf.source_id)
         srcs = ucerf.get_background_sources(srcfilter, sample)
         acc = parallel.Starmap.apply(
             classical, (srcs, srcfilter, gsims, param, monitor),
             weight=operator.attrgetter('weight'),
             concurrent_tasks=oq.concurrent_tasks,
             h5=self.datastore.hdf5
         ).reduce(self.agg_dicts, acc)
     # persist realization and per-source information in the datastore
     self.store_rlz_info(acc.eff_ruptures)
     self.store_source_info(self.calc_times)
     return acc  # {grp_id: pmap}
Esempio n. 3
0
def classical_split_filter(srcs, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    srcfilter = monitor.read('srcfilter')
    # sampling mode: keep only a random subset of the split sources
    nsamp = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if nsamp:
        pieces, _stime = split_sources(srcs)
        sampled = random_filtered_sources(pieces, srcfilter, nsamp)
        yield classical(sampled, srcfilter, gsims, params, monitor)
        return
    # NB: splitting all the sources improves the distribution significantly,
    # compared to splitting only the big sources
    with monitor("splitting/filtering sources"):
        pieces, _stime = split_sources(srcs)
        sources = list(srcfilter.filter(pieces))
    if not sources:
        yield {'pmap': {}}
        return
    maxw = params['max_weight']
    N = len(srcfilter.sitecol.complete)

    def _weight(src):
        # rescale the source weight by the fraction of affected sites
        factor = 10 * numpy.sqrt(len(src.indices) / N)
        return src.weight * params['rescale_weight'] * factor

    blocks = list(block_splitter(sources, maxw, _weight))
    head, last = blocks[:-1], blocks[-1]
    # every block except the last becomes a subtask specification
    for blk in head:
        yield classical_, blk, gsims, params
    if monitor.calc_id and len(head):
        msg = 'produced %d subtask(s) with mean weight %d' % (
            len(head), numpy.mean([b.weight for b in head]))
        try:
            logs.dbcmd('log', monitor.calc_id, datetime.utcnow(), 'DEBUG',
                       'classical_split_filter#%d' % monitor.task_no, msg)
        except Exception:
            # with `oq run` a foreign key error is expected; just print
            print(msg)
    # the last block is computed directly in this task
    yield classical(last, srcfilter, gsims, params, monitor)
Esempio n. 4
0
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    # sampling mode: keep only a random subset of the split sources
    nsamp = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if nsamp:
        pieces, _stime = split_sources(srcs)
        sampled = random_filtered_sources(pieces, srcfilter, nsamp)
        yield classical(sampled, srcfilter, gsims, params, monitor)
        return
    # NB: splitting all the sources improves the distribution significantly,
    # compared to splitting only the big sources
    with monitor("splitting/filtering sources"):
        pieces, _stime = split_sources(srcs)
        sources = list(srcfilter.filter(pieces))
    if not sources:
        yield {'pmap': {}}
        return
    totw = sum(src.weight for src in sources)
    maxw = min(totw / 5, params['max_weight'])
    if maxw < MINWEIGHT * 5:
        # the task is too small to be worth resubmitting
        yield classical(sources, srcfilter, gsims, params, monitor)
        return
    blocks = list(block_splitter(sources, maxw, weight))
    nsub = len(blocks) - 1
    # every block except the last becomes a subtask specification
    for blk in blocks[:-1]:
        yield classical, blk, srcfilter, gsims, params
    if monitor.calc_id and nsub:
        msg = 'produced %d subtask(s) with max weight=%d' % (
            nsub, max(b.weight for b in blocks))
        try:
            logs.dbcmd('log', monitor.calc_id, datetime.utcnow(), 'DEBUG',
                       'classical_split_filter#%d' % monitor.task_no, msg)
        except Exception:
            # with `oq run` a foreign key error is expected; just print
            print(msg)
    # the last block is computed directly in this task
    yield classical(blocks[-1], srcfilter, gsims, params, monitor)
Esempio n. 5
0
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    # sampling mode: keep only a random subset of the split sources
    nsamp = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if nsamp:
        pieces, _stime = split_sources(srcs)
        sampled = random_filtered_sources(pieces, srcfilter, nsamp)
        yield classical(sampled, srcfilter, gsims, params, monitor)
        return
    # NB: splitting all the sources improves the distribution significantly,
    # compared to splitting only the big source
    sources = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            pieces, _stime = split_sources([src])
            sources.extend(srcfilter.filter(pieces))
    if not sources:
        return
    # delegate the block splitting/resubmission logic to split_task
    yield from parallel.split_task(
            classical, sources, srcfilter, gsims, params, monitor,
            duration=params['task_duration'])