def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and compute the PoEs.
    Yield back subtasks if the split sources contain more than maxweight
    ruptures.
    """
    # sampling mode, enabled via the OQ_SAMPLE_SOURCES environment variable
    sample = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if sample:
        pieces, _stime = split_sources(srcs)
        sampled = readinput.random_filtered_sources(pieces, srcfilter, sample)
        yield classical(sampled, srcfilter, gsims, params, monitor)
        return
    maxweight = params['maxweight']
    kept = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            if src.num_ruptures < maxweight:  # light source, keep as is
                kept.append(src)
            else:  # heavy source: split it and refilter the pieces
                pieces, _stime = split_sources([src])
                kept.extend(srcfilter.filter(pieces))
    chunks = list(block_splitter(
        kept, maxweight, operator.attrgetter('num_ruptures')))
    if not chunks:
        return
    # yield every chunk but the last as a subtask; the last chunk
    # (usually the smallest one) is computed directly in this task
    for chunk in chunks[:-1]:
        yield classical, chunk, srcfilter, gsims, params
    yield classical(chunks[-1], srcfilter, gsims, params, monitor)
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and compute the PoEs.
    Yield back subtasks if the split sources contain more than maxweight
    ruptures.
    """
    # sampling mode, enabled via the OQ_SAMPLE_SOURCES environment variable
    sample = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if sample:
        pieces, _stime = split_sources(srcs)
        yield classical(random_filtered_sources(pieces, srcfilter, sample),
                        srcfilter, gsims, params, monitor)
        return
    # NB: splitting *all* the sources, not only the big ones, improves
    # the task distribution significantly
    filtered = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            pieces, _stime = split_sources([src])
            filtered.extend(srcfilter.filter(pieces))
    if not filtered:
        return
    yield from parallel.split_task(
        classical, filtered, srcfilter, gsims, params, monitor,
        duration=params['task_duration'])
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and compute the PoEs.
    Yield back subtasks if the split sources contain more than maxweight
    ruptures.
    """
    # sampling mode, enabled via the OQ_SAMPLE_SOURCES environment variable
    sample = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if sample:
        pieces, _stime = split_sources(srcs)
        yield classical(random_filtered_sources(pieces, srcfilter, sample),
                        srcfilter, gsims, params, monitor)
        return
    # NB: splitting *all* the sources, not only the big ones, improves
    # the task distribution significantly
    filtered = []
    with monitor("filtering/splitting sources"):
        for src, _sites in srcfilter(srcs):
            pieces, _stime = split_sources([src])
            filtered.extend(srcfilter.filter(pieces))
    if not filtered:
        return
    filtered.sort(key=weight)
    totsites = len(srcfilter.sitecol)
    # small calculations get a much lower weight floor than large ones
    floor = 1000 if totsites <= params['max_sites_disagg'] else 50000
    mweight = max(floor, sum(s.weight for s in filtered) /
                  params['task_multiplier'])
    chunks = list(block_splitter(filtered, mweight, weight))
    # yield every chunk but the last as a subtask; the last one is
    # computed directly in this task
    for chunk in chunks[:-1]:
        yield classical, chunk, srcfilter, gsims, params
    yield classical(chunks[-1], srcfilter, gsims, params, monitor)
def preclassical(srcs, srcfilter, gsims, params, monitor):
    """
    Split and prefilter the sources.

    :param srcs: a list of seismic sources
    :param srcfilter: a filter providing a .get_close_sites method
    :param gsims: unused here (kept for task-signature compatibility)
    :param params: unused here (kept for task-signature compatibility)
    :param monitor: a Monitor instance providing .task_no and timing contexts
    :returns: a dict with keys pmap, calc_times, rup_data and extra
    """
    # per-source accumulator of [num_ruptures, nsites, elapsed time]
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nrups, nsites, time
    pmap = AccumDict(accum=0)
    with monitor("splitting/filtering sources"):
        splits, _stime = split_sources(srcs)
    totrups = 0
    maxradius = 0
    for src in splits:
        t0 = time.time()
        # count the ruptures even for sources that get filtered out below
        totrups += src.num_ruptures
        if srcfilter.get_close_sites(src) is None:
            continue  # no sites close enough to this source
        if hasattr(src, 'radius'):  # for point sources
            maxradius = max(maxradius, src.radius)
        dt = time.time() - t0
        calc_times[src.source_id] += F32(
            [src.num_ruptures, src.nsites, dt])
        for grp_id in src.grp_ids:
            # touch the group key so the group appears in pmap even if empty
            pmap[grp_id] += 0
    # NOTE(review): `src` below is the for-loop variable leaking out of the
    # loop, i.e. the *last* split source; this raises NameError if `splits`
    # is empty and reports only the last source's tectonic region type —
    # presumably all sources in a task share the same trt, confirm
    return dict(pmap=pmap, calc_times=calc_times, rup_data={'grp_id': []},
                extra=dict(task_no=monitor.task_no, totrups=totrups,
                           trt=src.tectonic_region_type,
                           maxradius=maxradius))
def classical_split_filter(srcs, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    # the source filter is not passed as an argument: it is read back
    # through the monitor's data transfer layer
    srcfilter = monitor.read('srcfilter')
    # first check if we are sampling the sources
    ss = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if ss:
        splits, stime = split_sources(srcs)
        srcs = random_filtered_sources(splits, srcfilter, ss)
        yield classical(srcs, srcfilter, gsims, params, monitor)
        return
    # NB: splitting all the sources improves the distribution significantly,
    # compared to splitting only the big sources
    with monitor("splitting/filtering sources"):
        splits, _stime = split_sources(srcs)
        sources = list(srcfilter.filter(splits))
    if not sources:
        yield {'pmap': {}}  # nothing to compute, return an empty result
        return
    maxw = params['max_weight']
    N = len(srcfilter.sitecol.complete)

    def weight(src):
        # rescale the source weight; presumably len(src.indices) is the
        # number of sites affected by the source — TODO confirm
        n = 10 * numpy.sqrt(len(src.indices) / N)
        return src.weight * params['rescale_weight'] * n
    blocks = list(block_splitter(sources, maxw, weight))
    subtasks = len(blocks) - 1
    for block in blocks[:-1]:
        # NOTE(review): subtasks are spawned with classical_, not classical —
        # presumably the variant reading srcfilter from the monitor; confirm
        yield classical_, block, gsims, params
    if monitor.calc_id and subtasks:
        msg = 'produced %d subtask(s) with mean weight %d' % (
            subtasks, numpy.mean([b.weight for b in blocks[:-1]]))
        try:
            logs.dbcmd('log', monitor.calc_id, datetime.utcnow(), 'DEBUG',
                       'classical_split_filter#%d' % monitor.task_no, msg)
        except Exception:
            # a foreign key error in case of `oq run` is expected
            print(msg)
    # the last block (usually the smallest one) is computed in this task
    yield classical(blocks[-1], srcfilter, gsims, params, monitor)
def classical_split_filter(srcs, srcfilter, gsims, params, monitor): """ Split the given sources, filter the subsources and the compute the PoEs. Yield back subtasks if the split sources contain more than maxweight ruptures. """ # first check if we are sampling the sources ss = int(os.environ.get('OQ_SAMPLE_SOURCES', 0)) if ss: splits, stime = split_sources(srcs) srcs = readinput.random_filtered_sources(splits, srcfilter, ss) yield classical(srcs, srcfilter, gsims, params, monitor) return # NB: splitting all the sources improves the distribution significantly, # compared to splitting only the big source sources = [] with monitor("filtering/splitting sources"): for src, _sites in srcfilter(srcs): splits, _stime = split_sources([src]) sources.extend(srcfilter.filter(splits)) if sources: tot = 0 sd = AccumDict(accum=numpy.zeros(3)) # nsites, nrupts, weight for src in sources: arr = numpy.array([src.nsites, src.num_ruptures, src.weight]) sd[src.id] += arr tot += 1 source_data = numpy.array([(monitor.task_no, src_id, s / tot, r, w) for src_id, (s, r, w) in sd.items()], source_data_dt) first = True for out in parallel.split_task(classical, sources, srcfilter, gsims, params, monitor, duration=params['task_duration'], weight=nrup): if first: out['source_data'] = source_data first = False yield out
def test(self):
    # a single split source must keep the ids of the original source:
    # losing src_group_id caused hard to track bugs in the past
    path = gettemp(characteric_source)
    [[original]] = nrml.to_python(path)
    original.id = 1
    original.src_group_id = 1
    os.remove(path)
    [piece], _ = split_sources([original])
    for attr in ('id', 'source_id', 'src_group_id'):
        self.assertEqual(getattr(original, attr), getattr(piece, attr))
def classical_split_filter(srcs, srcfilter, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and compute the PoEs.
    Yield back subtasks if the split sources contain more than maxweight
    ruptures.
    """
    # sampling mode, enabled via the OQ_SAMPLE_SOURCES environment variable
    sample = int(os.environ.get('OQ_SAMPLE_SOURCES', 0))
    if sample:
        pieces, _stime = split_sources(srcs)
        yield classical(random_filtered_sources(pieces, srcfilter, sample),
                        srcfilter, gsims, params, monitor)
        return
    # NB: splitting *all* the sources, not only the big ones, improves
    # the task distribution significantly
    with monitor("splitting/filtering sources"):
        pieces, _stime = split_sources(srcs)
        sources = list(srcfilter.filter(pieces))
    if not sources:
        yield {'pmap': {}}  # nothing to compute
        return
    maxw = min(sum(s.weight for s in sources) / 5, params['max_weight'])
    if maxw < MINWEIGHT * 5:  # task too small to be resubmitted
        yield classical(sources, srcfilter, gsims, params, monitor)
        return
    chunks = list(block_splitter(sources, maxw, weight))
    nsub = len(chunks) - 1
    for chunk in chunks[:-1]:
        yield classical, chunk, srcfilter, gsims, params
    if monitor.calc_id and nsub:
        msg = 'produced %d subtask(s) with max weight=%d' % (
            nsub, max(c.weight for c in chunks))
        try:
            logs.dbcmd('log', monitor.calc_id, datetime.utcnow(), 'DEBUG',
                       'classical_split_filter#%d' % monitor.task_no, msg)
        except Exception:
            # a foreign key error in case of `oq run` is expected
            print(msg)
    # the last chunk (usually the smallest one) is computed in this task
    yield classical(chunks[-1], srcfilter, gsims, params, monitor)
def split_filter(srcs, srcfilter, seed, monitor):
    """
    Split the given sources and filter the subsources by distance and by
    magnitude. Perform sampling if a nontrivial seed is passed.
    Yields a pair (split_sources, split_time) if split_sources is non-empty.
    """
    pieces, split_time = split_sources(srcs)
    if not pieces:
        return
    if seed:
        # debugging facility to reduce the size of a calculation
        pieces = readinput.random_filtered_sources(pieces, srcfilter, seed)
    if srcfilter and pieces:
        pieces = list(srcfilter.filter(pieces))
    if pieces:
        yield pieces, split_time
def classical_split_filter(srcs, gsims, params, monitor):
    """
    Split the given sources, filter the subsources and then compute the
    PoEs. Yield back subtasks if the split sources contain more than
    maxweight ruptures.
    """
    # the source filter is read back through the monitor's data layer
    srcfilter = monitor.read('srcfilter')
    sf_tiles = srcfilter.split_in_tiles(params['hint'])
    nt = len(sf_tiles)
    maxw = params['max_weight'] / 2 * nt
    splits = []  # ids of the sources that get split
    if nt > 1 or params['split_sources'] is False:
        sources = srcs  # no splitting in tiled mode or when disabled
    else:
        sources = []
        with monitor("splitting sources"):
            for src in srcs:
                if src.weight > maxw or src.num_ruptures > 10_000:
                    # heavy source: split it and refilter the pieces
                    splits.append(src.source_id)
                    for s, _ in srcfilter.filter(split_sources([src])[0]):
                        sources.append(s)
                else:
                    sources.append(src)
    if splits:  # produce more subtasks
        maxw /= 5
    msg = 'split %s; ' % ' '.join(splits) if splits else ''
    for sf in sf_tiles:
        # NOTE(review): `blocks` does not depend on `sf`, so it is
        # recomputed identically for every tile — looks loop-invariant,
        # confirm before hoisting
        blocks = list(block_splitter(sources, maxw, get_weight))
        if not blocks:
            yield {'pmap': {}, 'extra': {}}
            continue
        # the last block collects the light sources computed in this task
        light = list(blocks[-1])
        for block in blocks[:-1]:
            if block.weight > params['min_weight']:
                # NOTE(review): msg keeps growing across blocks and tiles,
                # so each dbcmd logs the full accumulated text — confirm
                msg += 'producing subtask with weight %d\n' % block.weight
                try:
                    logs.dbcmd('log', monitor.calc_id, datetime.utcnow(),
                               'DEBUG',
                               'classical_split_filter#%d' % monitor.task_no,
                               msg)
                except Exception:
                    # a foreign key error in case of `oq run` is expected
                    print(msg)
                yield classical, block, gsims, params, sf.slc
            else:
                light.extend(block)
        yield classical(light, gsims, params, sf.slc, monitor)
def split_filter(srcs, srcfilter, seed, sample_factor, monitor):
    """
    Split the given sources and filter the subsources by distance and by
    magnitude. Perform sampling if a nontrivial sample_factor is passed;
    for instance OQ_SAMPLE_SOURCES=.01 oq engine --run job.ini runs a
    computation 100 times smaller.
    Yields a pair (split_sources, split_time) if split_sources is non-empty.
    """
    pieces, split_time = split_sources(srcs)
    if not pieces:
        return
    if sample_factor:
        # debugging facility to reduce the size of a calculation
        pieces = random_filter(pieces, sample_factor, seed)
    if srcfilter and pieces:
        pieces = list(srcfilter.filter(pieces))
    if pieces:
        yield pieces, split_time
def preclassical(srcs, srcfilter, gsims, params, monitor):
    """
    Split and prefilter the sources.

    :param srcs: a list of seismic sources
    :param srcfilter: a filter providing a .get_close_sites method
    :param gsims: unused here (kept for task-signature compatibility)
    :param params: unused here (kept for task-signature compatibility)
    :param monitor: a Monitor instance providing .task_no and timing contexts
    :returns: a dict with keys pmap, calc_times, rup_data and extra
    """
    # per-source accumulator of [num_ruptures, nsites, elapsed time]
    calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nrups, nsites, time
    pmap = AccumDict(accum=0)
    with monitor("splitting/filtering sources"):
        splits, _stime = split_sources(srcs)
    for src in splits:
        t0 = time.time()
        if srcfilter.get_close_sites(src) is None:
            continue  # no sites close enough to this source
        dt = time.time() - t0
        calc_times[src.id] += F32([src.num_ruptures, src.nsites, dt])
        for grp_id in src.src_group_ids:
            # touch the group key so the group appears in pmap even if empty
            pmap[grp_id] += 0
    # NOTE(review): `src` below is the for-loop variable leaking out of the
    # loop, so totrups reports only the *last* split's rupture count (and
    # raises NameError if `splits` is empty); despite the name it is not a
    # total — confirm whether an accumulated sum was intended
    return dict(pmap=pmap, calc_times=calc_times, rup_data={'grp_id': []},
                extra=dict(task_no=monitor.task_no,
                           totrups=src.num_ruptures))
def submit_sources(self, smap):
    """
    Send the sources split in tasks.

    :param smap: a task dispatcher exposing a .submit method
    """
    oq = self.oqparam
    many_sites = len(self.sitecol) > int(config.general.max_sites_disagg)
    trt_sources = self.csm.get_trt_sources(optimize_dupl=True)
    # cap the weight per task at one million ruptures
    maxweight = min(
        self.csm.get_maxweight(trt_sources, nrup, oq.concurrent_tasks),
        1E6)
    param = dict(truncation_level=oq.truncation_level, imtls=oq.imtls,
                 filter_distance=oq.filter_distance, reqv=oq.get_reqv(),
                 pointsource_distance=oq.pointsource_distance,
                 task_duration=oq.task_duration, maxweight=maxweight)
    logging.info('Max ruptures per task = %(maxweight)d', param)
    for trt, sources in trt_sources:
        heavy_sources = []
        gsims = self.csm.info.gsim_lt.get_gsims(trt)
        if hasattr(sources, 'atomic') and sources.atomic:
            # atomic groups must be computed as a whole by a single task
            smap.submit(sources, self.src_filter, gsims, param,
                        func=classical)
        else:  # regroup the sources in blocks
            for block in block_splitter(sources, maxweight, nrup):
                if many_sites and block.weight > maxweight:
                    heavy_sources.extend(block)
                else:  # light sources to be split on the workers
                    smap.submit(block, self.src_filter, gsims, param)
        # heavy sources are split on the master node
        for src in heavy_sources:
            logging.info('Splitting %s', src)
            srcs, _ = split_sources([src])
            for blk in block_splitter(srcs, maxweight, nrup):
                smap.submit(blk, self.src_filter, gsims, param)