def __init__(self, oqparam, monitor=Monitor(), calc_id=None):
    self.monitor = monitor
    self.datastore = datastore.DataStore(calc_id)
    self.monitor.calc_id = self.datastore.calc_id
    self.monitor.hdf5path = self.datastore.hdf5path
    self.datastore.export_dir = oqparam.export_dir
    self.oqparam = oqparam
def gen_ctxs(self, ruptures, sites, src_id, mon=Monitor()):
    """
    :param ruptures: a list of ruptures generated by the same source
    :param sites: a (filtered) SiteCollection
    :param src_id: the ID of the source (for debugging purposes)
    :param mon: a Monitor object
    :yields: fat RuptureContexts
    """
    fewsites = len(sites.complete) <= self.max_sites_disagg
    for rup in ruptures:
        with mon:
            try:
                ctx, r_sites, dctx = self.make_contexts(
                    getattr(rup, 'sites', sites), rup)
            except FarAwayRupture:
                continue
            for par in self.REQUIRES_SITES_PARAMETERS:
                setattr(ctx, par, r_sites[par])
            ctx.sids = r_sites.sids
            ctx.src_id = src_id
            for par in self.REQUIRES_DISTANCES | {'rrup'}:
                setattr(ctx, par, getattr(dctx, par))
            if fewsites:
                # get closest point on the surface
                closest = rup.surface.get_closest_points(sites.complete)
                ctx.clon = closest.lons[ctx.sids]
                ctx.clat = closest.lats[ctx.sids]
        yield ctx
def grid_point_sources(sources, ps_grid_spacing, monitor=Monitor()):
    """
    :param sources:
        a list of sources with the same grp_id (point sources and not)
    :param ps_grid_spacing:
        value of the point source grid spacing in km; if None, do nothing
    :returns:
        a dict grp_id -> list of non-point sources and collapsed point sources
    """
    grp_id = sources[0].grp_id
    for src in sources[1:]:
        assert src.grp_id == grp_id, (src.grp_id, grp_id)
    if ps_grid_spacing is None:
        return {grp_id: sources}
    out = [src for src in sources if not hasattr(src, 'location')]
    ps = numpy.array([src for src in sources if hasattr(src, 'location')])
    if len(ps) < 2:  # nothing to collapse
        return {grp_id: out + list(ps)}
    coords = _coords(ps)
    deltax = angular_distance(ps_grid_spacing, lat=coords[:, 1].mean())
    deltay = angular_distance(ps_grid_spacing)
    grid = groupby_grid(coords[:, 0], coords[:, 1], deltax, deltay)
    task_no = getattr(monitor, 'task_no', 0)
    for i, idxs in enumerate(grid.values()):
        if len(idxs) > 1:
            cps = CollapsedPointSource('cps-%d-%d' % (task_no, i), ps[idxs])
            cps.id = ps[0].id
            cps.grp_id = ps[0].grp_id
            cps.trt_smr = ps[0].trt_smr
            out.append(cps)
        else:  # there is a single source
            out.append(ps[idxs[0]])
    return {grp_id: out}
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    if getattr(group, 'src_interdep', None) == 'mutex':
        mutex_weight = {src.source_id: weight
                        for src, weight in zip(group.sources,
                                               group.srcs_weights)}
    else:
        mutex_weight = None
    grp_ids = set()
    for src in group:
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    with GroundShakingIntensityModel.forbid_instantiation():
        imtls = param['imtls']
        trunclevel = param.get('truncation_level')
        cmaker = ContextMaker(gsims, maxdist)
        ctx_mon = monitor('make_contexts', measuremem=False)
        poe_mon = monitor('get_poes', measuremem=False)
        pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array), len(gsims))
                          for grp_id in grp_ids})
        # AccumDict of arrays with 4 elements weight, nsites, calc_time, split
        pmap.calc_times = AccumDict(accum=numpy.zeros(4))
        pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
        for src, s_sites in src_filter(group):  # filter now
            t0 = time.time()
            indep = group.rup_interdep == 'indep' if mutex_weight else True
            poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel,
                                    ctx_mon, poe_mon, indep)
            if mutex_weight:  # mutex sources
                weight = mutex_weight[src.source_id]
                for sid in poemap:
                    pcurve = pmap[group.id].setdefault(sid, 0)
                    pcurve += poemap[sid] * weight
            elif poemap:
                for grp_id in src.src_group_ids:
                    pmap[grp_id] |= poemap
            src_id = src.source_id.split(':', 1)[0]
            pmap.calc_times[src_id] += numpy.array(
                [src.weight, len(s_sites), time.time() - t0, 1])
            # storing the number of contributing ruptures too
            pmap.eff_ruptures += {grp_id: getattr(poemap, 'eff_ruptures', 0)
                                  for grp_id in src.src_group_ids}
        if mutex_weight and group.grp_probability is not None:
            pmap[group.id] *= group.grp_probability
        return pmap
def __init__(self, oqparam, calc_id=None):
    self.datastore = datastore.DataStore(calc_id)
    self._monitor = Monitor(
        '%s.run' % self.__class__.__name__, measuremem=True)
    self.oqparam = oqparam
    if 'performance_data' not in self.datastore:
        self.datastore.create_dset('performance_data', perf_dt)
def safely_call(func, args, pickle=False):
    """
    Call the given function with the given arguments safely, i.e.
    by trapping the exceptions. Return a triple (result, exc_type, monitor)
    where exc_type is None if no exceptions occur, otherwise it is the
    exception class and the result is a string containing error message
    and traceback.

    :param func: the function to call
    :param args: the arguments
    :param pickle:
        if set, the input arguments are unpickled and the return value
        is pickled; otherwise they are left unchanged
    """
    if pickle:
        args = [a.unpickle() for a in args]
    ismon = args and isinstance(args[-1], Monitor)
    mon = args[-1] if ismon else Monitor()
    try:
        got = func(*args)
        if inspect.isgenerator(got):
            got = list(got)
        res = got, None, mon
    except:
        etype, exc, tb = sys.exc_info()
        tb_str = ''.join(traceback.format_tb(tb))
        res = ('\n%s%s: %s' % (tb_str, etype.__name__, exc), etype, mon)
    if pickle:
        return Pickled(res)
    return res
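
# A minimal usage sketch (not part of the module above) showing how the
# (result, exc_type, monitor) triple returned by safely_call can be consumed.
# `risky` is a hypothetical task function; Monitor is assumed to be
# openquake.baselib.performance.Monitor, as elsewhere in this listing.
def risky(x, monitor):
    return 1 / x  # raises ZeroDivisionError when x == 0

res, etype, mon = safely_call(risky, (0, Monitor()))
if etype is not None:
    # res is the error message plus traceback, as a string
    print('task failed with %s:%s' % (etype.__name__, res))
else:
    print('task succeeded:', res)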
def make_gmfs(eb_ruptures, sitecol, imts, gsims, trunc_level, correl_model,
              monitor=Monitor()):
    """
    :param eb_ruptures: a list of EBRuptures
    :param sitecol: a SiteCollection instance
    :param imts: an ordered list of intensity measure type strings
    :param gsims: an ordered list of GSIM instances
    :param trunc_level: truncation level
    :param correl_model: correlation model instance
    :param monitor: a monitor instance
    :returns: a dictionary serial -> GmfaSidsEtags
    """
    dic = {}  # serial -> GmfaSidsEtags
    ctx_mon = monitor('make contexts')
    gmf_mon = monitor('compute poes')
    sites = sitecol.complete
    for ebr in eb_ruptures:
        with ctx_mon:
            r_sites = site.FilteredSiteCollection(ebr.indices, sites)
            computer = calc.gmf.GmfComputer(
                ebr.rupture, r_sites, imts, gsims, trunc_level, correl_model)
        with gmf_mon:
            gmfa = computer.calcgmfs(ebr.multiplicity, ebr.rupture.seed)
            dic[ebr.serial] = GmfaSidsEtags(gmfa, r_sites.indices, ebr.etags)
    return dic
def __init__(self, task_func, task_args=(), monitor=None, distribute=None,
             progress=logging.info):
    self.__class__.init(distribute=distribute or OQ_DISTRIBUTE)
    self.task_func = task_func
    self.monitor = monitor or Monitor(task_func.__name__)
    self.calc_id = getattr(self.monitor, 'calc_id', None)
    self.name = self.monitor.operation or task_func.__name__
    self.task_args = task_args
    self.distribute = distribute or oq_distribute(task_func)
    self.progress = progress
    try:
        self.num_tasks = len(self.task_args)
    except TypeError:  # generators have no len
        self.num_tasks = None
    # a task can be a function, a class or an instance with a __call__
    if inspect.isfunction(task_func):
        self.argnames = inspect.getfullargspec(task_func).args
    elif inspect.isclass(task_func):
        self.argnames = inspect.getfullargspec(task_func.__init__).args[1:]
    else:  # instance with a __call__ method
        self.argnames = inspect.getfullargspec(task_func.__call__).args[1:]
    self.receiver = 'tcp://%s:%s' % (
        config.dbserver.listen, config.dbserver.receiver_ports)
    self.sent = numpy.zeros(len(self.argnames) - 1)
    self.monitor.backurl = None  # overridden later
    self.tasks = []  # populated by .submit
    h5 = self.monitor.hdf5
    task_info = 'task_info/' + self.name
    if h5 and task_info not in h5:  # first time
        # task_info and performance_data should be generated in advance
        hdf5.create(h5, task_info, task_info_dt)
    if h5 and 'performance_data' not in h5:
        hdf5.create(h5, 'performance_data', perf_dt)
def collect_bins_data(trt_num, sources, site, cmaker, imldict,
                      truncation_level, n_epsilons, mon=Monitor()):
    sitecol = SiteCollection([site])
    # NB: instantiating truncnorm is slow and calls the infamous "doccer"
    truncnorm = scipy.stats.truncnorm(-truncation_level, truncation_level)
    acc = AccumDict(accum=[])
    for source in sources:
        try:
            trti = trt_num[source.tectonic_region_type]
            rupdict = cmaker.disaggregate(
                sitecol, source.iter_ruptures(), imldict, truncnorm,
                n_epsilons, mon)
            acc['trti'].extend([trti] * len(rupdict['mags']))
            acc += rupdict
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = 'An error occurred with source id=%s. Error: %s'
            msg %= (source.source_id, err)
            raise_(etype, msg, tb)
    return acc
def init(cls, poolsize=None, distribute=OQ_DISTRIBUTE):
    if distribute == 'processpool' and not hasattr(cls, 'pool'):
        cls.pool = multiprocessing.Pool(poolsize, init_workers)
        m = Monitor('wakeup')
        cls(_wakeup, [(.2, m) for _ in range(cls.pool._processes)])
    elif distribute == 'threadpool' and not hasattr(cls, 'pool'):
        cls.pool = multiprocessing.dummy.Pool(poolsize)
def info(calculators, gsims, views, exports, report, input_file=''):
    """
    Give information. You can pass the name of an available calculator,
    a job.ini file, or a zip archive with the input files.
    """
    logging.basicConfig(level=logging.INFO)
    if calculators:
        for calc in sorted(base.calculators):
            print(calc)
    if gsims:
        for gs in gsim.get_available_gsims():
            print(gs)
    if views:
        for name in sorted(datastore.view):
            print(name)
    if exports:
        dic = groupby(export, operator.itemgetter(0),
                      lambda group: [r[1] for r in group])
        n = 0
        for exporter, formats in dic.items():
            print(exporter, formats)
            n += len(formats)
        print('There are %d exporters defined.' % n)
    if input_file.endswith('.xml'):
        print(nrml.read(input_file).to_str())
    elif input_file.endswith(('.ini', '.zip')):
        with Monitor('info', measuremem=True) as mon:
            if report:
                print('Generated', reportwriter.build_report(input_file))
            else:
                print_csm_info(input_file)
        if mon.duration > 1:
            print(mon)
    elif input_file:
        print("No info for '%s'" % input_file)
def collect_bin_data(sources, sitecol, cmaker, iml4, truncation_level,
                     n_epsilons, monitor=Monitor()):
    """
    :param sources: a list of sources
    :param sitecol: a SiteCollection instance
    :param cmaker: a ContextMaker instance
    :param iml4: an ArrayWrapper of intensities of shape (N, R, M, P)
    :param truncation_level: the truncation level
    :param n_epsilons: the number of epsilons
    :param monitor: a Monitor instance
    :returns: a dictionary (poe, imt, rlzi) -> probabilities of shape (N, E)
    """
    # NB: instantiating truncnorm is slow and calls the infamous "doccer"
    truncnorm = scipy.stats.truncnorm(-truncation_level, truncation_level)
    epsilons = numpy.linspace(truncnorm.a, truncnorm.b, n_epsilons + 1)
    acc = AccumDict(accum=[])
    for source in sources:
        ruptures = source.iter_ruptures()
        try:
            acc += cmaker.disaggregate(
                sitecol, ruptures, iml4, truncnorm, epsilons, monitor)
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = 'An error occurred with source id=%s. Error: %s'
            msg %= (source.source_id, err)
            raise_(etype, msg, tb)
    return pack(acc, 'mags dists lons lats'.split())
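
# A small self-contained check (not engine code) of the epsilon-bin
# construction used above, and of the eps_bands computed in the standalone
# disaggregate() further below: the probability mass of the truncated normal
# split over the n_epsilons bins must sum to one.
import numpy
import scipy.stats

truncation_level, n_epsilons = 3, 6
truncnorm = scipy.stats.truncnorm(-truncation_level, truncation_level)
epsilons = numpy.linspace(truncnorm.a, truncnorm.b, n_epsilons + 1)
eps_bands = truncnorm.cdf(epsilons[1:]) - truncnorm.cdf(epsilons[:-1])
assert abs(eps_bands.sum() - 1.0) < 1e-12, eps_bands.sum()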
def sample_ruptures(sources, srcfilter, param, monitor=Monitor()):
    """
    :param sources: a sequence of sources of the same group
    :param srcfilter:
        SourceFilter instance used also for bounding box post filtering
    :param param:
        a dictionary of additional parameters including
        ses_per_logic_tree_path
    :param monitor: monitor instance
    :yields: dictionaries with keys rup_array, calc_times
    """
    # AccumDict of arrays with 3 elements num_ruptures, num_sites, calc_time
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    # Compute and save stochastic event sets
    num_ses = param['ses_per_logic_tree_path']
    [grp_id] = set(src.src_group_id for src in sources)
    # Compute the number of occurrences of the source group. This is used
    # for cluster groups or groups with mutually exclusive sources.
    if (getattr(sources, 'atomic', False) and
            getattr(sources, 'cluster', False)):
        eb_ruptures, calc_times = sample_cluster(
            sources, srcfilter, num_ses, param)
        # Yield ruptures
        yield AccumDict(rup_array=get_rup_array(eb_ruptures, srcfilter),
                        calc_times=calc_times,
                        eff_ruptures={grp_id: len(eb_ruptures)})
    else:
        eb_ruptures = []
        eff_ruptures = 0
        # AccumDict of arrays with 3 elements n_occ, n_sites, calc_time
        calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
        for src, _sites in srcfilter(sources):
            eff_ruptures += 1
            t0 = time.time()
            if len(eb_ruptures) > MAX_RUPTURES:
                # yield partial result to avoid running out of memory
                yield AccumDict(
                    rup_array=get_rup_array(eb_ruptures, srcfilter),
                    calc_times={}, eff_ruptures={})
                eb_ruptures.clear()
            samples = getattr(src, 'samples', 1)
            n_occ = 0
            for rup, n_occ in src.sample_ruptures(samples * num_ses):
                ebr = EBRupture(rup, src.id, grp_id, n_occ, samples)
                eb_ruptures.append(ebr)
                n_occ += ebr.n_occ
            dt = time.time() - t0
            try:
                n_sites = len(_sites)
            except (TypeError, ValueError):  # for None or a closed dataset
                n_sites = 0
            calc_times[src.id] += numpy.array([n_occ, n_sites, dt])
        rup_array = get_rup_array(eb_ruptures, srcfilter)
        yield AccumDict(rup_array=rup_array, calc_times=calc_times,
                        eff_ruptures={grp_id: eff_ruptures})
def sample_ruptures(sources, srcfilter, param, monitor=Monitor()):
    """
    :param sources: a sequence of sources of the same group
    :param srcfilter:
        SourceFilter instance used also for bounding box post filtering
    :param param:
        a dictionary of additional parameters including
        ses_per_logic_tree_path
    :param monitor: monitor instance
    :yields: dictionaries with keys rup_array, calc_times
    """
    # AccumDict of arrays with 3 elements num_ruptures, num_sites, calc_time
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    # Compute and save stochastic event sets
    num_ses = param['ses_per_logic_tree_path']
    trt = sources[0].tectonic_region_type
    # Compute the number of occurrences of the source group. This is used
    # for cluster groups or groups with mutually exclusive sources.
    if (getattr(sources, 'atomic', False) and
            getattr(sources, 'cluster', False)):
        eb_ruptures, calc_times = sample_cluster(
            sources, srcfilter, num_ses, param)
        # Yield ruptures
        yield AccumDict(dict(rup_array=get_rup_array(eb_ruptures, srcfilter),
                             calc_times=calc_times,
                             eff_ruptures={trt: len(eb_ruptures)}))
    else:
        eb_ruptures = []
        eff_ruptures = 0
        # AccumDict of arrays with 3 elements num_ruptures, num_sites, calc_time
        calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
        for src, _ in srcfilter.filter(sources):
            nr = src.num_ruptures
            eff_ruptures += nr
            t0 = time.time()
            if len(eb_ruptures) > MAX_RUPTURES:
                # yield partial result to avoid running out of memory
                yield AccumDict(
                    dict(rup_array=get_rup_array(eb_ruptures, srcfilter),
                         calc_times={}, eff_ruptures={}))
                eb_ruptures.clear()
            samples = getattr(src, 'samples', 1)
            for rup, trt_smrlz, n_occ in src.sample_ruptures(
                    samples * num_ses, param['ses_seed']):
                ebr = EBRupture(rup, src.source_id, trt_smrlz, n_occ)
                eb_ruptures.append(ebr)
            dt = time.time() - t0
            calc_times[src.id] += numpy.array([nr, src.nsites, dt])
        rup_array = get_rup_array(eb_ruptures, srcfilter)
        yield AccumDict(dict(rup_array=rup_array, calc_times=calc_times,
                             eff_ruptures={trt: eff_ruptures}))
def test(self):
    mon = Monitor()
    iterargs = ((i, mon) for i in range(10))
    res = _starmap(double, iterargs, self.host,
                   self.task_in_port, self.receiver_ports)
    num_tasks = next(res)
    self.assertEqual(num_tasks, 10)
    self.assertEqual(sum(r[0] for r in res), 90)
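
# `double` is not shown in this listing; a plausible implementation consistent
# with the assertions above would return a sequence whose first element is
# twice the input, so that sum(r[0] for r in res) == 2 * sum(range(10)) == 90.
def double(i, monitor):
    return [i * 2]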
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times,
        .eff_ruptures
    """
    grp_ids = set()
    for src in group:
        if not src.num_ruptures:
            # src.num_ruptures is set when parsing the XML, but not when
            # the source is instantiated manually, so it is set here
            src.num_ruptures = src.count_ruptures()
        grp_ids.update(src.src_group_ids)
    maxdist = src_filter.integration_distance
    imtls = param['imtls']
    trunclevel = param.get('truncation_level')
    cmaker = ContextMaker(gsims, maxdist, param, monitor)
    pmap = AccumDict({grp_id: ProbabilityMap(len(imtls.array), len(gsims))
                      for grp_id in grp_ids})
    # AccumDict of arrays with 3 elements weight, nsites, calc_time
    pmap.calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    pmap.eff_ruptures = AccumDict()  # grp_id -> num_ruptures
    src_mutex = param.get('src_interdep') == 'mutex'
    rup_mutex = param.get('rup_interdep') == 'mutex'
    for src, s_sites in src_filter(group):  # filter now
        t0 = time.time()
        try:
            poemap = cmaker.poe_map(
                src, s_sites, imtls, trunclevel, not rup_mutex)
        except Exception as err:
            etype, err, tb = sys.exc_info()
            msg = '%s (source id=%s)' % (str(err), src.source_id)
            raise etype(msg).with_traceback(tb)
        if src_mutex:  # mutex sources, there is a single group
            for sid in poemap:
                pcurve = pmap[src.src_group_id].setdefault(sid, 0)
                pcurve += poemap[sid] * src.mutex_weight
        elif poemap:
            for gid in src.src_group_ids:
                pmap[gid] |= poemap
        pmap.calc_times[src.id] += numpy.array(
            [src.weight, len(s_sites), time.time() - t0])
        # storing the number of contributing ruptures too
        pmap.eff_ruptures += {gid: getattr(poemap, 'eff_ruptures', 0)
                              for gid in src.src_group_ids}
    if src_mutex and param.get('grp_probability'):
        pmap[src.src_group_id] *= param['grp_probability']
    return pmap
def sample_ruptures(group, src_filter=filters.source_site_noop_filter,
                    gsims=(), param=(), monitor=Monitor()):
    """
    :param group: a SourceGroup or a sequence of sources of the same group
    :param src_filter: a source site filter (default noop filter)
    :param gsims: a list of GSIMs for the current tectonic region model
    :param param:
        a dictionary of additional parameters (by default
        ses_per_logic_tree_path=1, samples=1, seed=42, filter_distance=1000)
    :param monitor: monitor instance
    :returns:
        a dictionary with eb_ruptures, num_events, num_ruptures, calc_times
    """
    if not param:
        param = dict(ses_per_logic_tree_path=1, samples=1, seed=42,
                     filter_distance=1000)
    if getattr(group, 'src_interdep', None) == 'mutex':
        prob = {src: sw for src, sw in zip(group, group.srcs_weights)}
    else:
        prob = {src: 1 for src in group}
    eb_ruptures = []
    calc_times = []
    rup_mon = monitor('making contexts', measuremem=False)
    # Compute and save stochastic event sets
    num_ruptures = 0
    eids = numpy.zeros(0)
    cmaker = ContextMaker(gsims, src_filter.integration_distance,
                          param['filter_distance'], monitor)
    for src, s_sites in src_filter(group):
        t0 = time.time()
        num_ruptures += src.num_ruptures
        num_occ_by_rup = _sample_ruptures(
            src, prob[src], param['ses_per_logic_tree_path'],
            param['samples'], param['seed'])
        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_ruptures *before* the filtering
        for ebr in _build_eb_ruptures(src, num_occ_by_rup, cmaker,
                                      s_sites, param['seed'], rup_mon):
            eb_ruptures.append(ebr)
        eids = set_eids(eb_ruptures)
        src_id = src.source_id.split(':', 1)[0]
        dt = time.time() - t0
        calc_times.append((src_id, src.nsites, eids, dt))
    dic = dict(eb_ruptures=eb_ruptures, num_events=len(eids),
               calc_times=calc_times, num_ruptures=num_ruptures)
    return dic
def info(calculators, gsims, views, exports, report, input_file=''):
    """
    Give information. You can pass the name of an available calculator,
    a job.ini file, or a zip archive with the input files.
    """
    logging.basicConfig(level=logging.INFO)
    if calculators:
        for calc in sorted(base.calculators):
            print(calc)
    if gsims:
        for gs in gsim.get_available_gsims():
            print(gs)
    if views:
        for name in sorted(view):
            print(name)
    if exports:
        dic = groupby(export, operator.itemgetter(0),
                      lambda group: [r[1] for r in group])
        n = 0
        for exporter, formats in dic.items():
            print(exporter, formats)
            n += len(formats)
        print('There are %d exporters defined.' % n)
    if os.path.isdir(input_file) and report:
        with Monitor('info', measuremem=True) as mon:
            with mock.patch.object(logging.root, 'info'):  # reduce logging
                do_build_reports(input_file)
        print(mon)
    elif input_file.endswith('.xml'):
        node = nrml.read(input_file)
        if node[0].tag.endswith('sourceModel'):
            assert node['xmlns'].endswith('nrml/0.5'), node['xmlns']
            print(source_model_info(node[0]))
        else:
            print(node.to_str())
    elif input_file.endswith(('.ini', '.zip')):
        with Monitor('info', measuremem=True) as mon:
            if report:
                print('Generated', reportwriter.build_report(input_file))
            else:
                print_csm_info(input_file)
        if mon.duration > 1:
            print(mon)
    elif input_file:
        print("No info for '%s'" % input_file)
def classical(group, src_filter, gsims, param, monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns:
        a dictionary with keys pmap, calc_times, rup_data, extra
    """
    if not hasattr(src_filter, 'sitecol'):  # do not filter
        src_filter = SourceFilter(src_filter, {})

    # Get the parameters assigned to the group
    src_mutex = getattr(group, 'src_interdep', None) == 'mutex'
    cluster = getattr(group, 'cluster', None)
    trts = set()
    maxradius = 0
    for src in group:
        if not src.num_ruptures:
            # src.num_ruptures may not be set, so it is set here
            src.num_ruptures = src.count_ruptures()
        # set the proper TOM in case of a cluster
        if cluster:
            src.temporal_occurrence_model = FatedTOM(time_span=1)
        trts.add(src.tectonic_region_type)
        if hasattr(src, 'radius'):  # for prefiltered point sources
            maxradius = max(maxradius, src.radius)

    param['maximum_distance'] = src_filter.integration_distance
    [trt] = trts  # there must be a single tectonic region type
    cmaker = ContextMaker(trt, gsims, param, monitor)
    try:
        cmaker.tom = group.temporal_occurrence_model
    except AttributeError:  # got a list of sources, not a group
        time_span = param.get('investigation_time')  # None for nonparametric
        cmaker.tom = PoissonTOM(time_span) if time_span else None
    if cluster:
        cmaker.tom = FatedTOM(time_span=1)
    pmap, rup_data, calc_times = PmapMaker(cmaker, src_filter, group).make()
    extra = {}
    extra['task_no'] = getattr(monitor, 'task_no', 0)
    extra['trt'] = trt
    extra['source_id'] = src.source_id
    extra['grp_id'] = src.grp_id
    extra['maxradius'] = maxradius
    group_probability = getattr(group, 'grp_probability', None)
    if src_mutex and group_probability:
        pmap *= group_probability
    if cluster:
        tom = getattr(group, 'temporal_occurrence_model')
        pmap = _cluster(param['imtls'], tom, gsims, pmap)
    return dict(pmap=pmap, calc_times=calc_times, rup_data=rup_data,
                extra=extra)
def __init__(self, oqparam, calc_id):
    self.datastore = datastore.DataStore(calc_id)
    init_performance(self.datastore.hdf5)
    self._monitor = Monitor('%s.run' % self.__class__.__name__,
                            measuremem=True, h5=self.datastore)
    # NB: using h5=self.datastore.hdf5 would mean losing the performance
    # info about Calculator.run since the file will be closed later on
    self.oqparam = oqparam
def run_calc(job_id, oqparam, log_level, log_file, exports,
             hazard_calculation_id=None):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    """
    monitor = Monitor('total runtime', measuremem=True)
    with logs.handle(job_id, log_level, log_file):  # run the job
        if USE_CELERY and os.environ.get('OQ_DISTRIBUTE') == 'celery':
            set_concurrent_tasks_default()
        calc = base.calculators(oqparam, monitor, calc_id=job_id)
        tb = 'None\n'
        try:
            _do_run_calc(calc, exports, hazard_calculation_id)
            logs.dbcmd('finish', job_id, 'complete')
            expose_outputs(calc.datastore)
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, calc.monitor.duration)
        except:
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if USE_CELERY:
                    celery_cleanup(TERMINATE, parallel.TaskManager.task_ids)
            except:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
def sample_ruptures(sources, cmaker, sitecol=None, monitor=Monitor()):
    """
    :param sources: a sequence of sources of the same group
    :param cmaker:
        a ContextMaker instance with ses_per_logic_tree_path, ses_seed
    :param sitecol:
        SiteCollection instance used for filtering (None for no filtering)
    :param monitor: monitor instance
    :yields: dictionaries with keys rup_array, calc_times
    """
    srcfilter = SourceFilter(sitecol, cmaker.maximum_distance)
    # AccumDict of arrays with 3 elements num_ruptures, num_sites, calc_time
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    # Compute and save stochastic event sets
    num_ses = cmaker.ses_per_logic_tree_path
    grp_id = sources[0].grp_id
    # Compute the number of occurrences of the source group. This is used
    # for cluster groups or groups with mutually exclusive sources.
    if (getattr(sources, 'atomic', False) and
            getattr(sources, 'cluster', False)):
        eb_ruptures, calc_times = sample_cluster(
            sources, srcfilter, num_ses, vars(cmaker))
        # Yield ruptures
        er = sum(src.num_ruptures for src, _ in srcfilter.filter(sources))
        yield AccumDict(dict(rup_array=get_rup_array(eb_ruptures, srcfilter),
                             calc_times=calc_times,
                             eff_ruptures={grp_id: er}))
    else:
        eb_ruptures = []
        eff_ruptures = 0
        # AccumDict of arrays with 3 elements num_ruptures, num_sites, calc_time
        calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
        for src, _ in srcfilter.filter(sources):
            nr = src.num_ruptures
            eff_ruptures += nr
            t0 = time.time()
            if len(eb_ruptures) > MAX_RUPTURES:
                # yield partial result to avoid running out of memory
                yield AccumDict(
                    dict(rup_array=get_rup_array(eb_ruptures, srcfilter),
                         calc_times={}, eff_ruptures={}))
                eb_ruptures.clear()
            samples = getattr(src, 'samples', 1)
            for rup, trt_smr, n_occ in src.sample_ruptures(
                    samples * num_ses, cmaker.ses_seed):
                ebr = EBRupture(rup, src.source_id, trt_smr, n_occ)
                eb_ruptures.append(ebr)
            dt = time.time() - t0
            calc_times[src.id] += numpy.array([nr, src.nsites, dt])
        rup_array = get_rup_array(eb_ruptures, srcfilter)
        yield AccumDict(dict(rup_array=rup_array, calc_times=calc_times,
                             eff_ruptures={grp_id: eff_ruptures}))
def pmap_from_grp(sources, source_site_filter, imtls, gsims,
                  truncation_level=None, bbs=(), monitor=Monitor()):
    """
    Compute the hazard curves for a set of sources belonging to the same
    tectonic region type for all the GSIMs associated to that TRT.
    The arguments are the same as in :func:`calc_hazard_curves`, except
    for ``gsims``, which is a list of GSIM instances.

    :returns: a ProbabilityMap instance
    """
    if isinstance(sources, SourceGroup):
        group = sources
        sources = group.sources
        trt = sources[0].tectonic_region_type
    else:  # list of sources
        trt = sources[0].tectonic_region_type
        group = SourceGroup(trt, sources, 'src_group', 'indep', 'indep')
    try:
        maxdist = source_site_filter.integration_distance[trt]
    except:
        maxdist = source_site_filter.integration_distance
    if hasattr(gsims, 'keys'):  # dictionary trt -> gsim
        gsims = [gsims[trt]]
    with GroundShakingIntensityModel.forbid_instantiation():
        imtls = DictArray(imtls)
        cmaker = ContextMaker(gsims, maxdist)
        ctx_mon = monitor('making contexts', measuremem=False)
        pne_mon = monitor('computing poes', measuremem=False)
        disagg_mon = monitor('get closest points', measuremem=False)
        src_indep = group.src_interdep == 'indep'
        pmap = ProbabilityMap(len(imtls.array), len(gsims))
        pmap.calc_times = []  # triples (src_id, nsites, delta_t)
        pmap.grp_id = sources[0].src_group_id
        for src, s_sites in source_site_filter(sources):
            t0 = time.time()
            poemap = poe_map(src, s_sites, imtls, cmaker, truncation_level,
                             bbs, group.rup_interdep == 'indep',
                             ctx_mon, pne_mon, disagg_mon)
            if src_indep:  # usual composition of probabilities
                pmap |= poemap
            else:  # mutually exclusive probabilities
                weight = float(group.srcs_weights[src.source_id])
                for sid in poemap:
                    pmap[sid] += poemap[sid] * weight
            pmap.calc_times.append(
                (src.source_id, len(s_sites), time.time() - t0))
        # storing the number of contributing ruptures too
        pmap.eff_ruptures = {pmap.grp_id: pne_mon.counts}
        return pmap
def get_hazard(self, rlzs_assoc, monitor=Monitor()):
    """
    :param rlzs_assoc:
        :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param monitor:
        a :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        list of hazard dictionaries imt -> rlz -> haz per each site
    """
    return [{self.imt: rlzs_assoc.combine(hazard)}
            for hazard in self.hazard_by_site]
def disaggregate(self, sitecol, ruptures, iml4, truncnorm, epsilons,
                 monitor=Monitor()):
    """
    Disaggregate (separate) PoE of `iml4` in different contributions
    each coming from `n_epsilons` distribution bins.

    :param sitecol: a SiteCollection
    :param ruptures: an iterator over ruptures with the same TRT
    :param iml4: a 4d array of IMLs of shape (N, R, M, P)
    :param truncnorm: an instance of scipy.stats.truncnorm
    :param epsilons: the epsilon bins
    :param monitor: a Monitor instance
    :returns: an AccumDict
    """
    sitemesh = sitecol.mesh
    acc = AccumDict(accum=[])
    ctx_mon = monitor('make_contexts', measuremem=False)
    pne_mon = monitor('disaggregate_pne', measuremem=False)
    for rupture in ruptures:
        with ctx_mon:
            sctx, rctx, orig_dctx = self.make_contexts(
                sitecol, rupture, filter=False)
        if (self.maximum_distance and
                orig_dctx.rjb.min() > self.maximum_distance(
                    rupture.tectonic_region_type, rupture.mag)):
            continue  # rupture away from all sites
        cache = {}
        for r, gsim in self.gsim_by_rlzi.items():
            dctx = orig_dctx.roundup(gsim.minimum_distance)
            for m, imt in enumerate(iml4.imts):
                for p, poe in enumerate(iml4.poes_disagg):
                    iml = tuple(iml4.array[:, r, m, p])
                    try:
                        pne = cache[gsim, imt, iml]
                    except KeyError:
                        with pne_mon:
                            pne = gsim.disaggregate_pne(
                                rupture, sctx, rctx, dctx, imt, iml,
                                truncnorm, epsilons)
                            cache[gsim, imt, iml] = pne
                    acc[poe, str(imt), r].append(pne)
        closest_points = rupture.surface.get_closest_points(sitemesh)
        acc['mags'].append(rupture.mag)
        acc['dists'].append(dctx.rjb)
        acc['lons'].append(closest_points.lons)
        acc['lats'].append(closest_points.lats)
    return acc
def main(dirname):
    dname = pathlib.Path(dirname)
    with hdf5new() as hdf5:  # create a new datastore
        monitor = Monitor('count', hdf5)  # create a new monitor
        iterargs = ((open(dname/fname, encoding='utf-8').read(),)
                    for fname in os.listdir(dname)
                    if fname.endswith('.rst'))  # read the docs
        c = collections.Counter()  # initially empty counter
        for counter in Starmap(count, iterargs, monitor):
            c += counter
        print(c)  # total counts
        print('Performance info stored in', hdf5)
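
# `count` is not shown in this listing; a plausible implementation of the task
# used by the word-counting demo above: it receives the text of one .rst file
# plus the Monitor that the parallel framework passes as last argument, and
# returns a Counter to be accumulated by the caller.
import collections

def count(text, monitor):
    return collections.Counter(text.split())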
def disaggregate(self, sitecol, ruptures, iml4, truncnorm, epsilons,
                 monitor=Monitor()):
    """
    Disaggregate (separate) PoE of `iml4` in different contributions
    each coming from `n_epsilons` distribution bins.

    :param sitecol: a SiteCollection
    :param ruptures: an iterator over ruptures with the same TRT
    :param iml4: a 4d array of IMLs of shape (N, R, M, P)
    :param truncnorm: an instance of scipy.stats.truncnorm
    :param epsilons: the epsilon bins
    :param monitor: a Monitor instance
    :returns:
        an AccumDict with keys (poe, imt, rlzi) and mags, dists, lons, lats
    """
    acc = AccumDict(accum=[])
    ctx_mon = monitor('disagg_contexts', measuremem=False)
    pne_mon = monitor('disaggregate_pne', measuremem=False)
    clo_mon = monitor('get_closest', measuremem=False)
    for rupture in ruptures:
        with ctx_mon:
            orig_dctx = DistancesContext(
                (param, get_distances(rupture, sitecol, param))
                for param in self.REQUIRES_DISTANCES)
            self.add_rup_params(rupture)
        with clo_mon:  # this is faster than computing orig_dctx
            closest_points = rupture.surface.get_closest_points(sitecol)
        cache = {}
        for r, gsim in self.gsim_by_rlzi.items():
            dctx = orig_dctx.roundup(gsim.minimum_distance)
            for m, imt in enumerate(iml4.imts):
                for p, poe in enumerate(iml4.poes_disagg):
                    iml = tuple(iml4.array[:, r, m, p])
                    try:
                        pne = cache[gsim, imt, iml]
                    except KeyError:
                        with pne_mon:
                            pne = gsim.disaggregate_pne(
                                rupture, sitecol, dctx, imt, iml,
                                truncnorm, epsilons)
                            cache[gsim, imt, iml] = pne
                    acc[poe, str(imt), r].append(pne)
        acc['mags'].append(rupture.mag)
        acc['dists'].append(getattr(dctx, self.filter_distance))
        acc['lons'].append(closest_points.lons)
        acc['lats'].append(closest_points.lats)
    return acc
def __init__(self, trt, gsims, param=None, monitor=Monitor()):
    param = param or {}  # empty in the gmpe-smtk
    self.af = param.get('af', None)
    self.max_sites_disagg = param.get('max_sites_disagg', 10)
    self.collapse_level = param.get('collapse_level', False)
    self.trt = trt
    self.gsims = gsims
    self.single_site_opt = numpy.array(
        [hasattr(gsim, 'get_mean_std1') for gsim in gsims])
    self.maximum_distance = (param.get('maximum_distance') or
                             MagDepDistance({}))
    self.investigation_time = param.get('investigation_time')
    self.trunclevel = param.get('truncation_level')
    self.num_epsilon_bins = param.get('num_epsilon_bins', 1)
    self.grp_id = param.get('grp_id', 0)
    self.effect = param.get('effect')
    self.task_no = getattr(monitor, 'task_no', 0)
    for req in self.REQUIRES:
        reqset = set()
        for gsim in gsims:
            reqset.update(getattr(gsim, 'REQUIRES_' + req))
        setattr(self, 'REQUIRES_' + req, reqset)
    # self.pointsource_distance is a dict mag -> dist, possibly empty
    psd = param.get('pointsource_distance')
    if hasattr(psd, 'ddic'):
        self.pointsource_distance = psd.ddic.get(trt, {})
    else:
        self.pointsource_distance = {}
    if 'imtls' in param:
        self.imtls = param['imtls']
    elif 'hazard_imtls' in param:
        self.imtls = DictArray(param['hazard_imtls'])
    else:
        self.imtls = {}
    self.imts = [imt_module.from_string(imt) for imt in self.imtls]
    self.reqv = param.get('reqv')
    if self.reqv is not None:
        self.REQUIRES_DISTANCES.add('repi')
    self.mon = monitor
    self.ctx_mon = monitor('make_contexts', measuremem=False)
    self.loglevels = DictArray(self.imtls) if self.imtls else {}
    self.shift_hypo = param.get('shift_hypo')
    with warnings.catch_warnings():
        # avoid RuntimeWarning: divide by zero encountered in log
        warnings.simplefilter("ignore")
        for imt, imls in self.imtls.items():
            if imt != 'MMI':
                self.loglevels[imt] = numpy.log(imls)
    # instantiate monitors
    self.gmf_mon = monitor('computing mean_std', measuremem=False)
    self.poe_mon = monitor('get_poes', measuremem=False)
def get_hazard(self, rlzs_assoc, monitor=Monitor()):
    """
    :param rlzs_assoc:
        :class:`openquake.commonlib.source.RlzsAssoc` instance
    :param monitor:
        a :class:`openquake.baselib.performance.Monitor` instance
    :returns:
        lists of N hazard dictionaries imt -> rlz -> Gmvs
    """
    gmfcoll = create(GmfCollector, self.ses_ruptures, self.sitecol,
                     self.imts, rlzs_assoc, self.trunc_level,
                     self.correl_model, self.min_iml, monitor)
    return gmfcoll
def disaggregate(cmaker, sitecol, rupdata, iml2, truncnorm, epsilons,
                 monitor=Monitor()):
    """
    Disaggregate (separate) PoE in different contributions.

    :param cmaker: a ContextMaker instance
    :param sitecol: a SiteCollection with N=1 site
    :param rupdata: an array of rupture parameters for ruptures of the same TRT
    :param iml2: a 2D array of IMLs of shape (M, P)
    :param truncnorm: an instance of scipy.stats.truncnorm
    :param epsilons: the epsilon bins
    :param monitor: a Monitor instance
    :returns:
        an AccumDict with keys (poe, imt, rlzi) and mags, dists, lons, lats
    """
    assert len(sitecol) == 1, sitecol
    acc = AccumDict(accum=[], mags=[], dists=[], lons=[], lats=[])
    try:
        gsim = cmaker.gsim_by_rlzi[iml2.rlzi]
    except KeyError:
        return acc
    pne_mon = monitor('disaggregate_pne', measuremem=False)
    [sid] = sitecol.sids
    acc['mags'] = rupdata['mag']
    acc['lons'] = rupdata['lon'][:, sid]
    acc['lats'] = rupdata['lat'][:, sid]
    acc['dists'] = dists = rupdata[cmaker.filter_distance][:, sid]
    if gsim.minimum_distance:
        dists[dists < gsim.minimum_distance] = gsim.minimum_distance
    # compute epsilon bin contributions only once
    eps_bands = truncnorm.cdf(epsilons[1:]) - truncnorm.cdf(epsilons[:-1])
    for rec in rupdata:
        rctx = contexts.RuptureContext(rec)
        dctx = contexts.DistancesContext(
            (param, rec[param][[sid]])
            for param in cmaker.REQUIRES_DISTANCES).roundup(
                gsim.minimum_distance)
        for m, imt in enumerate(iml2.imts):
            for p, poe in enumerate(iml2.poes_disagg):
                iml = iml2[m, p]
                with pne_mon:
                    pne = disaggregate_pne(gsim, rctx, sitecol, dctx, imt,
                                           iml, truncnorm, epsilons,
                                           eps_bands)
                acc[poe, str(imt), iml2.rlzi].append(pne)
    return acc
def get_calc(self, testfile, job_ini, **kw):
    """
    Return the outputs of the calculation as a dictionary
    """
    self.testdir = os.path.dirname(testfile) if os.path.isfile(testfile) \
        else testfile
    inis = [os.path.join(self.testdir, ini) for ini in job_ini.split(',')]
    params = readinput.get_params(inis)
    params.update(kw)
    oq = oqvalidation.OqParam(**params)
    oq.validate()
    # change this when debugging the test
    monitor = Monitor(self.testdir)
    return base.calculators(oq, monitor)
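
# A typical use of the helper above in a test method (illustrative only;
# `case_1` stands for a hypothetical test-fixture package containing a
# job.ini file):
def test_case_1(self):
    calc = self.get_calc(case_1.__file__, 'job.ini')
    calc.run()
    self.assertIn('oqparam', calc.datastore)  # saved by save_params in run()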
class BaseCalculator(metaclass=abc.ABCMeta):
    """
    Abstract base class for all calculators.

    :param oqparam: OqParam object
    :param monitor: monitor object
    :param calc_id: numeric calculation ID
    """
    precalc = None
    accept_precalc = []
    from_engine = False  # set by engine.run_calc
    is_stochastic = False  # True for scenario and event based calculators

    def __init__(self, oqparam, calc_id=None):
        self.datastore = datastore.DataStore(calc_id)
        self._monitor = Monitor(
            '%s.run' % self.__class__.__name__, measuremem=True)
        self.oqparam = oqparam
        if 'performance_data' not in self.datastore:
            self.datastore.create_dset('performance_data', perf_dt)

    def monitor(self, operation='', **kw):
        """
        :returns: a new Monitor instance
        """
        mon = self._monitor(operation, hdf5=self.datastore.hdf5)
        self._monitor.calc_id = mon.calc_id = self.datastore.calc_id
        vars(mon).update(kw)
        return mon

    def save_params(self, **kw):
        """
        Update the current calculation parameters and save engine_version
        """
        if ('hazard_calculation_id' in kw and
                kw['hazard_calculation_id'] is None):
            del kw['hazard_calculation_id']
        vars(self.oqparam).update(**kw)
        self.datastore['oqparam'] = self.oqparam  # save the updated oqparam
        attrs = self.datastore['/'].attrs
        attrs['engine_version'] = engine_version
        attrs['date'] = datetime.now().isoformat()[:19]
        if 'checksum32' not in attrs:
            attrs['checksum32'] = readinput.get_checksum32(self.oqparam)
        self.datastore.flush()

    def check_precalc(self, precalc_mode):
        """
        Defensive programming against users providing an incorrect
        pre-calculation ID (with ``--hazard-calculation-id``).

        :param precalc_mode:
            calculation_mode of the previous calculation
        """
        calc_mode = self.oqparam.calculation_mode
        ok_mode = self.accept_precalc
        if calc_mode != precalc_mode and precalc_mode not in ok_mode:
            raise InvalidCalculationID(
                'In order to run a calculation of kind %r, '
                'you need to provide a calculation of kind %r, '
                'but you provided a %r instead' %
                (calc_mode, ok_mode, precalc_mode))

    def run(self, pre_execute=True, concurrent_tasks=None, close=True, **kw):
        """
        Run the calculation and return the exported outputs.
        """
        with self._monitor:
            self._monitor.username = kw.get('username', '')
            self._monitor.hdf5 = self.datastore.hdf5
            if concurrent_tasks is None:  # use the job.ini parameter
                ct = self.oqparam.concurrent_tasks
            else:  # used the parameter passed in the command-line
                ct = concurrent_tasks
            if ct == 0:  # disable distribution temporarily
                oq_distribute = os.environ.get('OQ_DISTRIBUTE')
                os.environ['OQ_DISTRIBUTE'] = 'no'
            if ct != self.oqparam.concurrent_tasks:
                # save the used concurrent_tasks
                self.oqparam.concurrent_tasks = ct
            self.save_params(**kw)
            try:
                if pre_execute:
                    self.pre_execute()
                self.result = self.execute()
                if self.result is not None:
                    self.post_execute(self.result)
                self.before_export()
                self.export(kw.get('exports', ''))
            except Exception:
                if kw.get('pdb'):  # post-mortem debug
                    tb = sys.exc_info()[2]
                    traceback.print_tb(tb)
                    pdb.post_mortem(tb)
                else:
                    logging.critical('', exc_info=True)
                    raise
            finally:
                # cleanup globals
                if ct == 0:  # restore OQ_DISTRIBUTE
                    if oq_distribute is None:  # was not set
                        del os.environ['OQ_DISTRIBUTE']
                    else:
                        os.environ['OQ_DISTRIBUTE'] = oq_distribute
                readinput.pmap = None
                readinput.exposure = None
                readinput.gmfs = None
                readinput.eids = None
                self._monitor.flush()

                if close:  # in the engine we close later
                    self.result = None
                    try:
                        self.datastore.close()
                    except (RuntimeError, ValueError):
                        # sometimes produces errors but they are difficult to
                        # reproduce
                        logging.warning('', exc_info=True)
        return getattr(self, 'exported', {})

    def core_task(*args):
        """
        Core routine running on the workers.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def pre_execute(self):
        """
        Initialization phase.
        """

    @abc.abstractmethod
    def execute(self):
        """
        Execution phase. Usually will run in parallel the core
        function and return a dictionary with the results.
        """

    @abc.abstractmethod
    def post_execute(self, result):
        """
        Post-processing phase of the aggregated output. It must be
        overridden with the export code. It will return a dictionary
        of output files.
        """

    def export(self, exports=None):
        """
        Export all the outputs in the datastore in the given export formats.
        Individual outputs are not exported if there are multiple
        realizations.
        """
        self.exported = getattr(self.precalc, 'exported', {})
        if isinstance(exports, tuple):
            fmts = exports
        elif exports:  # is a string
            fmts = exports.split(',')
        elif isinstance(self.oqparam.exports, tuple):
            fmts = self.oqparam.exports
        else:  # is a string
            fmts = self.oqparam.exports.split(',')
        keys = set(self.datastore)
        has_hcurves = ('hcurves-stats' in self.datastore or
                       'hcurves-rlzs' in self.datastore)
        if has_hcurves:
            keys.add('hcurves')
        for fmt in fmts:
            if not fmt:
                continue
            for key in sorted(keys):  # top level keys
                if 'rlzs' in key and self.R > 1:
                    continue  # skip individual curves
                self._export((key, fmt))
            if has_hcurves and self.oqparam.hazard_maps:
                self._export(('hmaps', fmt))
            if has_hcurves and self.oqparam.uniform_hazard_spectra:
                self._export(('uhs', fmt))

    def _export(self, ekey):
        if ekey not in exp or self.exported.get(ekey):  # already exported
            return
        with self.monitor('export'):
            try:
                self.exported[ekey] = fnames = exp(ekey, self.datastore)
            except Exception as exc:
                fnames = []
                logging.error('Could not export %s: %s', ekey, exc)
            if fnames:
                logging.info('exported %s: %s', ekey[0], fnames)

    def before_export(self):
        """
        Set the attributes nbytes
        """
        # sanity check that eff_ruptures have been set, i.e. are not -1
        try:
            csm_info = self.datastore['csm_info']
        except KeyError:
            csm_info = self.datastore['csm_info'] = self.csm.info
        for sm in csm_info.source_models:
            for sg in sm.src_groups:
                assert sg.eff_ruptures != -1, sg
        for key in self.datastore:
            self.datastore.set_nbytes(key)
        self.datastore.flush()
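
# A minimal sketch (not engine code) of how a concrete calculator fills in
# the abstract hooks defined by BaseCalculator above; the method bodies are
# purely illustrative and do not reflect any real calculator.
class ToyCalculator(BaseCalculator):
    def pre_execute(self):
        self.data = list(range(10))  # initialization phase

    def execute(self):
        # a real calculator would distribute core_task over the workers here
        return {'total': sum(self.data)}

    def post_execute(self, result):
        # a real calculator would save the result into self.datastore here
        print('total =', result['total'])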