def compute_gmfs(job_id, sites, rupture_id, gmfcoll_id, realizations):
    """
    Compute ground motion fields and store them in the db.

    :param job_id: ID of the currently running job.
    :param sites: the subset of the full SiteCollection scanned by this task
    :param rupture_id: the parsed rupture model from which we will generate
        ground motion fields.
    :param gmfcoll_id: the id of a
        :class:`openquake.engine.db.models.Gmf` record
    :param realizations: number of realizations to create.
    """
    hc = models.HazardCalculation.objects.get(oqjob=job_id)
    parsed = models.ParsedRupture.objects.get(id=rupture_id)
    rupture_mdl = source.nrml_to_hazardlib(
        parsed.nrml, hc.rupture_mesh_spacing, None, None)
    imts = [haz_general.imt_to_hazardlib(imt_str)
            for imt_str in hc.intensity_measure_types]
    # instantiate the GSIM class chosen in the calculation settings
    gsim = AVAILABLE_GSIMS[hc.gsim]()
    correl_model = haz_general.get_correl_model(hc)

    with EnginePerformanceMonitor('computing gmfs', job_id, gmfs):
        gmf = ground_motion_fields(
            rupture_mdl, sites, imts, gsim, hc.truncation_level,
            realizations=realizations, correlation_model=correl_model)

    with EnginePerformanceMonitor('saving gmfs', job_id, gmfs):
        save_gmf(gmfcoll_id, gmf, sites)
def post_process(self):
    """
    If requested, convert the computed GMFs into hazard curves, then
    optionally build mean/quantile aggregate curves and hazard maps.
    """
    if not self.hc.hazard_curves_from_gmfs:
        return

    with EnginePerformanceMonitor('generating hazard curves',
                                  self.job.id):
        self.parallelize(
            post_processing.gmf_to_hazard_curve_task,
            post_processing.gmf_to_hazard_curve_arg_gen(self.job))

    # Aggregate post-processing runs when mean curves are requested
    # and/or at least one quantile level is given.
    if self.hc.mean_hazard_curves or self.hc.quantile_hazard_curves:
        with EnginePerformanceMonitor(
                'generating mean/quantile curves', self.job.id):
            self.do_aggregate_post_proc()

    if self.hc.hazard_maps:
        with EnginePerformanceMonitor(
                'generating hazard maps', self.job.id):
            self.parallelize(
                cls_post_proc.hazard_curves_to_hazard_map_task,
                cls_post_proc.hazard_curves_to_hazard_map_task_arg_gen(
                    self.job))
def compute_gmf(job_id, params, imt, gsims, ses, site_coll,
                rupture_ids, rupture_seeds):
    """
    Compute and save the GMFs for all the ruptures in a SES.
    """
    hazardlib_imt = haz_general.imt_to_hazardlib(imt)

    with EnginePerformanceMonitor(
            'reading ruptures', job_id, compute_gmf):
        ruptures = list(
            models.SESRupture.objects.filter(pk__in=rupture_ids))

    with EnginePerformanceMonitor(
            'computing gmfs', job_id, compute_gmf):
        gmvs_per_site, ruptures_per_site = _compute_gmf(
            params, hazardlib_imt, gsims, site_coll, ruptures,
            rupture_seeds)

    with EnginePerformanceMonitor('saving gmfs', job_id, compute_gmf):
        _save_gmfs(ses, hazardlib_imt, gmvs_per_site,
                   ruptures_per_site, site_coll)
def wrapped(*args, **kwargs): """ Initialize logs, make sure the job is still running, and run the task code surrounded by a try-except. If any error occurs, log it as a critical failure. """ # job_id is always assumed to be the first argument passed to # the task, or a keyword argument # this is the only required argument job_id = kwargs.get('job_id') or args[0] job = models.OqJob.objects.get(id=job_id) if job.is_running is False: # the job was killed, it is useless to run the task return # it is important to save the task ids soon, so that # the revoke functionality implemented in supervisor.py can work EnginePerformanceMonitor.store_task_id(job_id, tsk) with EnginePerformanceMonitor('total ' + task_func.__name__, job_id, tsk, flush=True): with EnginePerformanceMonitor('loading calculation object', job_id, tsk, flush=True): calculation = job.calculation # Set up logging via amqp. if isinstance(calculation, models.HazardCalculation): logs.init_logs_amqp_send(level=job.log_level, calc_domain='hazard', calc_id=calculation.id) else: logs.init_logs_amqp_send(level=job.log_level, calc_domain='risk', calc_id=calculation.id) try: res = task_func(*args, **kwargs) except Exception, err: logs.LOG.critical('Error occurred in task: %s', err) logs.LOG.exception(err) raise else:
def test_engine_performance_monitor_no_task(self):
    """A monitor created without a task still stores one db record."""
    job = engine.create_job()
    operation = str(uuid.uuid1())

    with EnginePerformanceMonitor(operation, job.id) as pmon:
        pass

    self._check_result(pmon)
    pmon.flush()
    # exactly one Performance row must exist for this operation
    matching = Performance.objects.filter(operation=operation)
    self.assertEqual(len(matching), 1)
def test_engine_performance_monitor(self):
    """A monitor bound to a task stores one record keyed by task_id."""
    job = engine.create_job()
    mock_task = mock.Mock()
    mock_task.__name__ = 'mock_task'
    task_id = str(uuid.uuid1())
    mock_task.request.id = task_id

    with EnginePerformanceMonitor('test', job.id, mock_task) as pmon:
        pass

    self._check_result(pmon)
    # check that one record was stored on the db, as it should
    pmon.flush()
    stored = Performance.objects.filter(task_id=task_id)
    self.assertEqual(len(stored), 1)
def __init__(self, job, monitor=None):
    """
    Store the job, its parameters and a performance monitor, then read
    the concurrency and weight limits from openquake.cfg.
    """
    self.job = job
    self.oqparam = self.job.get_oqparam()
    self.monitor = monitor or EnginePerformanceMonitor('', job.id)
    self.num_tasks = None
    self._task_args = []
    # parameters from openquake.cfg
    cfg = config.get
    self.concurrent_tasks = int(cfg('celery', 'concurrent_tasks'))
    self.max_input_weight = float(cfg('hazard', 'max_input_weight'))
    self.max_output_weight = float(cfg('hazard', 'max_output_weight'))
    # NB: this mutates a class-level attribute shared by all TrtModels
    TrtModel.POINT_SOURCE_WEIGHT = float(
        cfg('hazard', 'point_source_weight'))
def run_calc(job, log_level, log_file, exports, lite=False):
    """
    Run a calculation.

    :param job:
        :class:`openquake.engine.db.model.OqJob` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard
        output.
    :param exports:
        A comma-separated string of export types.
    :param lite:
        Flag set when the oq-lite calculators are used
    """
    # The calculator classes are imported lazily on purpose: the command
    # `$ oq-engine --upgrade-db` does not need them and would raise
    # strange errors during installation time if the PYTHONPATH is not
    # set and commonlib is not visible.
    if lite:
        from openquake.commonlib.calculators import base
        calculator = base.calculators(job.get_oqparam())
        calculator.job = job
        calculator.monitor = EnginePerformanceMonitor('', job.id)
    else:
        from openquake.engine.calculators import calculators
        calculator = calculators(job)

    # refuse to run anything against an outdated database
    upgrader.check_versions(django_db.connections['admin'])

    with logs.handle(job, log_level, log_file), job_stats(job):
        try:
            _do_run_calc(calculator, exports)
        except:
            # log the full traceback before re-raising
            logs.LOG.critical(traceback.format_exc())
            raise
    return calculator
def profile(name):
    """Return a tracing performance monitor for the given operation."""
    monitor = EnginePerformanceMonitor(
        name, job_id, event_based, tracing=True)
    return monitor
def post_process(self):
    """
    Compute aggregate loss curves and event loss tables
    """
    with EnginePerformanceMonitor('post processing', self.job.id):

        time_span, tses = self.hazard_times()
        # one event loss table (and optionally one aggregate loss curve)
        # per (loss_type, hazard output) pair
        for loss_type, event_loss_table in self.event_loss_tables.items():
            for hazard_output in self.rc.hazard_outputs():

                event_loss = models.EventLoss.objects.create(
                    output=models.Output.objects.create_output(
                        self.job,
                        "Event Loss Table. type=%s, hazard=%s" % (
                            loss_type, hazard_output.id),
                        "event_loss"),
                    loss_type=loss_type,
                    hazard_output=hazard_output)
                # bulk inserter: buffers up to 9999 rows before writing
                inserter = writer.CacheInserter(models.EventLossData, 9999)
                # ids of the ruptures belonging to the same logic tree
                # realization as this hazard output
                rupture_ids = models.SESRupture.objects.filter(
                    ses__ses_collection__lt_realization=hazard_output.
                    output_container.lt_realization).values_list(
                    'id', flat=True)
                for rupture_id in rupture_ids:
                    if rupture_id in event_loss_table:
                        inserter.add(
                            models.EventLossData(
                                event_loss_id=event_loss.id,
                                rupture_id=rupture_id,
                                aggregate_loss=event_loss_table[
                                    rupture_id]))
                inserter.flush()

                # losses for the ruptures that actually produced a loss
                aggregate_losses = [
                    event_loss_table[rupture_id]
                    for rupture_id in rupture_ids
                    if rupture_id in event_loss_table]

                if aggregate_losses:
                    aggregate_loss_losses, aggregate_loss_poes = (
                        scientific.event_based(
                            aggregate_losses, tses=tses,
                            time_span=time_span,
                            curve_resolution=self.rc.loss_curve_resolution))

                    models.AggregateLossCurveData.objects.create(
                        loss_curve=models.LossCurve.objects.create(
                            aggregate=True, insured=False,
                            hazard_output=hazard_output,
                            loss_type=loss_type,
                            output=models.Output.objects.create_output(
                                self.job,
                                "aggregate loss curves. "
                                "loss_type=%s hazard=%s" % (
                                    loss_type, hazard_output),
                                "agg_loss_curve")),
                        losses=aggregate_loss_losses,
                        poes=aggregate_loss_poes,
                        average_loss=scientific.average_loss(
                            aggregate_loss_losses, aggregate_loss_poes),
                        stddev_loss=numpy.std(aggregate_losses))
def profile(name):
    """Return a tracing performance monitor for the given operation."""
    monitor = EnginePerformanceMonitor(
        name, job_id, classical_bcr, tracing=True)
    return monitor
def profile(name):
    """Return a tracing performance monitor for the given operation."""
    monitor = EnginePerformanceMonitor(
        name, job_id, scenario_damage, tracing=True)
    return monitor
def compute_ses(job_id, src_ids, ses, src_seeds, ltp):
    """
    Celery task for the stochastic event set calculator.

    Samples logic trees and calls the stochastic event set calculator.

    Once stochastic event sets are calculated, results will be saved to
    the database. See :class:`openquake.engine.db.models.SESCollection`.

    Optionally (specified in the job configuration using the
    `ground_motion_fields` parameter), GMFs can be computed from each
    rupture in each stochastic event set. GMFs are also saved to the
    database.

    :param int job_id:
        ID of the currently running job.
    :param src_ids:
        List of ids of parsed source models from which we will generate
        stochastic event sets/ruptures.
    :param ses:
        Stochastic Event Set object
    :param int src_seeds:
        Values for seeding numpy/scipy in the computation of stochastic
        event sets and ground motion fields from the sources
    :param ltp:
        a :class:`openquake.engine.input.LogicTreeProcessor` instance
    """
    hc = models.HazardCalculation.objects.get(oqjob=job_id)

    lt_rlz = ses.ses_collection.lt_realization
    apply_uncertainties = ltp.parse_source_model_logictree_path(
        lt_rlz.sm_lt_path)

    # complete_logic_tree_ses flag
    cmplt_lt_ses = None
    if hc.complete_logic_tree_ses:
        # the complete SES is conventionally stored with ordinal=None
        cmplt_lt_ses = models.SES.objects.get(
            ses_collection__output__oq_job=job_id,
            ordinal=None)

    with EnginePerformanceMonitor(
            'reading sources', job_id, compute_ses):
        sources = [apply_uncertainties(s.nrml)
                   for s in models.ParsedSource.objects.filter(
                       pk__in=src_ids)]

    # Compute and save stochastic event sets
    # For each rupture generated, we can optionally calculate a GMF
    with EnginePerformanceMonitor('computing ses', job_id, compute_ses):
        ruptures = []
        for src_seed, src in zip(src_seeds, sources):
            # first set the seed for the specific source
            numpy.random.seed(src_seed)
            # then make copies of the hazardlib ruptures (which may contain
            # duplicates): the copy is needed to keep the tags distinct
            rupts = map(copy.copy,
                        stochastic.stochastic_event_set_poissonian(
                            [src], hc.investigation_time))
            # set the tag for each copy
            for i, r in enumerate(rupts):
                r.tag = 'rlz=%02d|ses=%04d|src=%s|i=%03d' % (
                    lt_rlz.ordinal, ses.ordinal, src.source_id, i)
            ruptures.extend(rupts)
        if not ruptures:
            # no ruptures were generated by any source: nothing to save
            return

    with EnginePerformanceMonitor('saving ses', job_id, compute_ses):
        _save_ses_ruptures(ses, ruptures, cmplt_lt_ses)
def compute_hazard_curves(job_id, src_ids, lt_rlz_id, ltp):
    """
    Celery task for hazard curve calculator.

    Samples logic trees, gathers site parameters, and calls the hazard
    curve calculator.

    Once hazard curve data is computed, result progress updated (within a
    transaction, to prevent race conditions) in the
    `htemp.hazard_curve_progress` table.

    Once all of this work is complete, a signal will be sent via AMQP to
    let the control node know that the work is complete. (If there is any
    work left to be dispatched, this signal will indicate to the control
    node that more work can be enqueued.)

    :param int job_id:
        ID of the currently running job.
    :param src_ids:
        List of ids of parsed source models to take into account.
    :param lt_rlz_id:
        Id of logic tree realization model to calculate for.
    :param ltp:
        a :class:`openquake.engine.input.LogicTreeProcessor` instance
    """
    hc = models.HazardCalculation.objects.get(oqjob=job_id)

    lt_rlz = models.LtRealization.objects.get(id=lt_rlz_id)

    apply_uncertainties = ltp.parse_source_model_logictree_path(
        lt_rlz.sm_lt_path)
    gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)

    parsed_sources = models.ParsedSource.objects.filter(pk__in=src_ids)

    imts = haz_general.im_dict_to_hazardlib(
        hc.intensity_measure_types_and_levels)

    # Prepare args for the calculator.
    calc_kwargs = {'gsims': gsims,
                   'truncation_level': hc.truncation_level,
                   'time_span': hc.investigation_time,
                   'sources': [apply_uncertainties(s.nrml)
                               for s in parsed_sources],
                   'imts': imts,
                   'sites': hc.site_collection}

    if hc.maximum_distance:
        dist = hc.maximum_distance
        # NB: a better approach could be to filter the sources by distance
        # at the beginning and to store into the database only the
        # relevant sources, as we do in the event based calculator: I am
        # not doing that for the classical calculators because I wonder
        # about the performance impact in SHARE-like calculations. So at
        # the moment we store everything in the database and we filter on
        # the workers. This will probably change in the future.
        calc_kwargs['source_site_filter'] = (
            openquake.hazardlib.calc.filters.source_site_distance_filter(
                dist))
        calc_kwargs['rupture_site_filter'] = (
            openquake.hazardlib.calc.filters.rupture_site_distance_filter(
                dist))

    # mapping "imt" to 2d array of hazard curves: first dimension -- sites,
    # second -- IMLs
    # NOTE(review): `hazard_curves` below presumably names the module-level
    # celery task associated with this function -- confirm it is not a
    # stale reference after the rename to compute_hazard_curves
    with EnginePerformanceMonitor(
            'computing hazard curves', job_id, hazard_curves, tracing=True):
        matrices = openquake.hazardlib.calc.hazard_curve.\
            hazard_curves_poissonian(**calc_kwargs)

    with EnginePerformanceMonitor(
            'saving hazard curves', job_id, hazard_curves, tracing=True):
        _update_curves(hc, matrices, lt_rlz, src_ids)
def compute_disagg(job_id, sites, lt_rlz_id, ltp):
    """
    Calculate disaggregation histograms and save the results to the
    database.

    Here is the basic calculation workflow:

    1. Get all sources
    2. Get IMTs
    3. Get the hazard curve for each point, IMT, and realization
    4. For each `poes_disagg`, interpolate the IML for each curve.
    5. Get GSIMs, TOM (Temporal Occurence Model), and truncation level.
    6. Get histogram bin edges.
    7. Prepare calculation args.
    8. Call the hazardlib calculator (see
       :func:`openquake.hazardlib.calc.disagg.disaggregation` for more
       info).

    :param int job_id:
        ID of the currently running
        :class:`openquake.engine.db.models.OqJob`
    :param list sites:
        `list` of :class:`openquake.hazardlib.site.Site` objects, which
        indicate the locations (and associated soil parameters) for which
        we need to compute disaggregation histograms.
    :param int lt_rlz_id:
        ID of the :class:`openquake.engine.db.models.LtRealization` for
        which we want to compute disaggregation histograms. This
        realization will determine which hazard curve results to use as a
        basis for the calculation.
    :param ltp:
        a :class:`openquake.engine.input.LogicTreeProcessor` instance
    """
    # Silencing 'Too many local variables'
    # pylint: disable=R0914
    logs.LOG.debug(
        '> computing disaggregation for %(np)s sites for realization %(rlz)s'
        % dict(np=len(sites), rlz=lt_rlz_id))

    job = models.OqJob.objects.get(id=job_id)
    hc = job.hazard_calculation
    lt_rlz = models.LtRealization.objects.get(id=lt_rlz_id)

    apply_uncertainties = ltp.parse_source_model_logictree_path(
        lt_rlz.sm_lt_path)
    gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)

    src_ids = models.SourceProgress.objects.filter(lt_realization=lt_rlz)\
        .order_by('id').values_list('parsed_source_id', flat=True)
    sources = [apply_uncertainties(s.nrml)
               for s in models.ParsedSource.objects.filter(pk__in=src_ids)]

    # Make filters for distance to source and distance to rupture:
    # a better approach would be to filter the sources on distance
    # before, see the comment in the classical calculator
    src_site_filter = openquake.hazardlib.calc.filters.\
        source_site_distance_filter(hc.maximum_distance)
    rup_site_filter = openquake.hazardlib.calc.filters.\
        rupture_site_distance_filter(hc.maximum_distance)

    for imt, imls in hc.intensity_measure_types_and_levels.iteritems():
        hazardlib_imt = haz_general.imt_to_hazardlib(imt)
        hc_im_type, sa_period, sa_damping = models.parse_imt(imt)

        # hazard curves store IMLs in increasing order; reverse so that
        # numpy.interp below gets increasing poes (see curve.poes[::-1])
        imls = numpy.array(imls[::-1])

        # loop over sites
        for site in sites:
            # get curve for this point/IMT/realization; the destructuring
            # asserts exactly one matching curve exists
            [curve] = models.HazardCurveData.objects.filter(
                location=site.location.wkt2d,
                hazard_curve__lt_realization=lt_rlz_id,
                hazard_curve__imt=hc_im_type,
                hazard_curve__sa_period=sa_period,
                hazard_curve__sa_damping=sa_damping,
            )

            # If the hazard curve is all zeros, don't even do the
            # disagg calculation.
            if all([x == 0.0 for x in curve.poes]):
                logs.LOG.debug(
                    '* hazard curve contained all 0 probability values; '
                    'skipping')
                continue

            for poe in hc.poes_disagg:
                # invert the hazard curve: find the IML at this poe
                iml = numpy.interp(poe, curve.poes[::-1], imls)
                calc_kwargs = {
                    'sources': sources,
                    'site': site,
                    'imt': hazardlib_imt,
                    'iml': iml,
                    'gsims': gsims,
                    'time_span': hc.investigation_time,
                    'truncation_level': hc.truncation_level,
                    'n_epsilons': hc.num_epsilon_bins,
                    'mag_bin_width': hc.mag_bin_width,
                    'dist_bin_width': hc.distance_bin_width,
                    'coord_bin_width': hc.coordinate_bin_width,
                    'source_site_filter': src_site_filter,
                    'rupture_site_filter': rup_site_filter,
                }
                with EnginePerformanceMonitor(
                        'computing disaggregation', job_id, disagg_task):
                    bin_edges, diss_matrix = openquake.hazardlib.calc.\
                        disagg.disaggregation_poissonian(**calc_kwargs)
                if not bin_edges:  # no ruptures generated
                    continue

                with EnginePerformanceMonitor(
                        'saving disaggregation', job_id, disagg_task):
                    _save_disagg_matrix(
                        job, site, bin_edges, diss_matrix, lt_rlz,
                        hc.investigation_time, hc_im_type, iml, poe,
                        sa_period, sa_damping)

    with transaction.commit_on_success():
        # Update realization progress,
        # mark realization as complete if it is done
        haz_general.update_realization(lt_rlz_id, len(sites))

    logs.LOG.debug('< done computing disaggregation')
def setUpClass(cls):
    """Run a classical hazard job once and attach a monitor to it."""
    cfg_path = helpers.get_data_path(
        'calculators/hazard/classical/haz_map_test_job2.ini')
    cls.job = helpers.run_job(cfg_path).job
    models.JobStats.objects.create(oq_job=cls.job)
    cls.monitor = EnginePerformanceMonitor('', cls.job.id)