def _do_run_calc(calc, exports):
    """
    Step through all of the phases of a calculation, updating the job
    status at each phase.

    :param calc:
        An :class:`~openquake.engine.calculators.base.Calculator` instance.
    :param exports:
        a (potentially empty) comma-separated string of export targets
    """
    job = calc.job
    log_status(job, "pre_executing")
    calc.pre_execute()
    log_status(job, "executing")
    calc.execute()
    log_status(job, "post_executing")
    calc.post_execute()
    log_status(job, "post_processing")
    calc.post_process()
    log_status(job, "export")
    calc.export(exports=exports)
    log_status(job, "clean_up")
    calc.clean_up()
    CacheInserter.flushall()  # flush caches into the db
    log_status(job, "complete")
def _save_uhs(job, uhs_results, poe, rlz=None, statistics=None, quantile=None):
    """
    Save computed UHS data to the DB.

    UHS results can be either for an end branch or for mean or quantile
    statistics.

    :param job:
        :class:`openquake.engine.db.models.OqJob` instance to be associated
        with the results.
    :param uhs_results:
        UHS computation results structured like the output of
        :func:`make_uhs`.
    :param float poe:
        Probability of exceedance of the hazard maps from which these UH
        Spectra were produced.
    :param rlz:
        :class:`openquake.engine.db.models.LtRealization`. Specify only if
        these results are for an end branch.
    :param statistics:
        'mean' or 'quantile'. Specify only if these are statistical results.
    :param float quantile:
        Specify only if ``statistics`` == 'quantile'.
    """
    output = models.Output(
        oq_job=job,
        owner=job.owner,
        output_type='uh_spectra'
    )
    uhs = models.UHS(
        poe=poe,
        investigation_time=job.hazard_calculation.investigation_time,
        periods=uhs_results['periods'],
    )
    if rlz is not None:
        uhs.lt_realization = rlz
        output.display_name = _UHS_DISP_NAME_FMT % dict(poe=poe, rlz=rlz.id)
    elif statistics is not None:
        uhs.statistics = statistics
        if statistics == 'quantile':
            uhs.quantile = quantile
            output.display_name = (_UHS_DISP_NAME_QUANTILE_FMT
                                   % dict(poe=poe, quantile=quantile))
        else:
            # mean
            output.display_name = _UHS_DISP_NAME_MEAN_FMT % dict(poe=poe)
    output.save()
    uhs.output = output
    # This should fail if neither `lt_realization` nor `statistics` is
    # defined:
    uhs.save()

    with transaction.commit_on_success(using='reslt_writer'):
        inserter = CacheInserter(models.UHSData, CURVE_CACHE_SIZE)
        for lon, lat, imls in uhs_results['uh_spectra']:
            inserter.add(
                models.UHSData(
                    uhs_id=uhs.id,
                    imls='{%s}' % ','.join(str(x) for x in imls),
                    location='POINT(%s %s)' % (lon, lat))
            )
        inserter.flush()
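The `uhs_results` argument above is documented only by reference to `make_uhs`, but the loop at the bottom of `_save_uhs` fixes its shape: a 'periods' list shared by all sites plus one (lon, lat, imls) triple per site. A minimal sketch of that shape, with invented numbers, useful for example when writing tests against `_save_uhs`:

# Hypothetical example data: only the dict keys and the (lon, lat, imls)
# layout are implied by _save_uhs, the numeric values are invented.
uhs_results = {
    'periods': [0.025, 0.1, 0.2],          # one spectral period per IML
    'uh_spectra': [
        (0.0, 0.0, (0.31, 0.42, 0.28)),    # lon, lat, imls
        (0.0, 0.5, (0.29, 0.40, 0.25)),
    ],
}

for lon, lat, imls in uhs_results['uh_spectra']:
    # same serialization used when building the models.UHSData rows
    print('POINT(%s %s) -> {%s}' % (lon, lat, ','.join(str(x) for x in imls)))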
def _do_run_calc(calc, exports):
    """
    Step through all of the phases of a calculation, updating the job
    status at each phase.

    :param calc:
        An :class:`~openquake.engine.calculators.base.Calculator` instance.
    :param exports:
        a (potentially empty) comma-separated string of export targets
    """
    job = calc.job
    log_status(job, "pre_executing")
    calc.pre_execute()
    log_status(job, "executing")
    result = calc.execute()
    log_status(job, "post_executing")
    calc.post_execute(result)
    log_status(job, "post_processing")
    calc.post_process()
    log_status(job, "export")
    calc.export(exports=exports)
    log_status(job, "clean_up")
    calc.clean_up()
    CacheInserter.flushall()  # flush caches into the db
    log_status(job, "complete")
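Both variants of `_do_run_calc` above only require the calculator to expose a `.job` attribute and one method per phase. A minimal, engine-free sketch of that implied interface; all class names here are hypothetical:

class FakeJob(object):
    # stands in for an OqJob; only the attributes a stub needs
    id = 1
    is_running = True


class FakeCalculator(object):
    """Implements just the phase methods that _do_run_calc calls."""

    def __init__(self, job):
        self.job = job

    def pre_execute(self):
        print('reading inputs')

    def execute(self):
        return 'intermediate result'

    def post_execute(self, result):
        print('saving %s' % result)

    def post_process(self):
        pass

    def export(self, exports=''):
        pass

    def clean_up(self):
        pass


calc = FakeCalculator(FakeJob())
# the same phase sequence as _do_run_calc, minus the status logging
# and the CacheInserter flush
calc.pre_execute()
result = calc.execute()
calc.post_execute(result)
calc.post_process()
calc.export(exports='')
calc.clean_up()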
def wrapped(*args):
    """
    Initialize logs, make sure the job is still running, and run the
    task code surrounded by a try-except. If any error occurs, log it
    as a critical failure.
    """
    # the last argument is assumed to be a monitor
    monitor = args[-1]
    job = models.OqJob.objects.get(id=monitor.job_id)
    if job.is_running is False:
        # the job was killed, it is useless to run the task
        raise JobNotRunning(monitor.job_id)

    # it is important to save the task id soon, so that
    # the revoke functionality can work
    with monitor("storing task id", task=tsk, autoflush=True):
        pass

    with logs.handle(job):
        check_mem_usage()  # warn if too much memory is used
        # run the task
        try:
            total = "total " + task_func.__name__
            with monitor(total, task=tsk):
                with GroundShakingIntensityModel.forbid_instantiation():
                    return task_func(*args)
        finally:
            # save on the db
            CacheInserter.flushall()
            # the task finished, we can remove from the performance
            # table the associated row 'storing task id'
            models.Performance.objects.filter(
                oq_job=job,
                operation="storing task id",
                task_id=tsk.request.id).delete()
def wrapped(*args):
    """
    Initialize logs, make sure the job is still running, and run the
    task code surrounded by a try-except. If any error occurs, log it
    as a critical failure.
    """
    # job_id is always assumed to be the first argument
    job_id = args[0]
    job = models.OqJob.objects.get(id=job_id)
    if job.is_running is False:
        # the job was killed, it is useless to run the task
        raise JobNotRunning(job_id)

    # it is important to save the task id soon, so that
    # the revoke functionality can work
    EnginePerformanceMonitor.store_task_id(job_id, tsk)

    with EnginePerformanceMonitor(
            'total ' + task_func.__name__, job_id, tsk, flush=True):
        # tasks write on the celery log file
        logs.set_level(job.log_level)
        check_mem_usage()  # log a warning if too much memory is used
        try:
            # run the task
            return task_func(*args)
        finally:
            # save on the db
            CacheInserter.flushall()
            # the task finished, we can remove from the performance
            # table the associated row 'storing task id'
            models.Performance.objects.filter(
                oq_job=job,
                operation='storing task id',
                task_id=tsk.request.id).delete()
def wrapped(*args):
    """
    Initialize logs, make sure the job is still running, and run the
    task code surrounded by a try-except. If any error occurs, log it
    as a critical failure.
    """
    # the last argument is assumed to be a monitor
    monitor = args[-1]
    job = models.OqJob.objects.get(id=monitor.job_id)
    if job.is_running is False:
        # the job was killed, it is useless to run the task
        raise JobNotRunning(monitor.job_id)

    # it is important to save the task id soon, so that
    # the revoke functionality can work
    with monitor('storing task id', task=tsk, autoflush=True):
        pass

    with logs.handle(job):
        check_mem_usage()  # warn if too much memory is used
        # run the task
        try:
            total = 'total ' + task_func.__name__
            with monitor(total, task=tsk, autoflush=True):
                return task_func(*args)
        finally:
            # save on the db
            CacheInserter.flushall()
            # the task finished, we can remove from the performance
            # table the associated row 'storing task id'
            models.Performance.objects.filter(
                oq_job=job,
                operation='storing task id',
                task_id=tsk.request.id).delete()
def wrapped(*args):
    """
    Initialize logs, make sure the job is still running, and run the
    task code surrounded by a try-except. If any error occurs, log it
    as a critical failure.
    """
    # job_id is always assumed to be the first argument
    job_id = args[0]
    job = models.OqJob.objects.get(id=job_id)
    if job.is_running is False:
        # the job was killed, it is useless to run the task
        return

    # it is important to save the task id soon, so that
    # the revoke functionality can work
    EnginePerformanceMonitor.store_task_id(job_id, tsk)

    with EnginePerformanceMonitor(
            'total ' + task_func.__name__, job_id, tsk, flush=True):
        with EnginePerformanceMonitor(
                'loading calculation object', job_id, tsk, flush=True):
            calculation = job.calculation

        # tasks write on the celery log file
        logs.init_logs(
            level=job.log_level,
            calc_domain='hazard' if isinstance(
                calculation, models.HazardCalculation) else 'risk',
            calc_id=calculation.id)
        try:
            return task_func(*args), None
        except:
            etype, exc, tb = sys.exc_info()
            tb_str = ''.join(traceback.format_tb(tb))
            return '%s\n%s' % (exc, tb_str), etype
        finally:
            CacheInserter.flushall()
            # the task finished, we can remove from the performance
            # table the associated row 'storing task id'
            models.Performance.objects.filter(
                oq_job=job,
                operation='storing task id',
                task_id=tsk.request.id).delete()
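In every `wrapped` variant, `task_func` and `tsk` are free variables bound by an enclosing decorator factory (`oqtask` in the engine; see the `task(wrapped, ...)` fragments further down). A celery-free sketch of the same closure pattern, with hypothetical names:

import functools


def make_task(task_func):
    # plays the role of oqtask: wrap a function, keep its name, and
    # guarantee the cleanup in the finally block always runs
    @functools.wraps(task_func)
    def wrapped(*args):
        job_id = args[0]  # job_id assumed to be the first argument
        print('starting %s for job %d' % (task_func.__name__, job_id))
        try:
            return task_func(*args)
        finally:
            # in the engine this is where CacheInserter.flushall() and
            # the 'storing task id' cleanup happen
            print('finished %s' % task_func.__name__)
    return wrapped


@make_task
def compute_something(job_id, sources=()):
    return len(sources)


print(compute_something(42, ('src_a', 'src_b')))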
def _do_run_calc(job, exports, calc, job_type):
    """
    Step through all of the phases of a calculation, updating the job
    status at each phase.

    :param job:
        An :class:`~openquake.engine.db.models.OqJob` instance.
    :param list exports:
        a (potentially empty) list of export targets, currently only
        "xml" is supported
    :param calc:
        the calculator instance that will run the phases of the job
    :param str job_type:
        calculation type (hazard|risk)
    :returns:
        The input job object when the calculation completes.
    """
    _switch_to_job_phase(job, job_type, "pre_executing")
    calc.progress_handler("pre_executing", calc.hc)
    calc.pre_execute()

    _switch_to_job_phase(job, job_type, "executing")
    calc.progress_handler("executing", calc.hc)
    calc.execute()

    _switch_to_job_phase(job, job_type, "post_executing")
    calc.progress_handler("post_executing", calc.hc)
    calc.post_execute()

    _switch_to_job_phase(job, job_type, "post_processing")
    calc.progress_handler("post_processing", calc.hc)
    calc.post_process()

    _switch_to_job_phase(job, job_type, "export")
    calc.export(exports=exports)

    _switch_to_job_phase(job, job_type, "clean_up")
    calc.clean_up()

    CacheInserter.flushall()  # flush caches into the db

    _switch_to_job_phase(job, job_type, "complete")
    calc.progress_handler("calculation complete", calc.hc)
    logs.LOG.debug("*> complete")

    return job
def test_insert_gmf(self):
    cache = CacheInserter(GmfData, 10)
    gmf1 = GmfData(
        gmf_id=1, imt='PGA', gmvs=[], rupture_ids=[], site_id=1)
    gmf2 = GmfData(
        gmf_id=1, imt='PGA', gmvs=[], rupture_ids=[], site_id=2)
    cache.add(gmf1)
    cache.add(gmf2)
    cache.flush()
    connection = writer.connections['job_init']
    self.assertEqual(
        connection.data,
        '1\t\\N\tPGA\t\\N\t\\N\t{}\t{}\t1\n1\t\\N\tPGA\t\\N\t\\N\t{}\t{}\t2\n')
    self.assertEqual(connection.table, '"hzrdr"."gmf_data"')
    self.assertEqual(
        connection.columns,
        ['gmf_id', 'task_no', 'imt', 'sa_period', 'sa_damping',
         'gmvs', 'rupture_ids', 'site_id'])
def _do_run_calc(job, exports, calc, job_type):
    """
    Step through all of the phases of a calculation, updating the job
    status at each phase.

    :param job:
        An :class:`~openquake.engine.db.models.OqJob` instance.
    :param list exports:
        a (potentially empty) list of export targets, currently only
        "xml" is supported
    :param calc:
        the calculator instance that will run the phases of the job
    :param str job_type:
        calculation type (hazard|risk)
    :returns:
        The input job object when the calculation completes.
    """
    # - Run the calculation
    _switch_to_job_phase(job, job_type, "pre_executing")
    calc.pre_execute()

    _switch_to_job_phase(job, job_type, "executing")
    calc.execute()

    _switch_to_job_phase(job, job_type, "post_executing")
    calc.post_execute()

    _switch_to_job_phase(job, job_type, "post_processing")
    calc.post_process()

    _switch_to_job_phase(job, job_type, "export")
    calc.export(exports=exports)

    _switch_to_job_phase(job, job_type, "clean_up")
    calc.clean_up()

    CacheInserter.flushall()  # flush caches into the db

    _switch_to_job_phase(job, job_type, "complete")
    logs.LOG.debug("*> complete")

    return job
def _do_run_calc(calc, exports, job_type):
    """
    Step through all of the phases of a calculation, updating the job
    status at each phase.

    :param calc:
        An :class:`~openquake.engine.calculators.base.Calculator` instance.
    :param list exports:
        a (potentially empty) list of export targets, currently only
        "xml" is supported
    :param str job_type:
        calculation type (hazard|risk)
    """
    job = calc.job

    _switch_to_job_phase(job, job_type, "pre_executing")
    calc.pre_execute()

    _switch_to_job_phase(job, job_type, "executing")
    calc.execute()

    _switch_to_job_phase(job, job_type, "post_executing")
    calc.post_execute()

    _switch_to_job_phase(job, job_type, "post_processing")
    calc.post_process()

    _switch_to_job_phase(job, job_type, "export")
    calc.export(exports=exports)

    _switch_to_job_phase(job, job_type, "clean_up")
    calc.clean_up()

    CacheInserter.flushall()  # flush caches into the db

    _switch_to_job_phase(job, job_type, "complete")
    logs.LOG.debug("*> complete")
def _save_uhs(job, uhs_results, poe, rlz=None, statistics=None, quantile=None):
    """
    Save computed UHS data to the DB.

    UHS results can be either for an end branch or for mean or quantile
    statistics.

    :param job:
        :class:`openquake.engine.db.models.OqJob` instance to be associated
        with the results.
    :param uhs_results:
        UHS computation results structured like the output of
        :func:`make_uhs`.
    :param float poe:
        Probability of exceedance of the hazard maps from which these UH
        Spectra were produced.
    :param rlz:
        :class:`openquake.engine.db.models.LtRealization`. Specify only if
        these results are for an end branch.
    :param statistics:
        'mean' or 'quantile'. Specify only if these are statistical results.
    :param float quantile:
        Specify only if ``statistics`` == 'quantile'.
    """
    output = models.Output(oq_job=job, output_type='uh_spectra')
    uhs = models.UHS(
        poe=poe,
        investigation_time=job.hazard_calculation.investigation_time,
        periods=uhs_results['periods'],
    )
    if rlz is not None:
        uhs.lt_realization = rlz
        output.display_name = _UHS_DISP_NAME_FMT % dict(poe=poe, rlz=rlz.id)
    elif statistics is not None:
        uhs.statistics = statistics
        if statistics == 'quantile':
            uhs.quantile = quantile
            output.display_name = (_UHS_DISP_NAME_QUANTILE_FMT
                                   % dict(poe=poe, quantile=quantile))
        else:
            # mean
            output.display_name = _UHS_DISP_NAME_MEAN_FMT % dict(poe=poe)
    output.save()
    uhs.output = output
    # This should fail if neither `lt_realization` nor `statistics` is
    # defined:
    uhs.save()

    with transaction.commit_on_success(using='reslt_writer'):
        inserter = CacheInserter(models.UHSData, CURVE_CACHE_SIZE)
        for lon, lat, imls in uhs_results['uh_spectra']:
            inserter.add(
                models.UHSData(uhs_id=uhs.id,
                               imls='{%s}' % ','.join(str(x) for x in imls),
                               location='POINT(%s %s)' % (lon, lat)))
        inserter.flush()
def test_insert_gmf(self):
    cache = CacheInserter(GmfData, 10)
    gmf1 = GmfData(gmf_id=1, imt='PGA', gmvs=[], rupture_ids=[], site_id=1)
    gmf2 = GmfData(gmf_id=1, imt='PGA', gmvs=[], rupture_ids=[], site_id=2)
    cache.add(gmf1)
    cache.add(gmf2)
    cache.flush()
    connection = writer.connections['reslt_writer']
    self.assertEqual(
        connection.data,
        '1\t\\N\tPGA\t\\N\t\\N\t{}\t{}\t1\n1\t\\N\tPGA\t\\N\t\\N\t{}\t{}\t2\n'
    )
    self.assertEqual(connection.table, '"hzrdr"."gmf_data"')
    self.assertEqual(connection.columns, [
        'gmf_id', 'ses_id', 'imt', 'sa_period', 'sa_damping',
        'gmvs', 'rupture_ids', 'site_id'
    ])
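Both tests exercise the buffering contract of `CacheInserter`: rows passed to `add()` are held in memory and written in a single bulk operation on `flush()` (or when the cache size is reached). Below is a simplified, database-free stand-in for that behaviour, not the engine's implementation:

class BufferedInserter(object):
    """Toy version of a CacheInserter-like buffer (no real COPY/INSERT)."""

    def __init__(self, max_cache_size):
        self.max_cache_size = max_cache_size
        self.values = []

    def add(self, row):
        self.values.append(row)
        if len(self.values) >= self.max_cache_size:
            self.flush()

    def flush(self):
        if self.values:
            print('writing %d rows in one bulk operation' % len(self.values))
            del self.values[:]


inserter = BufferedInserter(10)
for i in range(25):
    inserter.add({'site_id': i})   # two automatic flushes of 10 rows each
inserter.flush()                   # write the remaining 5 rows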
            logs.init_logs_amqp_send(level=job.log_level,
                                     calc_domain='hazard',
                                     calc_id=calculation.id)
        else:
            logs.init_logs_amqp_send(level=job.log_level,
                                     calc_domain='risk',
                                     calc_id=calculation.id)

        try:
            # Tasks can be used in the `execute` or `post-process` phase
            if job.is_running is False:
                raise JobCompletedError('Job %d was killed' % job_id)
            elif job.status not in ('executing', 'post_processing'):
                raise JobCompletedError(
                    'The status of job %d is %s, should be executing or '
                    'post_processing' % (job_id, job.status))
            # else continue with task execution
            res = task_func(*args, **kwargs)
        # TODO: should we do something different with JobCompletedError?
        except Exception, err:
            logs.LOG.critical('Error occurred in task: %s', err)
            logs.LOG.exception(err)
            raise
        else:
            return res
        finally:
            CacheInserter.flushall()

    celery_queue = config.get('amqp', 'celery_queue')
    tsk = task(wrapped, ignore_result=True, queue=celery_queue)
    return tsk
class EnginePerformanceMonitor(PerformanceMonitor):
    """
    Performance monitor specialized for the engine. It takes as input a
    string, a job_id, and a celery task; the on_exit method sends the
    relevant info to the uiapi.performance table. For efficiency reasons
    the saving on the database is delayed and done in chunks of 1,000 rows
    each. That means that hundreds of concurrent tasks can log
    simultaneously on the uiapi.performance table without problems. You
    can save more often by calling the .cache.flush() method; it is
    automatically called for you by the oqtask decorator; it is also
    called at the end of the main engine process.
    """
    # globals per process
    cache = CacheInserter(models.Performance, 1000)  # store at most 1k objects
    pgpid = None
    pypid = None

    @classmethod
    def store_task_id(cls, job_id, task):
        with cls('storing task id', job_id, task, flush=True):
            pass

    @classmethod
    def monitor(cls, method):
        """
        A decorator to add monitoring to calculator methods. The only
        constraints are:
        1) the method has no arguments except self
        2) there is an attribute self.job.id
        """
        def newmeth(self):
            with cls(method.__name__, self.job.id, flush=True):
                return method(self)
        newmeth.__name__ = method.__name__
        return newmeth

    def __init__(self, operation, job_id, task=None, tracing=False,
                 profile_pymem=True, profile_pgmem=False, flush=False):
        self.operation = operation
        self.job_id = job_id
        if task:
            self.task = task
            self.task_id = task.request.id
        else:
            self.task = None
            self.task_id = None
        self.tracing = tracing
        self.profile_pymem = profile_pymem
        self.profile_pgmem = profile_pgmem
        self.flush = flush
        if self.profile_pymem and self.pypid is None:
            self.__class__.pypid = os.getpid()
        if self.profile_pgmem and self.pgpid is None:
            # this may be slow
            pgpid = connections['job_init'].cursor().\
                connection.get_backend_pid()
            try:
                psutil.Process(pgpid)
            except psutil.error.NoSuchProcess:  # db on a different machine
                pass
            else:
                self.__class__.pgpid = pgpid
        if tracing:
            self.tracer = logs.tracing(operation)
        super(EnginePerformanceMonitor, self).__init__(
            [self.pypid, self.pgpid])

    def copy(self, operation):
        """
        Return a copy of the monitor usable for a different operation
        in the same task.
        """
        return self.__class__(operation, self.job_id, self.task,
                              self.tracing, self.profile_pymem,
                              self.profile_pgmem)

    def on_exit(self):
        """
        Save the memory consumption on the uiapi.performance table.
        """
        n_measures = len(self.mem)
        if n_measures == 2:
            pymemory, pgmemory = self.mem
        elif n_measures == 1:
            pymemory, = self.mem
            pgmemory = None
        elif n_measures == 0:  # profile_pymem was False
            pymemory = pgmemory = None
        else:
            raise ValueError('Got %d memory measurements, must be <= 2'
                             % n_measures)
        if self.exc is None:  # save only valid calculations
            perf = models.Performance(
                oq_job_id=self.job_id,
                task_id=self.task_id,
                task=getattr(self.task, '__name__', None),
                operation=self.operation,
                start_time=self.start_time,
                duration=self.duration,
                pymemory=pymemory,
                pgmemory=pgmemory)
            self.cache.add(perf)
            if self.flush:
                self.cache.flush()

    def __enter__(self):
        super(EnginePerformanceMonitor, self).__enter__()
        if self.tracing:
            self.tracer.__enter__()
        return self

    def __exit__(self, etype, exc, tb):
        super(EnginePerformanceMonitor, self).__exit__(etype, exc, tb)
        if self.tracing:
            self.tracer.__exit__(etype, exc, tb)
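The class docstring above describes two usage styles: as a context manager around a named operation and, via the `monitor` classmethod, as a method decorator. Since the real class needs the engine database, here is a stripped-down, database-free stand-in that mimics only the timing-plus-buffering behaviour; every name below is hypothetical:

import time


class TinyMonitor(object):
    rows = []  # shared per-process buffer, like the .cache attribute above

    def __init__(self, operation, job_id, flush=False):
        self.operation = operation
        self.job_id = job_id
        self.flush = flush

    def __enter__(self):
        self.start_time = time.time()
        return self

    def __exit__(self, etype, exc, tb):
        if etype is None:  # record only operations that did not raise
            self.rows.append(
                (self.job_id, self.operation, time.time() - self.start_time))
        if self.flush:
            print('flushing %d performance rows' % len(self.rows))
            del self.rows[:]


with TinyMonitor('computing hazard curves', job_id=1, flush=True):
    sum(x * x for x in range(10 ** 5))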
        # Set up logging via amqp.
        if isinstance(calculation, models.HazardCalculation):
            logs.init_logs_amqp_send(level=job.log_level,
                                     calc_domain='hazard',
                                     calc_id=calculation.id)
        else:
            logs.init_logs_amqp_send(level=job.log_level,
                                     calc_domain='risk',
                                     calc_id=calculation.id)

        try:
            res = task_func(*args, **kwargs)
        except Exception, err:
            logs.LOG.critical('Error occurred in task: %s', err)
            logs.LOG.exception(err)
            raise
        else:
            return res
        finally:
            CacheInserter.flushall()
            # the task finished, we can remove from the performance
            # table the associated row 'storing task id', so that the
            # supervisor will not try to revoke it needlessly
            models.Performance.objects.filter(
                oq_job=job,
                operation='storing task id',
                task_id=tsk.request.id).delete()

    celery_queue = config.get('amqp', 'celery_queue')
    tsk = task(wrapped, ignore_result=True, queue=celery_queue)
    return tsk