def oqtask(task_func):
    """
    Task function decorator which sets up logging and catches (and logs)
    any errors which occur inside the task. Also checks to make sure the
    job is actually still running. If it is not running, the task doesn't
    get executed, so we don't do useless computation.

    :param task_func: the function to decorate
    :returns: a celery task object wrapping `task_func`
    """
    def wrapped(*args):
        """
        Initialize logs, make sure the job is still running, and run the
        task code surrounded by a try-except. If any error occurs, log it
        as a critical failure.
        """
        # the last argument is assumed to be a monitor
        monitor = args[-1]
        job = models.OqJob.objects.get(id=monitor.job_id)
        # NB: `not job.is_running` (instead of `is False`) also treats a
        # NULL is_running as "not running", which is the safe reading
        if not job.is_running:
            # the job was killed, it is useless to run the task
            raise JobNotRunning(monitor.job_id)

        # it is important to save the task id soon, so that
        # the revoke functionality can work
        with monitor("storing task id", task=tsk, autoflush=True):
            pass

        with logs.handle(job):
            check_mem_usage()  # warn if too much memory is used
            try:
                # run the task, tracking its total runtime
                total = "total " + task_func.__name__
                with monitor(total, task=tsk):
                    with GroundShakingIntensityModel.forbid_instantiation():
                        return task_func(*args)
            finally:
                # save on the db
                CacheInserter.flushall()
                # the task finished, we can remove from the performance
                # table the associated row 'storing task id'
                models.Performance.objects.filter(
                    oq_job=job,
                    operation="storing task id",
                    task_id=tsk.request.id).delete()

    celery_queue = config.get("amqp", "celery_queue")

    # a named function instead of a lambda bound to a name (PEP 8, E731);
    # it mimics task_func so that celery registers the task under its name
    def f(*args):
        return safely_call(wrapped, args, pickle=True)
    f.__name__ = task_func.__name__
    f.__module__ = task_func.__module__

    tsk = task(f, queue=celery_queue)
    tsk.__func__ = tsk
    tsk.task_func = task_func
    return tsk
def run_calc(request):
    """
    Run a calculation.

    :param request: a `django.http.HttpRequest` object.
    """
    def _json_response(payload, code):
        # all replies are JSON-encoded with the given HTTP status
        return HttpResponse(content=json.dumps(payload),
                            content_type=JSON, status=code)

    hazard_job_id = request.POST.get('hazard_job_id')
    # a risk calculation re-uses an existing hazard job
    candidates = (("job_risk.ini", "job.ini") if hazard_job_id
                  else ("job_hazard.ini", "job_haz.ini", "job.ini"))
    einfo, exctype, _mon = safely_call(
        _prepare_job, (request, hazard_job_id, candidates))
    if exctype:
        return _json_response(einfo.splitlines(), 500)
    if not einfo:
        msg = 'Could not find any file of the form %s' % str(candidates)
        logging.error(msg)
        return _json_response([msg], 500)

    temp_dir = os.path.dirname(einfo[0])
    user = utils.get_user_data(request)
    try:
        job_id, _fut = submit_job(
            einfo[0], temp_dir, user['name'], hazard_job_id)
    except Exception as exc:
        # no job created, for instance missing .xml file;
        # normalize the exception message to a unicode object
        exc_msg = exc.args[0]
        if isinstance(exc_msg, bytes):
            exc_msg = exc_msg.decode('utf-8')
        else:
            assert isinstance(exc_msg, unicode), exc_msg
        logging.error(exc_msg)
        payload, status = exc_msg.splitlines(), 500
    else:
        calc = oqe_models.OqJob.objects.get(pk=job_id)
        payload = vars(calc.get_oqparam())
        payload['job_id'] = job_id
        payload['status'] = calc.status
        status = 200
    return _json_response(payload, status)
def oqtask(task_func):
    """
    Task function decorator which sets up logging and catches (and logs)
    any errors which occur inside the task. Also checks to make sure the
    job is actually still running. If it is not running, the task doesn't
    get executed, so we don't do useless computation.

    :param task_func: the function to decorate
    :returns: a celery task object wrapping `task_func`
    """
    def wrapped(*args):
        """
        Initialize logs, make sure the job is still running, and run the
        task code surrounded by a try-except. If any error occurs, log it
        as a critical failure.
        """
        # the last argument is assumed to be a monitor
        monitor = args[-1]
        job = models.OqJob.objects.get(id=monitor.job_id)
        # NB: `not job.is_running` (instead of `is False`) also treats a
        # NULL is_running as "not running", which is the safe reading
        if not job.is_running:
            # the job was killed, it is useless to run the task
            raise JobNotRunning(monitor.job_id)

        # it is important to save the task id soon, so that
        # the revoke functionality can work
        with monitor('storing task id', task=tsk, autoflush=True):
            pass

        with logs.handle(job):
            check_mem_usage()  # warn if too much memory is used
            try:
                # run the task, tracking its total runtime
                total = 'total ' + task_func.__name__
                with monitor(total, task=tsk, autoflush=True):
                    return task_func(*args)
            finally:
                # save on the db
                CacheInserter.flushall()
                # the task finished, we can remove from the performance
                # table the associated row 'storing task id'
                models.Performance.objects.filter(
                    oq_job=job,
                    operation='storing task id',
                    task_id=tsk.request.id).delete()

    celery_queue = config.get('amqp', 'celery_queue')

    # a named function instead of a lambda bound to a name (PEP 8, E731);
    # it mimics task_func so that celery registers the task under its name
    def f(*args):
        return safely_call(wrapped, args, pickle=True)
    f.__name__ = task_func.__name__
    f.__module__ = task_func.__module__

    tsk = task(f, queue=celery_queue)
    tsk.__func__ = tsk
    tsk.task_func = task_func
    return tsk
def submit(self, *args):
    """
    Submit an oqtask with the given arguments to celery. The result
    (an AsyncResult, or, if the variable OQ_NO_DISTRIBUTE is set, the
    value of the task function run in-process) is appended to
    `self.results`; nothing is returned.

    :param args: the positional arguments to pass to the oqtask
    """
    check_mem_usage()  # log a warning if too much memory is used
    if no_distribute():
        # run the underlying task function synchronously in this process
        res = safely_call(self.oqtask.task_func, args)
    else:
        # pickle the arguments up front so their size can be accounted
        # for in self.sent before dispatching to celery
        piks = pickle_sequence(args)
        self.sent += sum(len(p) for p in piks)
        res = self.oqtask.delay(*piks)
    self.results.append(res)
def oqtask(task_func):
    """
    Task function decorator which sets up logging and catches (and logs)
    any errors which occur inside the task. Also checks to make sure the
    job is actually still running. If it is not running, the task doesn't
    get executed, so we don't do useless computation.

    :param task_func: the function to decorate
    :returns: a celery task object wrapping `task_func`
    """
    def wrapped(*args):
        """
        Initialize logs, make sure the job is still running, and run the
        task code surrounded by a try-except. If any error occurs, log it
        as a critical failure.
        """
        # job_id is always assumed to be the first argument
        job_id = args[0]
        job = models.OqJob.objects.get(id=job_id)
        # NB: `not job.is_running` (instead of `is False`) also treats a
        # NULL is_running as "not running", which is the safe reading
        if not job.is_running:
            # the job was killed, it is useless to run the task
            raise JobNotRunning(job_id)

        # it is important to save the task id soon, so that
        # the revoke functionality can work
        EnginePerformanceMonitor.store_task_id(job_id, tsk)

        with EnginePerformanceMonitor(
                'total ' + task_func.__name__, job_id, tsk, flush=True):
            # tasks write on the celery log file
            logs.set_level(job.log_level)
            try:
                # log a warning if too much memory is used
                check_mem_usage(SOFT_MEM_LIMIT, HARD_MEM_LIMIT)
                # run the task
                return task_func(*args)
            finally:
                # save on the db
                CacheInserter.flushall()
                # the task finished, we can remove from the performance
                # table the associated row 'storing task id'
                models.Performance.objects.filter(
                    oq_job=job,
                    operation='storing task id',
                    task_id=tsk.request.id).delete()

    celery_queue = config.get('amqp', 'celery_queue')

    # a named function instead of a lambda bound to a name (PEP 8, E731)
    def f(*args):
        return safely_call(wrapped, args, pickle=True)
    f.__name__ = task_func.__name__

    tsk = task(f, queue=celery_queue)
    tsk.task_func = task_func
    return tsk
def run_calc(request):
    """
    Run a calculation.

    :param request: a `django.http.HttpRequest` object.
    """
    hazard_job_id = request.POST.get('hazard_job_id')
    if hazard_job_id:
        # risk calculation on top of an existing hazard job
        candidates = ("job_risk.ini", "job.ini")
    else:
        candidates = ("job_hazard.ini", "job_haz.ini", "job.ini")
    einfo, exctype, _mon = safely_call(
        _prepare_job, (request, hazard_job_id, candidates))
    if exctype:
        return HttpResponse(json.dumps(einfo.splitlines()),
                            content_type=JSON, status=500)
    if not einfo:
        msg = 'Could not find any file of the form %s' % str(candidates)
        logging.error(msg)
        return HttpResponse(content=json.dumps([msg]),
                            content_type=JSON, status=500)

    temp_dir = os.path.dirname(einfo[0])
    user = utils.get_user_data(request)
    try:
        job_id, _fut = submit_job(
            einfo[0], temp_dir, user['name'], hazard_job_id)
    except Exception as exc:
        # no job created, for instance missing .xml file;
        # normalize the exception message to a unicode object
        exc_msg = exc.args[0]
        if isinstance(exc_msg, bytes):
            exc_msg = exc_msg.decode('utf-8')
        else:
            assert isinstance(exc_msg, unicode), exc_msg
        logging.error(exc_msg)
        body, code = exc_msg.splitlines(), 500
    else:
        calc = oqe_models.OqJob.objects.get(pk=job_id)
        body = vars(calc.get_oqparam())
        body['job_id'] = job_id
        body['status'] = calc.status
        code = 200
    return HttpResponse(content=json.dumps(body),
                        content_type=JSON, status=code)
def submit_job(job_file, temp_dir, user_name, hazard_job_id=None,
               logfile=None):
    """
    Create a job object from the given job.ini file in the job directory
    and submit it to the job queue.

    :returns: a pair (job_id, future)
    """
    ini = os.path.join(temp_dir, job_file)
    # build the job record; errors come back as (traceback, exctype)
    result, exctype, _mon = safely_call(
        db.actions.job_from_file, (ini, user_name, hazard_job_id))
    if exctype:
        raise exctype(result)
    job_id, oqparam = result
    # run the calculation asynchronously on the process pool
    fut = executor.submit(
        tasks.safely_call, tasks.run_calc,
        job_id, oqparam, temp_dir, logfile, hazard_job_id)
    return job_id, fut
def run_calc(request):
    """
    Run a calculation.

    :param request:
        a `django.http.HttpRequest` object. If the request has the
        attribute `hazard_job_id`, the results of the specified hazard
        calculations will be re-used as input by the risk calculation.
        The request also needs to contain the files needed to perform
        the calculation. They can be uploaded as separate files, or
        zipped together.
    """
    hazard_job_id = request.POST.get('hazard_job_id')
    # a risk calculation re-uses an existing hazard job
    candidates = (("job_risk.ini", "job.ini") if hazard_job_id
                  else ("job_hazard.ini", "job_haz.ini", "job.ini"))
    einfo, exctype, _mon = safely_call(
        _prepare_job, (request, hazard_job_id, candidates))
    if exctype:
        return HttpResponse(json.dumps(einfo.splitlines()),
                            content_type=JSON, status=500)
    if not einfo:
        msg = 'Could not find any file of the form %s' % str(candidates)
        logging.error(msg)
        return HttpResponse(content=json.dumps([msg]),
                            content_type=JSON, status=500)

    user = utils.get_user_data(request)
    try:
        job_id, fut = submit_job(einfo[0], user['name'], hazard_job_id)
        # restart the process pool at the end of each job
        fut.add_done_callback(lambda f: TaskManager.restart())
    except Exception as exc:
        # no job created, for instance missing .xml file
        exc_msg = str(exc)
        logging.error(exc_msg)
        payload, status = exc_msg.splitlines(), 500
    else:
        payload, status = dict(job_id=job_id, status='created'), 200
    return HttpResponse(content=json.dumps(payload),
                        content_type=JSON, status=status)
def run_command(cmd, args, conn):
    """
    Execute the received command. Errors are trapped and a pair
    (result, exctype) is sent back. `exctype` is None if there is no
    exception, otherwise it is an exception class and `result` is an
    error string containing the traceback.
    """
    try:
        logging.info('Processing %s%s', cmd, args)
        # resolve the action by name and run it, trapping any exception
        result, exc_class, _mon = safely_call(getattr(actions, cmd), args)
        if exc_class:
            logging.error(result)
        # reply with the pair (result, exception class or None)
        conn.send((result, exc_class))
    finally:
        # always release the connection, even on unexpected errors
        conn.close()
def test_no_flush(self):
    # a monitor passed into a task must not be flushed by the task
    # itself; safely_call is expected to turn that into a RuntimeError
    mon = parallel.Monitor("test")
    err, etype, out_mon = parallel.safely_call(get_len, ("ab", mon))
    self.assertIn("Monitor('test').flush() must not be called"
                  " by get_len!", err)
    self.assertEqual(etype, RuntimeError)
    # the monitor comes back with the same operation name
    self.assertEqual(out_mon.operation, mon.operation)