def get_running_job(job_id):
    """Helper function which is intended to be run by celery task functions.

    Given the id of an in-progress calculation
    (:class:`openquake.db.models.OqJob`), load all of the calculation data
    from the database and KVS and return a
    :class:`openquake.engine.JobContext` object.

    If the calculation is not currently running, a :exc:`JobCompletedError`
    is raised.

    :returns:
        :class:`openquake.engine.JobContext` object, representing an
        in-progress job. This object is created from cached data in the KVS
        as well as data stored in the relational database.
    :raises JobCompletedError:
        If :meth:`~openquake.engine.JobContext.is_job_completed` returns
        ``True`` for ``job_id``.
    """
    # pylint: disable=W0404
    from openquake.engine import JobContext

    if JobContext.is_job_completed(job_id):
        raise JobCompletedError(job_id)

    job_ctxt = JobContext.from_kvs(job_id)
    if job_ctxt and job_ctxt.params:
        level = job_ctxt.log_level
    else:
        level = 'warn'
    logs.init_logs_amqp_send(level=level, job_id=job_id)

    return job_ctxt
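# A hedged usage sketch for get_running_job(): a celery task would call it
# first so the worker fails fast on finished jobs. The task name, its extra
# argument, and the `celery.task` import are assumptions (celery 2.x-style
# API), not taken from this code base.
from celery.task import task


@task
def compute_hazard_curve(job_id, sites):  # hypothetical task
    # Raises JobCompletedError when the job has already finished, so the
    # worker never computes against a stale calculation.
    job_ctxt = get_running_job(job_id)
    for site in sites:
        pass  # ... per-site work driven by job_ctxt.params and the KVS ...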
def get_running_calculation(calculation_id):
    """Helper function which is intended to be run by celery task functions.

    Given the id of an in-progress calculation
    (:class:`openquake.db.models.OqCalculation`), load all of the calculation
    data from the database and KVS and return a
    :class:`openquake.engine.CalculationProxy` object.

    If the calculation is not currently running, a :exc:`JobCompletedError`
    is raised.

    :returns:
        :class:`openquake.engine.CalculationProxy` object, representing an
        in-progress calculation. This object is created from cached data in
        the KVS as well as data stored in the relational database.
    :raises JobCompletedError:
        If :meth:`~openquake.engine.CalculationProxy.is_job_completed`
        returns ``True`` for ``calculation_id``.
    """
    # pylint: disable=W0404
    from openquake.engine import CalculationProxy

    if CalculationProxy.is_job_completed(calculation_id):
        raise JobCompletedError(calculation_id)

    calc_proxy = CalculationProxy.from_kvs(calculation_id)
    if calc_proxy and calc_proxy.params:
        level = calc_proxy.params.get('debug')
    else:
        level = 'warn'
    logs.init_logs_amqp_send(level=level, job_id=calculation_id)

    return calc_proxy
def wrapped(*args, **kwargs):
    """
    Initialize logs, make sure the job is still running, and run the task
    code surrounded by a try-except. If any error occurs, log it as a
    critical failure.
    """
    # job_id is always assumed to be the first argument passed to a task;
    # this is the only required argument
    job_id = args[0]

    # Set up logging via amqp.
    try:
        # check if the job is still running
        job = models.OqJob.objects.get(id=job_id)
        if not job.status == 'executing' and not job.is_running:
            # the job is not running
            raise JobCompletedError(job_id)

        # The job is running.
        # Setup task logging, via AMQP ...
        logs.init_logs_amqp_send(level=job.log_level, job_id=job_id)
        # ... and continue with task execution.
        task_func(*args, **kwargs)
    # TODO: should we do something different with the JobCompletedError?
    except Exception, err:
        logs.LOG.critical('Error occurred in task: %s' % str(err))
        logs.LOG.exception(err)
        raise
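# The `wrapped` closure above references `task_func`, so it is evidently the
# inner function of a decorator. A minimal sketch of that enclosing shape --
# the decorator name `oqtask` is an assumption, not shown in this snippet:
import functools


def oqtask(task_func):  # hypothetical decorator name
    """Wrap a celery task body with job-status checks and AMQP logging."""
    @functools.wraps(task_func)
    def wrapped(*args, **kwargs):
        # the body shown above: verify job status, init AMQP logging,
        # then delegate to the real task function
        return task_func(*args, **kwargs)
    return wrapped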
def run_job(job_file, output_type):
    """
    Given a job_file, run the job.

    :param job_file: the path of the configuration file for the job
    :type job_file: string
    :param output_type: the desired format for the results, one of 'db', 'xml'
    :type output_type: string
    """
    a_job = Job.from_file(job_file, output_type)
    a_job.set_status('running')

    # closing all db connections to make sure they're not shared between
    # supervisor and job executor processes. otherwise, if one of them closes
    # the connection, it immediately becomes unavailable for the others
    close_connection()

    job_pid = os.fork()
    if not job_pid:
        # job executor process
        try:
            logs.init_logs_amqp_send(level=FLAGS.debug, job_id=a_job.job_id)
            a_job.launch()
        except Exception, ex:
            LOG.critical("Job failed with exception: '%s'" % str(ex))
            a_job.set_status('failed')
            raise
        else:
            a_job.set_status('succeeded')
    return
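# The os.fork() idiom above deserves a note: fork() returns 0 in the child
# process (which runs the job) and the child's pid in the parent (which can
# return to the caller or supervise). A stdlib-only sketch of the split:
import os

pid = os.fork()
if pid == 0:
    # child: do the work, then exit without running parent-side cleanup
    os._exit(0)
else:
    # parent: free to supervise; reap the child when it finishes
    os.waitpid(pid, 0)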
def wrapped(*args, **kwargs):
    """
    Initialize logs, make sure the job is still running, and run the task
    code surrounded by a try-except. If any error occurs, log it as a
    critical failure.
    """
    # job_id is always assumed to be the first argument passed to the task,
    # or a keyword argument;
    # this is the only required argument
    job_id = kwargs.get('job_id') or args[0]

    # Set up logging via amqp.
    try:
        # check if the job is still running
        job = models.OqJob.objects.get(id=job_id)

        # Setup task logging, via AMQP ...
        logs.init_logs_amqp_send(level=job.log_level, job_id=job_id)

        logs.LOG.debug('job.is_running == %s' % job.is_running)
        logs.LOG.debug('job.status == %s' % job.status)

        # Tasks can be used in either the `execute` or `post-process` phase
        if not (job.is_running
                and job.status in ('executing', 'post_processing')):
            # the job is not running
            raise JobCompletedError(job_id)

        # The job is running.
        # ... now continue with task execution.
        task_func(*args, **kwargs)
    # TODO: should we do something different with the JobCompletedError?
    except Exception, err:
        logs.LOG.critical('Error occurred in task: %s' % str(err))
        logs.LOG.exception(err)
        raise
def check_job_status(job_id):
    """
    Helper function which is intended to be run by celery task functions.

    :raises JobCompletedError:
        If :meth:`~openquake.job.Job.is_job_completed` returns ``True``
        for ``job_id``.
    """
    job = Job.from_kvs(job_id)
    level = job.params.get('debug') if job and job.params else 'warn'
    logs.init_logs_amqp_send(level=level, job_id=job_id)
    if Job.is_job_completed(job_id):
        raise JobCompletedError(job_id)
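# Typical call pattern for check_job_status(): run it at the top of each
# celery task so the worker bails out before doing any real work. Both
# functions below are hypothetical.
def compute_risk_block(job_id, block_id):  # hypothetical task body
    check_job_status(job_id)  # raises JobCompletedError if already finished
    do_block_work(job_id, block_id)  # hypothetical per-block routine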
def test_init_logs_amqp_send_with_no_amqp_handler(self):
    """
    init_logs_amqp_send() will add an `AMQPHandler` instance to the
    root logger if none is present.
    """
    mm = mock.MagicMock(spec=kombu.messaging.Producer)
    with mock.patch.object(logs.AMQPHandler, "_initialize") as minit:
        minit.return_value = mm
        with helpers.patch("logging.root.addHandler") as mah:
            logs.init_logs_amqp_send("info", 321)
            self.assertEqual(1, mah.call_count)
            (single_arg,) = mah.call_args[0]
            self.assertTrue(isinstance(single_arg, logs.AMQPHandler))
        self.assertEqual(logging.root.level, logging.INFO)
def test_init_logs_amqp_send_with_existing_amqp_handler(self):
    """
    init_logs_amqp_send() will not add more than one `AMQPHandler`
    instance to the root logger.
    """
    mm = mock.MagicMock(spec=kombu.messaging.Producer)
    with mock.patch.object(logs.AMQPHandler, "_initialize") as minit:
        minit.return_value = mm
        handler = logs.AMQPHandler()
        handler.set_job_id = mock.Mock()
        logging.root.handlers.append(handler)
        with helpers.patch("logging.root.addHandler") as mah:
            logs.init_logs_amqp_send("info", 322)
            self.assertEqual(0, mah.call_count)
        self.assertEqual(1, handler.set_job_id.call_count)
        self.assertEqual((322,), handler.set_job_id.call_args[0])
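# The tests above pin down a dedup contract: when an AMQPHandler is already
# attached to the root logger, init_logs_amqp_send() must reuse it (via
# set_job_id) instead of adding a second one. A plausible sketch of that
# logic -- an assumption, since the real function body is not part of this
# snippet:
import logging

from openquake import logs  # import path assumed from the tests' usage


def _init_logs_amqp_send_sketch(level, job_id):  # hypothetical rewrite
    amqp = [h for h in logging.root.handlers
            if isinstance(h, logs.AMQPHandler)]
    if amqp:
        # reuse the existing handler, just point it at the new job
        amqp[0].set_job_id(job_id)
    else:
        handler = logs.AMQPHandler()
        handler.set_job_id(job_id)
        logging.root.addHandler(handler)
    # logging.getLevelName("WARNING") returns the int logging.WARNING
    logging.root.setLevel(logging.getLevelName(level.upper()))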
def _run_calc(job, log_level, log_file, exports, calc, job_type):
    """
    Run a calculation.

    :param job:
        :class:`openquake.db.models.OqJob` instance which references a valid
        :class:`openquake.db.models.RiskCalculation` or
        :class:`openquake.db.models.HazardCalculation`.
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param list exports:
        A (potentially empty) list of export targets. Currently only "xml" is
        supported.
    :param calc:
        Calculator object, which must implement the interface of
        :class:`openquake.calculators.base.CalculatorNext`.
    :param str job_type:
        'hazard' or 'risk'
    """
    # Closing all db connections to make sure they're not shared between
    # supervisor and job executor processes.
    # Otherwise, if one of them closes the connection it immediately becomes
    # unavailable for others.
    close_connection()

    job_pid = os.fork()
    if not job_pid:
        # calculation executor process
        try:
            logs.init_logs_amqp_send(level=log_level, job_id=job.id)
            # run the job
            job.is_running = True
            job.save()
            kvs.mark_job_as_current(job.id)
            _do_run_calc(job, exports, calc, job_type)
        except Exception, ex:
            logs.LOG.critical("Calculation failed with exception: '%s'"
                              % str(ex))
            raise
        finally:
def run_hazard(job, log_level, log_file, exports):
    """Run a hazard job.

    :param job:
        :class:`openquake.db.models.OqJob` instance which references a valid
        :class:`openquake.db.models.HazardCalculation`.
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param list exports:
        a (potentially empty) list of export targets, currently only "xml" is
        supported
    """
    # Closing all db connections to make sure they're not shared between
    # supervisor and job executor processes.
    # Otherwise, if one of them closes the connection it immediately becomes
    # unavailable for others.
    close_connection()

    job_pid = os.fork()
    if not job_pid:
        # calculation executor process
        try:
            logs.init_logs_amqp_send(level=log_level, job_id=job.id)

            # record initial job stats
            hc = job.hazard_calculation
            models.JobStats.objects.create(
                oq_job=job,
                num_sites=len(hc.points_to_compute()),
                realizations=hc.number_of_logic_tree_samples)

            # run the job
            job.is_running = True
            job.save()
            kvs.mark_job_as_current(job.id)
            _do_run_hazard(job, exports)
        except Exception, ex:
            logs.LOG.critical("Calculation failed with exception: '%s'"
                              % str(ex))
            raise
        finally:
def test_init_logs_amqp_send_changes_logging_level(self):
    """
    init_logs_amqp_send() will update the root logger's level even when an
    `AMQPHandler` is already installed.
    """
    mm = mock.MagicMock(spec=kombu.messaging.Producer)
    with mock.patch.object(logs.AMQPHandler, "_initialize") as minit:
        minit.return_value = mm
        handler = logs.AMQPHandler()
        logging.root.handlers.append(handler)
        handler.set_job_id = mock.Mock()

        logging.root.setLevel(logging.INFO)

        logs.init_logs_amqp_send("warning", 322)
        self.assertEqual(logging.root.level, logging.WARNING)

        logs.init_logs_amqp_send("debug", 323)
        self.assertEqual(logging.root.level, logging.DEBUG)

        logs.init_logs_amqp_send("error", 324)
        self.assertEqual(logging.root.level, logging.ERROR)
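# The assertions above imply a mapping from names like "warning"/"debug"/
# "error" to stdlib logging constants; logging's own name table provides one
# (e.g. logging.getLevelName("WARNING") == logging.WARNING). The engine
# docstrings also allow a non-stdlib 'progress' level, which stdlib logging
# would support via addLevelName -- the numeric value below is an
# assumption, not taken from the source:
import logging

assert logging.getLevelName("WARNING") == logging.WARNING
logging.addLevelName(25, "PROGRESS")  # hypothetical numeric value
logging.root.setLevel(logging.getLevelName("PROGRESS"))
assert logging.root.level == 25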
def run_job(job, params, sections, output_type='db', log_level='warn',
            force_inputs=False):
    """Given an :class:`openquake.db.models.OqJobProfile` object, create a
    new :class:`openquake.db.models.OqJob` object and run the job.

    NOTE: The params and sections parameters are temporary but will be
    required until we can run calculations purely using Django model objects
    as calculator input.

    Returns the calculation object when the calculation concludes.

    :param job:
        :class:`openquake.db.models.OqJob` instance
    :param params:
        A dictionary of config parameters parsed from the calculation config
        file.
    :param sections:
        A list of sections parsed from the calculation config file.
    :param output_type:
        'db' or 'xml' (defaults to 'db')
    :param str log_level:
        One of 'debug', 'info', 'warn', 'error', or 'critical'.
        Defaults to 'warn'.
    :param bool force_inputs:
        If `True` the model input files will be parsed and the resulting
        content written to the database no matter what.
    :returns:
        :class:`openquake.db.models.OqJob` instance.
    """
    if output_type not in ('db', 'xml'):
        raise RuntimeError("output_type must be 'db' or 'xml'")

    job.description = job.profile().description
    job.status = 'running'
    job.save()

    # Clear any counters for this job_id, prior to running the job.
    # We do this just to make sure all of the counters behave properly and
    # can provide accurate data about a calculation in-progress.
    stats.delete_job_counters(job.id)

    # Make the job/calculation ID generally available.
    utils_config.Config().job_id = job.id

    serialize_results_to = ['db']
    if output_type == 'xml':
        serialize_results_to.append('xml')

    job_ctxt = JobContext(params, job.id, sections=sections,
                          serialize_results_to=serialize_results_to,
                          oq_job_profile=job.profile(), oq_job=job,
                          log_level=log_level, force_inputs=force_inputs)

    # closing all db connections to make sure they're not shared between
    # supervisor and job executor processes. otherwise, if one of them closes
    # the connection, it immediately becomes unavailable for the others
    close_connection()

    job_pid = os.fork()
    if not job_pid:
        # calculation executor process
        try:
            logs.init_logs_amqp_send(level=log_level, job_id=job.id)
            _launch_job(job_ctxt, sections)
        except Exception, ex:
            logs.LOG.critical("Calculation failed with exception: '%s'"
                              % str(ex))
            job.status = 'failed'
            job.save()
            raise
        else:
            job.status = 'succeeded'
            job.save()
    return
def run_job(job, params, sections, output_type='db', log_level='warn',
            force_inputs=False, log_file=None):
    """Given an :class:`openquake.db.models.OqJobProfile` object, create a
    new :class:`openquake.db.models.OqJob` object and run the job.

    NOTE: The params and sections parameters are temporary but will be
    required until we can run calculations purely using Django model objects
    as calculator input.

    Returns the calculation object when the calculation concludes.

    :param job:
        :class:`openquake.db.models.OqJob` instance
    :param params:
        A dictionary of config parameters parsed from the calculation config
        file.
    :param sections:
        A list of sections parsed from the calculation config file.
    :param output_type:
        'db' or 'xml' (defaults to 'db')
    :param str log_level:
        One of 'debug', 'info', 'warn', 'error', or 'critical'.
        Defaults to 'warn'.
    :param bool force_inputs:
        If `True` the model input files will be parsed and the resulting
        content written to the database no matter what.
    :param str log_file:
        Optional log file location.
    :returns:
        :class:`openquake.db.models.OqJob` instance.
    """
    if output_type not in ('db', 'xml'):
        raise RuntimeError("output_type must be 'db' or 'xml'")

    job.description = job.profile().description
    job.status = 'running'
    job.save()

    # Clear any counters for this job_id, prior to running the job.
    # We do this just to make sure all of the counters behave properly and
    # can provide accurate data about a calculation in-progress.
    stats.delete_job_counters(job.id)

    # Make the job/calculation ID generally available.
    utils_config.Config().job_id = job.id

    serialize_results_to = ['db']
    if output_type == 'xml':
        serialize_results_to.append('xml')

    job_ctxt = JobContext(params, job.id, sections=sections,
                          serialize_results_to=serialize_results_to,
                          oq_job_profile=job.profile(), oq_job=job,
                          log_level=log_level, force_inputs=force_inputs)

    # closing all db connections to make sure they're not shared between
    # supervisor and job executor processes. otherwise, if one of them closes
    # the connection, it immediately becomes unavailable for the others
    close_connection()

    job_pid = os.fork()
    if not job_pid:
        # calculation executor process
        try:
            logs.init_logs_amqp_send(level=log_level, job_id=job.id)
            _launch_job(job_ctxt, sections)
        except Exception, ex:
            logs.LOG.critical("Calculation failed with exception: '%s'"
                              % str(ex))
            job.status = 'failed'
            job.save()
            raise
        else:
            job.status = 'succeeded'
            job.save()
    return
def run_calculation(job_profile, params, sections, output_type='db'):
    """Given an :class:`openquake.db.models.OqJobProfile` object, create a
    new :class:`openquake.db.models.OqCalculation` object and run the
    calculation.

    NOTE: The params and sections parameters are temporary but will be
    required until we can run calculations purely using Django model objects
    as calculator input.

    Returns the calculation object when the calculation concludes.

    :param job_profile:
        :class:`openquake.db.models.OqJobProfile` instance.
    :param params:
        A dictionary of config parameters parsed from the calculation config
        file.
    :param sections:
        A list of sections parsed from the calculation config file.
    :param output_type:
        'db' or 'xml' (defaults to 'db')
    :returns:
        :class:`openquake.db.models.OqCalculation` instance.
    """
    if output_type not in ('db', 'xml'):
        raise RuntimeError("output_type must be 'db' or 'xml'")

    calculation = OqCalculation(owner=job_profile.owner)
    calculation.oq_job_profile = job_profile
    calculation.status = 'running'
    calculation.save()

    # Clear any counters for this calculation_id, prior to running the
    # calculation.
    # We do this just to make sure all of the counters behave properly and
    # can provide accurate data about a calculation in-progress.
    stats.delete_job_counters(calculation.id)

    # Make the job/calculation ID generally available.
    utils_config.Config().job_id = calculation.id

    serialize_results_to = ['db']
    if output_type == 'xml':
        serialize_results_to.append('xml')

    calc_proxy = CalculationProxy(params, calculation.id, sections=sections,
                                  serialize_results_to=serialize_results_to,
                                  oq_job_profile=job_profile,
                                  oq_calculation=calculation)

    # closing all db connections to make sure they're not shared between
    # supervisor and job executor processes. otherwise, if one of them closes
    # the connection, it immediately becomes unavailable for the others
    close_connection()

    calc_pid = os.fork()
    if not calc_pid:
        # calculation executor process
        try:
            logs.init_logs_amqp_send(level=FLAGS.debug,
                                     job_id=calculation.id)
            _launch_calculation(calc_proxy, sections)
        except Exception, ex:
            logs.LOG.critical("Calculation failed with exception: '%s'"
                              % str(ex))
            calculation.status = 'failed'
            calculation.save()
            raise
        else:
            calculation.status = 'succeeded'
            calculation.save()
    return
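# A hedged usage sketch for run_calculation(); the profile lookup and the
# params/sections values are placeholders, since this snippet does not show
# how callers build them from the config file.
from openquake.db.models import OqJobProfile

job_profile = OqJobProfile.objects.get(id=42)   # hypothetical lookup
params = {'CALCULATION_MODE': 'classical'}      # placeholder params dict
sections = ['general', 'HAZARD']                # placeholder section list
run_calculation(job_profile, params, sections, output_type='db')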