def run_calc(job_id, oqparam, log_level, log_file, exports,
             hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    :returns:
        the calculator instance (also returned when the job failed, since
        `raise` inside the handler re-propagates before reaching `return`)
    """
    monitor = Monitor('total runtime', measuremem=True)
    with logs.handle(job_id, log_level, log_file):  # run the job
        if USE_CELERY and os.environ.get('OQ_DISTRIBUTE') == 'celery':
            set_concurrent_tasks_default()
        calc = base.calculators(oqparam, monitor, calc_id=job_id)
        calc.from_engine = True
        tb = 'None\n'
        try:
            logs.dbcmd('set_status', job_id, 'executing')
            _do_run_calc(calc, exports, hazard_calculation_id, **kw)
            expose_outputs(calc.datastore)
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, calc._monitor.duration)
            logs.dbcmd('finish', job_id, 'complete')
        # NOTE: was a bare `except:`; `except BaseException:` is the
        # explicit, behavior-identical form (still catches KeyboardInterrupt
        # and SystemExit, which is intended here so the DB is updated)
        except BaseException:
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if USE_CELERY:
                    celery_cleanup(TERMINATE, parallel.Starmap.task_ids)
            except BaseException:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
def run_calc(job_id, oqparam, log_level, log_file, exports,
             hazard_calculation_id=None):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    :returns:
        the calculator instance on success
    """
    monitor = Monitor('total runtime', measuremem=True)
    with logs.handle(job_id, log_level, log_file):  # run the job
        if USE_CELERY and os.environ.get('OQ_DISTRIBUTE') == 'celery':
            set_concurrent_tasks_default()
        calc = base.calculators(oqparam, monitor, calc_id=job_id)
        tb = 'None\n'
        try:
            logs.dbcmd('set_status', job_id, 'executing')
            _do_run_calc(calc, exports, hazard_calculation_id)
            expose_outputs(calc.datastore)
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, calc.monitor.duration)
            logs.dbcmd('finish', job_id, 'complete')
        # NOTE: was a bare `except:`; `except BaseException:` is the
        # explicit, behavior-identical form (KeyboardInterrupt/SystemExit
        # must also mark the job as failed in the database)
        except BaseException:
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if USE_CELERY:
                    celery_cleanup(TERMINATE, parallel.TaskManager.task_ids)
            except BaseException:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
def run_calc(job_id, oqparam, exports, hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param exports:
        A comma-separated string of export types.
    :param hazard_calculation_id:
        ID of a previous hazard calculation to start from, if any
    :returns:
        the calculator instance on success
    """
    register_signals()
    setproctitle('oq-job-%d' % job_id)
    calc = base.calculators(oqparam, calc_id=job_id)
    logging.info('%s running %s [--hc=%s]',
                 getpass.getuser(),
                 calc.oqparam.inputs['job_ini'],
                 calc.oqparam.hazard_calculation_id)
    logging.info('Using engine version %s', __version__)
    msg = check_obsolete_version(oqparam.calculation_mode)
    if msg:
        # was logs.LOG.warn: deprecated alias, inconsistent with the
        # logs.LOG.warning(...) call later in this same function
        logs.LOG.warning(msg)
    calc.from_engine = True
    tb = 'None\n'
    try:
        if not oqparam.hazard_calculation_id:
            # archive the input files inside the datastore
            if 'input_zip' in oqparam.inputs:  # starting from an archive
                with open(oqparam.inputs['input_zip'], 'rb') as arch:
                    data = numpy.array(arch.read())
            else:
                logs.LOG.info('Zipping the input files')
                bio = io.BytesIO()
                oqzip.zip_job(oqparam.inputs['job_ini'], bio, (), oqparam,
                              logging.debug)
                data = numpy.array(bio.getvalue())
                del bio
            calc.datastore['input/zip'] = data
            calc.datastore.set_attrs('input/zip', nbytes=data.nbytes)
            del data  # save memory
        poll_queue(job_id, _PID, poll_time=15)
        if OQ_DISTRIBUTE.endswith('pool'):
            logs.LOG.warning('Using %d cores on %s',
                             parallel.cpu_count, platform.node())
        if OQ_DISTRIBUTE == 'zmq':
            logs.dbcmd('zmq_start')  # start zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        if OQ_DISTRIBUTE.startswith(('celery', 'zmq')):
            set_concurrent_tasks_default(job_id)
        t0 = time.time()
        calc.run(exports=exports,
                 hazard_calculation_id=hazard_calculation_id,
                 close=False, **kw)
        logs.LOG.info('Exposing the outputs to the database')
        expose_outputs(calc.datastore)
        duration = time.time() - t0
        records = views.performance_view(calc.datastore, add_calc_id=False)
        logs.dbcmd('save_performance', job_id, records)
        calc.datastore.close()
        logs.LOG.info('Calculation %d finished correctly in %d seconds',
                      job_id, duration)
        logs.dbcmd('finish', job_id, 'complete')
    except BaseException as exc:
        # a MasterKilled means the user aborted the job on purpose
        if isinstance(exc, MasterKilled):
            msg = 'aborted'
        else:
            msg = 'failed'
        tb = traceback.format_exc()
        try:
            logs.LOG.critical(tb)
            logs.dbcmd('finish', job_id, msg)
        except BaseException:  # an OperationalError may always happen
            sys.stderr.write(tb)
        raise
    finally:
        # if there was an error in the calculation, this part may fail;
        # in such a situation, we simply log the cleanup error without
        # taking further action, so that the real error can propagate
        if OQ_DISTRIBUTE == 'zmq':  # stop zworkers
            logs.dbcmd('zmq_stop')
        try:
            if OQ_DISTRIBUTE.startswith('celery'):
                celery_cleanup(TERMINATE)
        except BaseException:
            # log the finalization error only if there is no real error
            if tb == 'None\n':
                logs.LOG.error('finalizing', exc_info=True)
    return calc
def run_calc(job_id, oqparam, exports, hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param exports:
        A comma-separated string of export types.
    :param hazard_calculation_id:
        ID of a previous hazard calculation to start from, if any
    :returns:
        the calculator instance on success, or None if the job was
        aborted before even starting
    """
    register_signals()
    setproctitle('oq-job-%d' % job_id)
    calc = base.calculators(oqparam, calc_id=job_id)
    logging.info('%s running %s [--hc=%s]',
                 getpass.getuser(),
                 calc.oqparam.inputs['job_ini'],
                 calc.oqparam.hazard_calculation_id)
    logging.info('Using engine version %s', __version__)
    msg = check_obsolete_version(oqparam.calculation_mode)
    if msg:
        # was logs.LOG.warn: deprecated alias, inconsistent with the
        # logs.LOG.warning(...) call later in this same function
        logs.LOG.warning(msg)
    calc.from_engine = True
    tb = 'None\n'
    try:
        poll_queue(job_id, _PID, poll_time=15)
    except BaseException:
        # the job aborted even before starting
        logs.dbcmd('finish', job_id, 'aborted')
        return
    try:
        if OQ_DISTRIBUTE.endswith('pool'):
            logs.LOG.warning('Using %d cores on %s',
                             parallel.Starmap.num_cores, platform.node())
        if OQ_DISTRIBUTE == 'zmq' and config.zworkers['host_cores']:
            logs.dbcmd('zmq_start')  # start the zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        set_concurrent_tasks_default(calc)
        t0 = time.time()
        calc.run(exports=exports,
                 hazard_calculation_id=hazard_calculation_id, **kw)
        logs.LOG.info('Exposing the outputs to the database')
        expose_outputs(calc.datastore)
        duration = time.time() - t0
        records = views.performance_view(calc.datastore, add_calc_id=False)
        logs.dbcmd('save_performance', job_id, records)
        calc.datastore.close()
        logs.LOG.info('Calculation %d finished correctly in %d seconds',
                      job_id, duration)
        logs.dbcmd('finish', job_id, 'complete')
    except BaseException as exc:
        # a MasterKilled means the user aborted the job on purpose
        if isinstance(exc, MasterKilled):
            msg = 'aborted'
        else:
            msg = 'failed'
        tb = traceback.format_exc()
        try:
            logs.LOG.critical(tb)
            logs.dbcmd('finish', job_id, msg)
        except BaseException:  # an OperationalError may always happen
            sys.stderr.write(tb)
        raise
    finally:
        # if there was an error in the calculation, this part may fail;
        # in such a situation, we simply log the cleanup error without
        # taking further action, so that the real error can propagate
        if OQ_DISTRIBUTE == 'zmq' and config.zworkers['host_cores']:
            logs.dbcmd('zmq_stop')  # stop the zworkers
        try:
            if OQ_DISTRIBUTE.startswith('celery'):
                celery_cleanup(TERMINATE)
        except BaseException:
            # log the finalization error only if there is no real error
            if tb == 'None\n':
                logs.LOG.error('finalizing', exc_info=True)
    return calc
def run_calc(job_id, oqparam, log_level, log_file, exports,
             hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    :returns:
        the calculator instance on success
    """
    setproctitle('oq-job-%d' % job_id)
    with logs.handle(job_id, log_level, log_file):  # run the job
        calc = base.calculators(oqparam, calc_id=job_id)
        calc.set_log_format()  # set the log format first of all
        logging.info('Running %s [--hc=%s]',
                     calc.oqparam.inputs['job_ini'],
                     calc.oqparam.hazard_calculation_id)
        logging.info('Using engine version %s', __version__)
        msg = check_obsolete_version(oqparam.calculation_mode)
        if msg:
            # was logs.LOG.warn: deprecated alias; the other run_calc
            # variants in this file use logs.LOG.warning
            logs.LOG.warning(msg)
        if OQ_DISTRIBUTE.startswith(('celery', 'zmq')):
            set_concurrent_tasks_default(job_id)
        calc.from_engine = True
        input_zip = oqparam.inputs.get('input_zip')
        tb = 'None\n'
        try:
            if input_zip:  # the input was zipped from the beginning
                # was `data = open(input_zip, 'rb').read()`: the file
                # handle was never closed; use a context manager instead
                with open(input_zip, 'rb') as arch:
                    data = arch.read()
            else:  # zip the input
                logs.LOG.info('zipping the input files')
                bio = io.BytesIO()
                zip(oqparam.inputs['job_ini'], bio, (), oqparam,
                    logging.debug)
                data = bio.getvalue()
            calc.datastore['input_zip'] = numpy.array(data)
            calc.datastore.set_attrs('input_zip', nbytes=len(data))
            logs.dbcmd('update_job', job_id,
                       {'status': 'executing', 'pid': _PID})
            t0 = time.time()
            calc.run(exports=exports,
                     hazard_calculation_id=hazard_calculation_id,
                     close=False, **kw)
            logs.LOG.info('Exposing the outputs to the database')
            expose_outputs(calc.datastore)
            duration = time.time() - t0
            calc._monitor.flush()
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, duration)
            logs.dbcmd('finish', job_id, 'complete')
        except BaseException:
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if OQ_DISTRIBUTE.startswith('celery'):
                    celery_cleanup(TERMINATE, parallel.running_tasks)
            except BaseException:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
def run_calc(job_id, oqparam, exports, hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param exports:
        A comma-separated string of export types.
    :param hazard_calculation_id:
        ID of a previous hazard calculation to start from, if any
    :returns:
        the calculator instance on success
    """
    register_signals()
    setproctitle('oq-job-%d' % job_id)
    calc = base.calculators(oqparam, calc_id=job_id)
    logging.info('%s running %s [--hc=%s]',
                 getpass.getuser(),
                 calc.oqparam.inputs['job_ini'],
                 calc.oqparam.hazard_calculation_id)
    logging.info('Using engine version %s', __version__)
    msg = check_obsolete_version(oqparam.calculation_mode)
    if msg:
        # was logs.LOG.warn: deprecated alias; the other run_calc
        # variants in this file use logs.LOG.warning
        logs.LOG.warning(msg)
    if OQ_DISTRIBUTE.startswith(('celery', 'zmq')):
        set_concurrent_tasks_default(job_id)
    calc.from_engine = True
    tb = 'None\n'
    try:
        if not oqparam.hazard_calculation_id:
            # archive the input files inside the datastore
            if 'input_zip' in oqparam.inputs:  # starting from an archive
                with open(oqparam.inputs['input_zip'], 'rb') as arch:
                    data = numpy.array(arch.read())
            else:
                logs.LOG.info('zipping the input files')
                bio = io.BytesIO()
                oqzip.zip_job(oqparam.inputs['job_ini'], bio, (), oqparam,
                              logging.debug)
                data = numpy.array(bio.getvalue())
                del bio
            calc.datastore['input/zip'] = data
            calc.datastore.set_attrs('input/zip', nbytes=data.nbytes)
            del data  # save memory
        poll_queue(job_id, _PID, poll_time=15)
        t0 = time.time()
        calc.run(exports=exports,
                 hazard_calculation_id=hazard_calculation_id,
                 close=False, **kw)
        logs.LOG.info('Exposing the outputs to the database')
        expose_outputs(calc.datastore)
        duration = time.time() - t0
        calc._monitor.flush()
        records = views.performance_view(calc.datastore)
        logs.dbcmd('save_performance', job_id, records)
        calc.datastore.close()
        logs.LOG.info('Calculation %d finished correctly in %d seconds',
                      job_id, duration)
        logs.dbcmd('finish', job_id, 'complete')
    except BaseException as exc:
        # a MasterKilled means the user aborted the job on purpose
        if isinstance(exc, MasterKilled):
            msg = 'aborted'
        else:
            msg = 'failed'
        tb = traceback.format_exc()
        try:
            logs.LOG.critical(tb)
            logs.dbcmd('finish', job_id, msg)
        except BaseException:  # an OperationalError may always happen
            sys.stderr.write(tb)
        raise
    finally:
        # if there was an error in the calculation, this part may fail;
        # in such a situation, we simply log the cleanup error without
        # taking further action, so that the real error can propagate
        try:
            if OQ_DISTRIBUTE.startswith('celery'):
                celery_cleanup(TERMINATE)
        except BaseException:
            # log the finalization error only if there is no real error
            if tb == 'None\n':
                logs.LOG.error('finalizing', exc_info=True)
    return calc