Example #1
def run_calc(job_id,
             oqparam,
             log_level,
             log_file,
             exports,
             hazard_calculation_id=None,
             **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be written.
        If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    :param hazard_calculation_id:
        ID of the parent hazard calculation, if any
    :param kw:
        Extra parameters passed to the calculator
    """
    monitor = Monitor('total runtime', measuremem=True)
    with logs.handle(job_id, log_level, log_file):  # run the job
        if USE_CELERY and os.environ.get('OQ_DISTRIBUTE') == 'celery':
            set_concurrent_tasks_default()
        calc = base.calculators(oqparam, monitor, calc_id=job_id)
        calc.from_engine = True
        tb = 'None\n'
        try:
            logs.dbcmd('set_status', job_id, 'executing')
            _do_run_calc(calc, exports, hazard_calculation_id, **kw)
            expose_outputs(calc.datastore)
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, calc._monitor.duration)
            logs.dbcmd('finish', job_id, 'complete')
        except BaseException:
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if USE_CELERY:
                    celery_cleanup(TERMINATE, parallel.Starmap.task_ids)
            except BaseException:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
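A minimal usage sketch, assuming the surrounding openquake.engine.engine module context of this version (logs, readinput etc. imported at module level); the INI path, job ID and export format are placeholders:

# Hypothetical invocation; 'job.ini' is a placeholder path and
# readinput.get_oqparam is assumed importable from openquake.commonlib.
job_id = logs.init('job', logging.INFO)        # create a fresh job record
oqparam = readinput.get_oqparam('job.ini')     # parse the configuration
calc = run_calc(job_id, oqparam, log_level='info', log_file=None,
                exports='csv')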
Example #2
def run_job(job_ini,
            log_level='info',
            log_file=None,
            exports='',
            username=getpass.getuser(),
            **kw):
    """
    Run a job using the specified config file and other options.

    :param str job_ini:
        Path to the calculation config (INI-style) file.
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    """
    job_id = logs.init('job', getattr(logging, log_level.upper()))
    with logs.handle(job_id, log_level, log_file):
        job_ini = os.path.abspath(job_ini)
        oqparam = eng.job_from_file(job_ini, job_id, username, **kw)
        kw['username'] = username
        eng.run_calc(job_id, oqparam, exports, **kw)
        for line in logs.dbcmd('list_outputs', job_id, False):
            safeprint(line)
    return job_id
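A usage sketch chaining a hazard job and a risk job (the INI file names are placeholders; hazard_calculation_id travels through **kw):

haz_id = run_job('job_hazard.ini', exports='csv')      # returns the job ID
run_job('job_risk.ini', hazard_calculation_id=haz_id)  # risk on that hazard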
Example #3
def main(calc_id: int, aggregate_by):
    """
    Re-run the postprocessing after an event based risk calculation
    """
    parent = util.read(calc_id)
    oqp = parent['oqparam']
    aggby = aggregate_by.split(',')
    for tagname in aggby:
        if tagname not in oqp.aggregate_by:
            raise ValueError('%r not in %s' % (tagname, oqp.aggregate_by))
    job_id = logs.init('job', level=logging.INFO)
    dic = dict(
        calculation_mode='reaggregate',
        description=oqp.description + ' [aggregate_by=%s]' % aggregate_by,
        user_name=getpass.getuser(), is_running=1, status='executing',
        pid=os.getpid(), hazard_calculation_id=job_id)
    logs.dbcmd('update_job', job_id, dic)
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    with logs.handle(job_id, logging.INFO):
        oqp.hazard_calculation_id = parent.calc_id
        parallel.Starmap.init()
        prc = PostRiskCalculator(oqp, job_id)
        try:
            prc.run(aggregate_by=aggby)
            engine.expose_outputs(prc.datastore)
            logs.dbcmd('finish', job_id, 'complete')
        except Exception:
            logging.critical(traceback.format_exc())
            logs.dbcmd('finish', job_id, 'failed')
            raise
        finally:
            parallel.Starmap.shutdown()
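An invocation sketch (the calculation ID is a placeholder); each tag in aggregate_by must already appear in the parent calculation's aggregate_by, otherwise a ValueError is raised:

main(1234, 'NAME_1')   # re-aggregate the losses of calculation 1234 by NAME_1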
Example #4
def run_calc(job_id, oqparam, exports, log_level='info', log_file=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param exports:
        A comma-separated string of export types.
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to the log file; if None, log to standard output
    :param kw:
        Extra parameters passed to calc.run
    """
    register_signals()
    setproctitle('oq-job-%d' % job_id)
    logs.init(job_id, getattr(logging, log_level.upper()))
    with logs.handle(job_id, log_level, log_file):
        calc = base.calculators(oqparam, calc_id=job_id)
        logging.info('%s running %s [--hc=%s]', getpass.getuser(),
                     calc.oqparam.inputs['job_ini'],
                     calc.oqparam.hazard_calculation_id)
        logging.info('Using engine version %s', __version__)
        msg = check_obsolete_version(oqparam.calculation_mode)
        if msg:
            logging.warning(msg)
        calc.from_engine = True
        tb = 'None\n'
        try:
            if OQ_DISTRIBUTE.endswith('pool'):
                logging.warning('Using %d cores on %s', parallel.CT // 2,
                                platform.node())
            set_concurrent_tasks_default(calc)
            t0 = time.time()
            calc.run(exports=exports, **kw)
            logging.info('Exposing the outputs to the database')
            expose_outputs(calc.datastore)
            path = calc.datastore.filename
            size = general.humansize(os.path.getsize(path))
            logging.info('Stored %s on %s in %d seconds', size, path,
                         time.time() - t0)
            logs.dbcmd('finish', job_id, 'complete')
            calc.datastore.close()
            for line in logs.dbcmd('list_outputs', job_id, False):
                general.safeprint(line)
        except BaseException as exc:
            if isinstance(exc, MasterKilled):
                msg = 'aborted'
            else:
                msg = 'failed'
            tb = traceback.format_exc()
            try:
                logging.critical(tb)
                logs.dbcmd('finish', job_id, msg)
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            parallel.Starmap.shutdown()
    # sanity check to make sure that the logging on file is working
    if log_file and log_file != os.devnull and os.path.getsize(log_file) == 0:
        logging.warning('The log file %s is empty!?', log_file)
    return calc
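A call sketch exercising the final log-file sanity check (the path is a placeholder; job_id and oqparam are assumed to come from logs.init and job_from_file as in the other examples):

calc = run_calc(job_id, oqparam, exports='',
                log_level='debug', log_file='/tmp/oq-job.log')
# if /tmp/oq-job.log is still empty at this point, a warning was printed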
Example #5
def recompute_losses(calc_id, aggregate_by):
    """Re-run the postprocessing after an event based risk calculation"""
    parent = util.read(calc_id)
    oqp = parent['oqparam']
    aggby = aggregate_by.split(',')
    for tagname in aggby:
        if tagname not in oqp.aggregate_by:
            raise ValueError('%r not in %s' % (tagname, oqp.aggregate_by))
    job_id = logs.init('job', level=logging.INFO)
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    with logs.handle(job_id, logging.INFO):
        oqp.hazard_calculation_id = calc_id
        parallel.Starmap.init()
        prc = PostRiskCalculator(oqp, job_id)
        try:
            prc.run(aggregate_by=aggby)
        finally:
            parallel.Starmap.shutdown()
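An invocation sketch (ID and tags are placeholders); as in Example #3, the tags must be a subset of the parent calculation's aggregate_by:

recompute_losses(1234, 'taxonomy,NAME_1')   # forced onto a local processpool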
Example #6
def run_jobs(job_inis,
             log_level='info',
             log_file=None,
             exports='',
             username=getpass.getuser(),
             **kw):
    """
    Run jobs using the specified config file and other options.

    :param job_inis:
        A list of paths to .ini files, or a list of job dictionaries
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    """
    dist = parallel.oq_distribute()
    jobparams = []
    multi = kw.pop('multi', None)
    loglvl = getattr(logging, log_level.upper())
    jobs = create_jobs(job_inis, loglvl, kw)
    hc_id = kw.pop('hazard_calculation_id', None)
    for job in jobs:
        job_id = job['_job_id']
        with logs.handle(job_id, log_level, log_file):
            oqparam = readinput.get_oqparam(job, hc_id=hc_id, **kw)
        logs.dbcmd(
            'update_job', job_id,
            dict(calculation_mode=oqparam.calculation_mode,
                 description=oqparam.description,
                 user_name=username,
                 hazard_calculation_id=hc_id))
        if (not jobparams and not multi and hc_id is None
                and 'sensitivity_analysis' not in job):
            hc_id = job_id
        jobparams.append((job_id, oqparam))
    jobarray = len(jobparams) > 1 and multi
    try:
        poll_queue(job_id, poll_time=15)
        # wait for an empty slot or a CTRL-C
    except BaseException:
        # the job aborted even before starting
        for job_id, oqparam in jobparams:
            logs.dbcmd('finish', job_id, 'aborted')
        return jobparams
    else:
        for job_id, oqparam in jobparams:
            dic = {'status': 'executing', 'pid': _PID}
            if jobarray:
                dic['hazard_calculation_id'] = jobparams[0][0]
            logs.dbcmd('update_job', job_id, dic)
    try:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('zmq_start')  # start the zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        allargs = [(job_id, oqparam, exports, log_level, log_file)
                   for job_id, oqparam in jobparams]
        if jobarray:
            with general.start_many(run_calc, allargs):
                pass
        else:
            for args in allargs:
                run_calc(*args)
    finally:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Stopping the zworkers')
            logs.dbcmd('zmq_stop')
        elif dist.startswith('celery'):
            celery_cleanup(config.distribution.terminate_workers_on_revoke)
    return jobparams
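A batch sketch with placeholder INI paths: without multi, the first job becomes the hazard calculation for the following ones, so a hazard/risk pair can be chained in a single call:

for job_id, oqparam in run_jobs(['job_hazard.ini', 'job_risk.ini']):
    print(job_id, oqparam.calculation_mode)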
Example #7
def run_jobs(job_inis, log_level='info', log_file=None, exports='',
             username=getpass.getuser(), **kw):
    """
    Run jobs using the specified config file and other options.

    :param job_inis:
        A list of paths to .ini files, or a list of job dictionaries
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    """
    jobparams = []
    multi = kw.pop('multi', None)
    loglvl = getattr(logging, log_level.upper())
    jobs = create_jobs(job_inis, loglvl, kw)  # initialize the logs
    if kw.get('hazard_calculation_id'):
        hc_id = int(kw['hazard_calculation_id'])
    else:
        hc_id = None
    for job in jobs:
        job_id = job['_job_id']
        job['hazard_calculation_id'] = hc_id
        with logs.handle(job_id, log_level, log_file):
            dic = dict(calculation_mode=job['calculation_mode'],
                       description=job['description'],
                       user_name=username, is_running=1)
            if hc_id:
                dic['hazard_calculation_id'] = hc_id
            logs.dbcmd('update_job', job_id, dic)
            if (not jobparams and not multi and
                    'hazard_calculation_id' not in kw and
                    'sensitivity_analysis' not in job):
                hc_id = job_id
            try:
                oqparam = readinput.get_oqparam(job)
            except BaseException:
                tb = traceback.format_exc()
                logging.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
                raise
        jobparams.append((job_id, oqparam))
    jobarray = len(jobparams) > 1 and multi
    try:
        poll_queue(job_id, poll_time=15)
        # wait for an empty slot or a CTRL-C
    except BaseException:
        # the job aborted even before starting
        for job_id, oqparam in jobparams:
            logs.dbcmd('finish', job_id, 'aborted')
        return jobparams
    else:
        for job_id, oqparam in jobparams:
            dic = {'status': 'executing', 'pid': _PID}
            if jobarray:
                dic['hazard_calculation_id'] = jobparams[0][0]
            logs.dbcmd('update_job', job_id, dic)
    try:
        if config.zworkers['host_cores'] and parallel.workers_status() == []:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('workers_start')  # start the workers
        allargs = [(job_id, oqparam, exports, log_level, log_file)
                   for job_id, oqparam in jobparams]
        if jobarray:
            with general.start_many(run_calc, allargs):
                pass
        else:
            for args in allargs:
                run_calc(*args)
    finally:
        if config.zworkers['host_cores']:
            logging.info('Stopping the workers')
            parallel.workers_stop()
    return jobparams
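A job-array sketch with placeholder INIs: with multi=True the calculations are started together via general.start_many and every job gets the first job's ID as its hazard_calculation_id:

run_jobs(['gmf1.ini', 'gmf2.ini', 'gmf3.ini'], multi=True)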
Example #8
def run_calc(job_id,
             oqparam,
             log_level,
             log_file,
             exports,
             hazard_calculation_id=None,
             **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be written.
        If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    :param hazard_calculation_id:
        ID of the parent hazard calculation, if any
    :param kw:
        Extra parameters passed to the calculator
    """
    setproctitle('oq-job-%d' % job_id)
    with logs.handle(job_id, log_level, log_file):  # run the job
        calc = base.calculators(oqparam, calc_id=job_id)
        calc.set_log_format()  # set the log format first of all
        logging.info('Running %s [--hc=%s]', calc.oqparam.inputs['job_ini'],
                     calc.oqparam.hazard_calculation_id)
        logging.info('Using engine version %s', __version__)
        msg = check_obsolete_version(oqparam.calculation_mode)
        if msg:
            logs.LOG.warning(msg)
        if OQ_DISTRIBUTE.startswith(('celery', 'zmq')):
            set_concurrent_tasks_default(job_id)
        calc.from_engine = True
        input_zip = oqparam.inputs.get('input_zip')
        tb = 'None\n'
        try:
            if input_zip:  # the input was zipped from the beginning
                with open(input_zip, 'rb') as f:
                    data = f.read()
            else:  # zip the input
                logs.LOG.info('zipping the input files')
                bio = io.BytesIO()
                zip(oqparam.inputs['job_ini'], bio, (), oqparam, logging.debug)
                data = bio.getvalue()
            calc.datastore['input_zip'] = numpy.array(data)
            calc.datastore.set_attrs('input_zip', nbytes=len(data))

            logs.dbcmd('update_job', job_id, {
                'status': 'executing',
                'pid': _PID
            })
            t0 = time.time()
            calc.run(exports=exports,
                     hazard_calculation_id=hazard_calculation_id,
                     close=False,
                     **kw)
            logs.LOG.info('Exposing the outputs to the database')
            expose_outputs(calc.datastore)
            duration = time.time() - t0
            calc._monitor.flush()
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, duration)
            logs.dbcmd('finish', job_id, 'complete')
        except BaseException:
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if OQ_DISTRIBUTE.startswith('celery'):
                    celery_cleanup(TERMINATE, parallel.running_tasks)
            except BaseException:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
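The tb = 'None\n' sentinel used here and in Example #1 tells a real calculation error apart from a cleanup-only failure. A condensed, self-contained sketch of the pattern, with placeholder work() and cleanup() callables:

import traceback

def run_with_cleanup(work, cleanup):
    tb = 'None\n'                     # sentinel: no real error yet
    try:
        work()
    except BaseException:
        tb = traceback.format_exc()   # remember the real error
        print(tb)
        raise                         # let it propagate
    finally:
        try:
            cleanup()
        except BaseException:
            # report the cleanup failure only if there was no real error
            if tb == 'None\n':
                traceback.print_exc()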
Example #9
def run_jobs(job_inis, log_level='info', log_file=None, exports='',
             username=getpass.getuser(), **kw):
    """
    Run jobs using the specified config file and other options.

    :param job_inis:
        A list of paths to .ini files.
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    """
    dist = parallel.oq_distribute()
    jobparams = []
    for job_ini in job_inis:
        # NB: the logs must be initialized BEFORE everything
        job_id = logs.init('job', getattr(logging, log_level.upper()))
        with logs.handle(job_id, log_level, log_file):
            oqparam = eng.job_from_file(os.path.abspath(job_ini), job_id,
                                        username, **kw)
        if (not jobparams and 'csm_cache' not in kw
                and 'hazard_calculation_id' not in kw):
            kw['hazard_calculation_id'] = job_id
        jobparams.append((job_id, oqparam))
    jobarray = len(jobparams) > 1 and 'csm_cache' in kw
    try:
        eng.poll_queue(job_id, poll_time=15)
        # wait for an empty slot or a CTRL-C
    except BaseException:
        # the job aborted even before starting
        for job_id, oqparam in jobparams:
            logs.dbcmd('finish', job_id, 'aborted')
        return jobparams
    else:
        for job_id, oqparam in jobparams:
            dic = {'status': 'executing', 'pid': eng._PID}
            if jobarray:
                dic['hazard_calculation_id'] = jobparams[0][0]
            logs.dbcmd('update_job', job_id, dic)
    try:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('zmq_start')  # start the zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        allargs = [(job_id, oqparam, exports, log_level, log_file)
                   for job_id, oqparam in jobparams]
        if jobarray:
            with start_many(eng.run_calc, allargs):
                pass
        else:
            for args in allargs:
                eng.run_calc(*args)
    finally:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Stopping the zworkers')
            logs.dbcmd('zmq_stop')
        elif dist.startswith('celery'):
            eng.celery_cleanup(config.distribution.terminate_workers_on_revoke)
    return jobparams
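A sketch of the csm_cache-driven job array in this variant (cache directory and INI names are placeholder values; csm_cache is forwarded untouched through **kw to eng.job_from_file):

run_jobs(['job1.ini', 'job2.ini'], csm_cache='/tmp/csm-cache')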