コード例 #1
0
def update_job_calc_from_detailed_job_info(calc, detailed_job_info):
    """
    Updates the detailed job info for a JobCalculation as obtained from
    the scheduler

    :param calc: The job calculation
    :param detailed_job_info: the detailed information as returned by the
        scheduler for this job
    """
    from aiida.scheduler.datastructures import JobInfo

    last_jobinfo = calc._get_last_jobinfo()
    if last_jobinfo is None:
        last_jobinfo = JobInfo()
        last_jobinfo.job_id = calc.get_job_id()
        last_jobinfo.job_state = JOB_STATES.DONE

    last_jobinfo.detailedJobinfo = detailed_job_info
    calc._set_last_jobinfo(last_jobinfo)
コード例 #2
0
ファイル: execmanager.py プロジェクト: santiama/aiida_core
def update_running_calcs_status(authinfo):
    """
    Update the states of calculations in WITHSCHEDULER status belonging
    to user and machine as defined in the 'dbauthinfo' table.
    """
    from aiida.orm import JobCalculation, Computer
    from aiida.scheduler.datastructures import JobInfo
    from aiida.utils.logger import get_dblogger_extra

    if not authinfo.enabled:
        return

    execlogger.debug("Updating running calc status for user {} "
                     "and machine {}".format(authinfo.aiidauser.email,
                                             authinfo.dbcomputer.name))

    # This returns an iterator over aiida JobCalculation objects
    calcs_to_inquire = list(
        JobCalculation._get_all_with_state(state=calc_states.WITHSCHEDULER,
                                           computer=authinfo.dbcomputer,
                                           user=authinfo.aiidauser))

    # NOTE: no further check is done that machine and
    # aiidauser are correct for each calc in calcs
    s = Computer(dbcomputer=authinfo.dbcomputer).get_scheduler()
    t = authinfo.get_transport()

    computed = []

    # I avoid to open an ssh connection if there are
    # no calcs with state WITHSCHEDULER
    if len(calcs_to_inquire):
        jobids_to_inquire = [str(c.get_job_id()) for c in calcs_to_inquire]

        # Open connection
        with t:
            s.set_transport(t)
            # TODO: Check if we are ok with filtering by job (to make this work,
            # I had to remove the check on the retval for getJobs,
            # because if the job has computed and is not in the output of
            # qstat, it gives a nonzero retval)

            # TODO: catch SchedulerError exception and do something
            # sensible (at least, skip this computer but continue with
            # following ones, and set a counter; set calculations to
            # UNKNOWN after a while?
            if s.get_feature('can_query_by_user'):
                found_jobs = s.getJobs(user="******", as_dict=True)
            else:
                found_jobs = s.getJobs(jobs=jobids_to_inquire, as_dict=True)

            # I update the status of jobs

            for c in calcs_to_inquire:
                try:
                    logger_extra = get_dblogger_extra(c)
                    t._set_logger_extra(logger_extra)

                    jobid = c.get_job_id()
                    if jobid is None:
                        execlogger.error("JobCalculation {} is WITHSCHEDULER "
                                         "but no job id was found!".format(
                                             c.pk),
                                         extra=logger_extra)
                        continue

                    # I check if the calculation to be checked (c)
                    # is in the output of qstat
                    if jobid in found_jobs:
                        # jobinfo: the information returned by
                        # qstat for this job
                        jobinfo = found_jobs[jobid]
                        execlogger.debug("Inquirying calculation {} (jobid "
                                         "{}): it has job_state={}".format(
                                             c.pk, jobid, jobinfo.job_state),
                                         extra=logger_extra)
                        # For the moment, FAILED is not defined
                        if jobinfo.job_state in [job_states.DONE
                                                 ]:  # , job_states.FAILED]:
                            computed.append(c)
                            try:
                                c._set_state(calc_states.COMPUTED)
                            except ModificationNotAllowed:
                                # Someone already set it, just skip
                                pass

                        ## Do not set the WITHSCHEDULER state multiple times,
                        ## this would raise a ModificationNotAllowed
                        # else:
                        # c._set_state(calc_states.WITHSCHEDULER)

                        c._set_scheduler_state(jobinfo.job_state)

                        c._set_last_jobinfo(jobinfo)
                    else:
                        execlogger.debug("Inquirying calculation {} (jobid "
                                         "{}): not found, assuming "
                                         "job_state={}".format(
                                             c.pk, jobid, job_states.DONE),
                                         extra=logger_extra)

                        # calculation c is not found in the output of qstat
                        computed.append(c)
                        c._set_scheduler_state(job_states.DONE)
                except Exception as e:
                    # TODO: implement a counter, after N retrials
                    # set it to a status that
                    # requires the user intervention
                    execlogger.warning("There was an exception for "
                                       "calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__,
                                           e.message),
                                       extra=logger_extra)
                    continue

            for c in computed:
                try:
                    logger_extra = get_dblogger_extra(c)
                    try:
                        detailed_jobinfo = s.get_detailed_jobinfo(
                            jobid=c.get_job_id())
                    except NotImplementedError:
                        detailed_jobinfo = (
                            u"AiiDA MESSAGE: This scheduler does not implement "
                            u"the routine get_detailed_jobinfo to retrieve "
                            u"the information on "
                            u"a job after it has finished.")
                    last_jobinfo = c._get_last_jobinfo()
                    if last_jobinfo is None:
                        last_jobinfo = JobInfo()
                        last_jobinfo.job_id = c.get_job_id()
                        last_jobinfo.job_state = job_states.DONE
                    last_jobinfo.detailedJobinfo = detailed_jobinfo
                    c._set_last_jobinfo(last_jobinfo)
                except Exception as e:
                    execlogger.warning("There was an exception while "
                                       "retrieving the detailed jobinfo "
                                       "for calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__,
                                           e.message),
                                       extra=logger_extra)
                    continue
                finally:
                    # Set the state to COMPUTED as the very last thing
                    # of this routine; no further change should be done after
                    # this, so that in general the retriever can just
                    # poll for this state, if we want to.
                    try:
                        c._set_state(calc_states.COMPUTED)
                    except ModificationNotAllowed:
                        # Someone already set it, just skip
                        pass

    return computed