Example #1
0
def kill_calculation(calculation: CalcJobNode, transport: Transport) -> None:
    """
    Kill the calculation through the scheduler

    :param calculation: the instance of CalcJobNode to kill.
    :param transport: an already opened transport to use to address the scheduler
    """
    job_id = calculation.get_job_id()

    if job_id is None:
        # the calculation has not yet been submitted to the scheduler
        return

    # Get the scheduler plugin class and initialize it with the correct transport
    scheduler = calculation.computer.get_scheduler()
    scheduler.set_transport(transport)

    # Call the proper kill method for the job ID of this calculation
    result = scheduler.kill(job_id)

    if result is not True:

        # Failed to kill because the job might have already been completed
        running_jobs = scheduler.get_jobs(jobs=[job_id], as_dict=True)
        job = running_jobs.get(job_id, None)

        # If the job is returned it is still running and the kill really failed, so we raise
        if job is not None and job.job_state != JobState.DONE:
            raise exceptions.RemoteOperationError(
                f'scheduler.kill({job_id}) was unsuccessful')
        else:
            EXEC_LOGGER.warning(
                'scheduler.kill() failed but job<{%s}> no longer seems to be running regardless',
                job_id)
Example #2
0
def submit_calculation(calculation: CalcJobNode, transport: Transport) -> str:
    """Submit a previously uploaded `CalcJob` to the scheduler.

    :param calculation: the instance of CalcJobNode to submit.
    :param transport: an already opened transport to use to submit the calculation.
    :return: the job id as returned by the scheduler `submit_from_script` call
    """
    job_id = calculation.get_job_id()

    # If the `job_id` attribute is already set, that means this function was already executed once and the scheduler
    # submit command was successful as the job id it returned was set on the node. This scenario can happen when the
    # daemon runner gets shutdown right after accomplishing the submission task, but before it gets the chance to
    # finalize the state transition of the `CalcJob` to the `UPDATE` transport task. Since the job is already submitted
    # we do not want to submit it a second time, so we simply return the existing job id here.
    if job_id is not None:
        return job_id

    scheduler = calculation.computer.get_scheduler()
    scheduler.set_transport(transport)

    submit_script_filename = calculation.get_option('submit_script_filename')
    workdir = calculation.get_remote_workdir()
    job_id = scheduler.submit_from_script(workdir, submit_script_filename)
    calculation.set_job_id(job_id)

    return job_id