Example #1
File: jobs.py Project: Atemia/pyjip
def hold(job, clean_job=False, clean_logs=False, hold_children=True):
    """Hold the given job make sure its no longer on the cluster.
    The function takes only jobs that are in active state and takes
    care of the cancellation of any children.

    :param job: the job
    :param clean_logs: if True, the job log files will be deleted
    :param clean_job: if True, the job results will be removed
    :param silent: if False, the method will print status messages
    """
    if not job.state in db.STATES_ACTIVE:
        return False

    log.info("Holding job: %s-%d", str(job), job.id)
    set_state(job, db.STATE_HOLD, cleanup=clean_job)
    db.update_job_states(get_group_jobs(job))

    if len(job.pipe_from) == 0:
        cluster = jip.cluster.get()
        cluster.cancel(job)

    if clean_logs:
        clean(job)
    if hold_children:
        # hold the children recursively
        for child in job.children:
            hold(child,
                 clean_job=clean_job,
                 clean_logs=clean_logs,
                 hold_children=hold_children)
    return True
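A minimal usage sketch for hold. The database bootstrap and the job lookup below (db.init, db.get) are hypothetical helpers used only for illustration; the hold call itself follows the signature above.

import jip.db as db

db.init()        # hypothetical: open the default job database
job = db.get(1)  # hypothetical: fetch the job with database id 1
if hold(job, clean_logs=True):
    print("Job %s-%d is now on hold" % (str(job), job.id))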
Example #2
File: jobs.py Project: Atemia/pyjip
    def handle_signal(signum, frame):
        log.warn("Signal %s received, going to fail state", signum)

        # A signal arrived and the job object may be detached;
        # we need to re-create the session and re-attach the object
        from sqlalchemy import inspect
        insp = inspect(job)
        if insp.detached:
            session = jip.db.create_session()
            session.add(job)

        set_state(job, jip.db.STATE_FAILED, check_state=save)
        if save:
            db.update_job_states([job] + job.pipe_to)
        sys.exit(1)
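run_job (Example #5 below) installs this handler through _setup_signal_handler before the job's process starts. A sketch of what that registration might look like, reconstructed from the handler above; the choice of SIGTERM/SIGINT is an assumption, not taken from the source.

import signal
import sys

def _setup_signal_handler(job, save=False):
    # Sketch: mark the job FAILED when its process is terminated by a
    # signal, instead of leaving it in an active state.
    def handle_signal(signum, frame):
        log.warn("Signal %s received, going to fail state", signum)
        set_state(job, jip.db.STATE_FAILED, check_state=save)
        if save:
            db.update_job_states([job] + job.pipe_to)
        sys.exit(1)

    # assumption: these are the signals a cluster sends on cancellation
    signal.signal(signal.SIGTERM, handle_signal)
    signal.signal(signal.SIGINT, handle_signal)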
Example #3
File: jobs.py Project: Atemia/pyjip
def cancel(job,
           clean_job=False,
           clean_logs=False,
           cluster=None,
           save=False,
           cancel_children=True):
    """Cancel the given job and make sure its no longer on the cluster.

    The function takes only jobs that are in active state and takes
    care of the cancellation of any children.

    :param job: the job
    :type job: `jip.db.Job`
    :param clean_logs: if True, the job log files will be deleted
    :param clean_job: if True, the job results will be removed
    :param cluster: if no cluster is specified and this is the parent
                    job in a group, the default cluster is loaded
    :param save: if True, save job in database after state change
    :param cancel_children: set this to False to disable canceling children of
                            a given job

    :returns: True if job was canceled
    """
    if job.state not in db.STATES_ACTIVE and job.state != db.STATE_CANCELED:
        return False
    log.info("Canceling job: %s-%d", str(job), job.id)
    set_state(job, db.STATE_CANCELED, cleanup=clean_job)
    if save:
        db.update_job_states(job)

    # cancel the job on the cluster if this is a parent job
    if len(job.pipe_from) == 0:
        cluster = jip.cluster.get() if not cluster else cluster
        cluster.cancel(job)

    if clean_logs:
        clean(job)

    # cancel children
    if cancel_children:
        for child in job.children:
            cancel(child,
                   clean_job=clean_job,
                   clean_logs=clean_logs,
                   cluster=cluster,
                   save=save,
                   cancel_children=cancel_children)
    return True
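A short usage sketch: cancel an active job, delete its log files, and persist the state change. The job variable is assumed to be an active jip.db.Job instance; the call mirrors the signature above.

# Children are canceled recursively because cancel_children
# defaults to True.
if cancel(job, clean_logs=True, save=True):
    print("Canceled %s-%d" % (str(job), job.id))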
Example #4
File: jobs.py Project: Poshi/pyjip
    def handle_signal(signum, frame):
        log.warn("Signal %s received, going to fail state", signum)
        set_state(job, jip.db.STATE_FAILED, check_state=save)
        if save:
            db.update_job_states([job] + job.pipe_to)
        sys.exit(1)
Example #5
File: jobs.py Project: Atemia/pyjip
def run_job(job,
            save=False,
            profiler=False,
            submit_embedded=False,
            closeDB=False):
    """Execute the given job. This method returns immediately in case the
    job has a pipe source. Otherwise the job and all its dispatch jobs are
    executed.

    NOTE that the run method installs a signal handler that sets the given
    job's state to failed in case the job's process is terminated by a signal.

    :param job: the job to run. Note that jobs with pipe sources are ignored
    :type job: `jip.db.Job`
    :param save: if True, the job's state changes are persisted in the database
    :param profiler: if set to True, job profiling is enabled
    :param submit_embedded: if True, embedded pipelines will be submitted and
                            not executed directly
    :returns: True if the job was executed successfully
    :rtype: boolean
    """
    if len(job.pipe_from) > 0:
        return
    # setup signal handling
    _setup_signal_handler(job, save=save)

    # create the dispatcher graph
    dispatcher_nodes = jip.executils.create_dispatcher_graph(job)
    log.info("%s | Dispatch graph: %s", job, dispatcher_nodes)
    # load job environment
    env = job.env
    if env is not None:
        for k, v in env.items():
            log.info("Loading job environment %s:%s", k, v)
            os.environ[k] = str(v)

    # Issue #37
    # make sure working directories exist at submission time
    if not os.path.exists(job.working_directory):
        os.makedirs(job.working_directory)
    for child in job.pipe_to:
        if not os.path.exists(child.working_directory):
            os.makedirs(child.working_directory)

    # Execute the commands
    for dispatcher_node in dispatcher_nodes:
        dispatcher_node.run(profiler=profiler)

    all_jobs = get_group_jobs(job)
    if save:
        # save the update job state
        db.update_job_states(all_jobs)

    success = True

    # Close the DB connection for the execution of the commands,
    # the job object gets the detached state
    session = jip.db.create_session()
    jip.db.commit_session(session)
    session.close()

    # we collect the state of all jobs in the dispatcher first
    # a single failure will cause ALL nodes/jobs in that dispatcher
    # to be marked as failed
    for dispatcher_node in reversed(dispatcher_nodes):
        success &= dispatcher_node.wait()

    # The commands finished their execution, re-attach the job object
    session = jip.db.create_session()
    session.add(job)

    # get the new state and update all jobs
    new_state = db.STATE_DONE if success else db.STATE_FAILED
    for dispatcher_node in reversed(dispatcher_nodes):
        for job in dispatcher_node.sources:
            jip.jobs.set_state(job, new_state, update_children=False)

    if save:
        # save the update job state at the end of the run
        db.update_job_states(all_jobs)

    # handle embedded pipelines and callables
    if job.on_success and success:
        for element in job.on_success:
            if isinstance(element, jip.pipelines.Pipeline):
                ## run or submit embedded pipeline
                # Create a base profile for the embedded job
                # that is based on the current jobs profile
                profile = jip.profiles.Profile.from_job(job)
                # glob the inputs
                for n in element.nodes():
                    n._tool.options.glob_inputs()
                # TODO: handle the other parameters (i.e. profile, keep)
                # TODO: catch exception and make the job fail
                jobs = create_jobs(element, profile=profile)
                # add dependency to this job
                for j in jobs:
                    j.dependencies.append(job)
                for exe in create_executions(jobs, save=submit_embedded):
                    if not submit_embedded:
                        success &= run_job(exe.job, save=save)
                    else:
                        submit_job(exe.job)
    return success
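A sketch of running a whole pipeline locally, mirroring the embedded-pipeline branch above. `pipeline` is a placeholder for a jip.pipelines.Pipeline instance; create_jobs and create_executions are used exactly as in the on_success block.

# Expand the pipeline into jobs and execute each execution's parent
# job; run_job returns immediately for jobs that have a pipe source.
jobs = create_jobs(pipeline)  # `pipeline` is a placeholder
success = True
for exe in create_executions(jobs, save=False):
    success &= run_job(exe.job, save=False)
if not success:
    log.error("Pipeline execution failed")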
Example #6
File: jobs.py Project: Atemia/pyjip
def submit_job(job, clean=False, force=False, save=True, cluster=None):
    """Submit the given job to the cluster. This only submits jobs that are not
    `DONE`. The job has to be in `canceled`, `failed`, `queued`,
    or `hold` state to be submitted, unless `force` is set to True. This will
    NOT submit the child jobs. You have to submit the children
    yourself and ensure you do that in proper order.

    If job submission is forced and a job is in active state, the job
    is canceled first to ensure there is only a single instance of the
    job on the cluster.

    You have to set save to True in order to save the jobs after
    successful submission. This will use :py:meth:`jip.db.create_session` to
    get a session instance.

    If no cluster is specified, :py:func:`jip.cluster.get` is used to load
    the default cluster. This will raise a
    ``jip.cluster.ClusterImplementationError`` in case no compute cluster is
    configured.

    :param job: the job to be submitted
    :param clean: if True, the job log files will be deleted
    :param force: force job submission
    :param save: if True, job will be saved to the database
    :param cluster: the compute cluster instance. If ``None``, the default
                    cluster will be loaded from the jip configuration
    :returns: True if the job was submitted
    :raises jip.cluster.ClusterImplementationError: if no cluster could be
                                                    loaded
    """
    log.info("(Re)submitting %s", job)
    if not force and job.state == db.STATE_DONE:
        return False
    if len(job.pipe_from) != 0:
        return False

    cluster = cluster if cluster else jip.cluster.get()
    # cancel or clean the job
    if job.state in db.STATES_ACTIVE:
        cancel(job, clean_logs=True, cluster=cluster, cancel_children=False)
    elif clean:
        jip.jobs.clean(job, cluster=cluster)

    # set the job state
    set_state(job, db.STATE_QUEUED, update_children=True)

    if job.id is None:
        if not save:
            raise Exception("No ID assigned to your job! You have to enable "
                            "database save with save=True to store the "
                            "job and get an ID.")
        session = db.create_session()
        session.add(job)
        session = db.commit_session(session)
        session.close()

    # Issue #12
    # we have to make sure that log file folders exist
    # otherwise job submission might succeed but nothing
    # will be executed and the job fails silently without log files
    for log_file in (job.stdout, job.stderr):
        if not log_file:
            continue
        parent = os.path.dirname(log_file)
        if not parent:
            continue
        if not os.path.exists(parent):
            os.makedirs(parent)

    # Issue #37
    # make sure working directories exist at submission time
    if not os.path.exists(job.working_directory):
        os.makedirs(job.working_directory)
    for child in job.pipe_to:
        if not os.path.exists(child.working_directory):
            os.makedirs(child.working_directory)

    # submit the job
    cluster.submit(job)
    all_jobs = [job]

    # update child ids
    def _set_id(child):
        all_jobs.append(child)
        child.job_id = job.job_id
        for c in child.pipe_to:
            _set_id(c)

    for c in job.pipe_to:
        _set_id(c)

    if save:
        # save updates to job_id and dates for all_jobs
        db.update_job_states(all_jobs)
    return True
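A sketch of submitting a group of jobs. As the docstring notes, submit_job does not submit children, so `jobs` is assumed to be a placeholder list in dependency order (parents before their children); jobs piped from another job are skipped by submit_job itself.

# Submit each job in order and persist job ids; submit_job returns
# False for DONE jobs and for jobs that have a pipe source.
for j in jobs:
    if submit_job(j, save=True):
        log.info("Submitted %s as cluster job %s", j, j.job_id)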