コード例 #1
0
def _submit_jobs(jobs, home_address, temp_dir=DEFAULT_TEMP_DIR, white_list=None,
                 quiet=True):
    """
    Submit a batch of jobs to the cluster inside one freshly opened session.

    Each job is stamped with the node white list and the address of the
    submitting host before it is appended to the session.

    :param jobs: list of jobs to be executed
    :type jobs: list of `Job`
    :param home_address: Full address (including IP and port) of JobMonitor on
                         submitting host. Running jobs will communicate with the
                         parent process at that address via ZMQ.
    :type home_address: str
    :param temp_dir: Local temporary directory for storing output for an
                     individual job.
    :type temp_dir: str
    :param white_list: List of acceptable nodes to use for scheduling job. If
                       None, all are used.
    :type white_list: list of str
    :param quiet: When true, do not output information about the jobs that have
                  been submitted.
    :type quiet: bool

    :returns: Session ID (the ``contact`` attribute of the session)
    """
    with Session() as drmaa_session:
        for pending in jobs:
            # Per-job settings are applied right before that job is queued
            pending.white_list = white_list
            pending.home_address = home_address
            _append_job_to_session(drmaa_session, pending, temp_dir=temp_dir,
                                   quiet=quiet)

        # Read the contact string while the session is still open
        return drmaa_session.contact
コード例 #2
0
    def __exit__(self, exc_type, exc_value, exc_tb):
        '''
        Close the socket and tidy up the session when the context ends.

        The socket is closed unconditionally. If a session ID is set, the
        session is reopened; when the context is exiting because of an
        exception, termination of all jobs in the session is requested, and
        in every case the session's job info is disposed of.

        Returns None, so an exception raised inside the context propagates.
        '''
        # The socket goes first, whatever else happens
        self.socket.close()

        # Without a session there is nothing further to clean up
        if self.session_id is None:
            return

        failed = exc_type is not None
        with Session(self.session_id) as session:
            if failed:
                self.logger.info('Encountered %s, so killing all jobs.',
                                 exc_type.__name__)
                # Best effort only: a failure to terminate is just logged
                try:
                    session.control(JOB_IDS_SESSION_ALL,
                                    JobControlAction.TERMINATE)
                except InvalidJobException:
                    self.logger.debug("Could not kill all jobs for session.",
                                      exc_info=True)

            # Dispose of job info to prevent a memory leak; do not wait
            try:
                session.synchronize([JOB_IDS_SESSION_ALL], TIMEOUT_NO_WAIT,
                                    dispose=True)
            except ExitTimeoutException:
                pass
コード例 #3
0
def test_with_session():
    """'with' statement works with Session"""
    # Attributes are read and printed one at a time, in this order
    reported = ("version", "contact", "drmsInfo", "drmaaImplementation")
    with Session() as s:
        for attr_name in reported:
            print(getattr(s, attr_name))
コード例 #4
0
def test_with_jt():
    """'with' statement works with JobTemplate"""
    s = Session()
    s.initialize()
    # Always leave the session, even if job submission or waiting fails;
    # otherwise the session stays initialized after a failing run.
    try:
        with s.createJobTemplate() as jt:
            jt.remoteCommand = 'sleep'
            jt.args = ['10']
            jid = s.runJob(jt)
            print(s.wait(jid))
    finally:
        s.exit()
コード例 #5
0
def _resubmit(session_id, job, temp_dir):
    """
    Try to kill a failed job and append it to its session again.

    When DRMAA is not present (local mode) only an error is logged.

    :param session_id: ID handed to ``Session`` to reopen the session
    :param job: job to resubmit; ``job.id`` identifies the job to terminate
    :param temp_dir: forwarded to ``_append_job_to_session``
    :returns: None (the result of ``_append_job_to_session`` is not returned)
    """
    log = logging.getLogger(__name__)
    log.info("starting resubmission process")

    if not DRMAA_PRESENT:
        log.error("Could not restart job because we're in local mode.")
        return

    with Session(session_id) as session:
        # Killing the old job is best effort; failure is logged, not raised
        try:
            session.control(job.id, JobControlAction.TERMINATE)
            log.info("zombie job killed")
        except Exception:
            log.error("Could not kill job with SGE id %s", job.id,
                      exc_info=True)

        # Queue the job again in the same session
        _append_job_to_session(session, job, temp_dir=temp_dir)
コード例 #6
0
        return "sge"  # XXX: should probably change to GE
    elif "Platform LSF" in drms_info:  # includes "IBM Platform LSF"
        return "lsf"
    elif drms_info.startswith("SLURM"):
        return "slurm"
    # not sure what PBS and PBS Pro return here.
    elif drms_info.startswith("Torque"):
        return "pbs"
    else:
        msg = ("unsupported distributed resource management system: %s" %
               drms_info)
        raise ValueError(msg)


# non-reentrant code
# Runs at import time: a session is opened just long enough to ask
# get_driver_name() which driver module to load.
# NOTE(review): presumably "non-reentrant" because only one DRMAA session may
# be open at a time -- confirm.
with Session() as _session:
    driver_name = get_driver_name(_session)

# level=1 makes this a relative import from the current package; the
# non-empty fromlist makes __import__ return the driver module itself.
driver = __import__(driver_name, globals(), locals(), [driver_name], 1)
# Re-export the driver's entry points as names of this module.
JobTemplateFactory = driver.JobTemplateFactory
make_native_spec = driver.make_native_spec
get_job_max_query_lifetime = driver.get_job_max_query_lifetime


class RestartableJob(object):
    def __init__(self, session, job_tmpl_factory, global_mem_usage,
                 mem_usage_key):
        self.session = session
        self.job_tmpl_factory = job_tmpl_factory

        # last trial index tried
コード例 #7
0
ファイル: testmisc.py プロジェクト: xzy3/drmaa-python
 def setUp(self):
     self.s = Session()