Beispiel #1
0
 def setUp(self):
     """Build the 'hello' job template, apply optional tweaks, and submit it.

     The template is stored on ``self.jt`` and the value returned by
     ``Session.runJob`` is stored on ``self.jid``.
     """
     template = Session.createJobTemplate()
     self.jt = template
     template.remoteCommand = 'python'
     template.args = ['-c', "print('hello from python!')"]
     # Optional hook: only called when this object has a ``jt_tweaks``
     # attribute, giving it a chance to adjust ``self.jt`` before submission.
     if hasattr(self, 'jt_tweaks'):
         self.jt_tweaks()
     self.jid = Session.runJob(template)
Beispiel #2
0
def test_with_jt():
    """'with' statement works with JobTemplate

    Initializes a session by hand, creates a job template inside a ``with``
    block, runs a ``sleep 10`` job from it and prints the result of waiting
    for that job.
    """
    s = Session()
    s.initialize()
    # try/finally so the session is always exited, even when submitting or
    # waiting raises; otherwise the initialized session would be left open.
    try:
        with s.createJobTemplate() as jt:
            jt.remoteCommand = 'sleep'
            jt.args = ['10']
            jid = s.runJob(jt)
            print(s.wait(jid))
    finally:
        s.exit()
Beispiel #3
0
    def __exit__(self, exc_type, exc_value, exc_tb):
        """
        Clean up when the ``with`` block is left.

        The socket is always closed. If a session id is set, the session is
        reopened; when the block was left because of an exception, all jobs
        of the session are asked to terminate, and in every case the job
        info of the session is disposed of.

        Nothing is returned, so an exception raised inside the ``with`` block
        is not suppressed by this method.
        """
        # The socket goes away regardless of how the block was left
        self.socket.close()

        # No session id means there is nothing else to clean up
        if self.session_id is None:
            return

        with Session(self.session_id) as session:
            if exc_type is not None:
                # The block failed: attempt to terminate every job
                self.logger.info('Encountered %s, so killing all jobs.',
                                 exc_type.__name__)
                try:
                    session.control(JOB_IDS_SESSION_ALL,
                                    JobControlAction.TERMINATE)
                except InvalidJobException:
                    self.logger.debug("Could not kill all jobs for session.",
                                      exc_info=True)

            # Dispose of the job info so it does not accumulate (memory leak);
            # a timeout here is deliberately ignored.
            try:
                session.synchronize([JOB_IDS_SESSION_ALL], TIMEOUT_NO_WAIT,
                                    dispose=True)
            except ExitTimeoutException:
                pass
Beispiel #4
0
def test_with_session():
    """'with' statement works with Session

    Opens a session through the context-manager protocol and prints four of
    its informational attributes.
    """
    info_attributes = ('version', 'contact', 'drmsInfo', 'drmaaImplementation')
    with Session() as session:
        for attribute in info_attributes:
            print(getattr(session, attribute))
Beispiel #5
0
def _submit_jobs(jobs, home_address, temp_dir=DEFAULT_TEMP_DIR, white_list=None,
                 quiet=True):
    """
    Submit every job in ``jobs`` through a single new session.

    Each job is tagged with the submitting host's address and the node white
    list, then handed to ``_append_job_to_session``.

    :param jobs: Jobs to submit.
    :type jobs: list of `Job`
    :param home_address: Full address (IP and port) of the JobMonitor on the
                         submitting host; stored on each job as
                         ``home_address``.
    :type home_address: str
    :param temp_dir: Local temporary directory, forwarded for each job to
                     ``_append_job_to_session``.
    :type temp_dir: str
    :param white_list: Acceptable nodes for scheduling, stored on each job as
                       ``white_list``. ``None`` is passed through unchanged.
    :type white_list: list of str
    :param quiet: Forwarded to ``_append_job_to_session``; when true, no
                  information about submitted jobs is output.
    :type quiet: bool

    :returns: The ``contact`` value of the session, read before the session
              is closed.
    """
    with Session() as session:
        for job in jobs:
            # record where the job should report back to
            job.home_address = home_address

            # restrict the job to the permitted nodes
            job.white_list = white_list

            # hand the job over to the session
            _append_job_to_session(session, job, temp_dir=temp_dir, quiet=quiet)

        # must be read while the session is still open
        session_contact = session.contact
    return session_contact
Beispiel #6
0
 def test_wait(self):
     """waiting for job completion

     Waits for the job submitted as ``self.jid`` and checks that the returned
     job info carries the same job id and exposes each expected attribute
     with exactly the expected type.
     """
     jinfo = Session.wait(self.jid)
     eq_(jinfo.jobId, self.jid)

     # Attribute name -> exact type it must have. Checked in this order.
     expected_types = (
         ('hasExited', bool),
         ('hasSignal', bool),
         ('terminatedSignal', str),
         ('hasCoreDump', bool),
         ('wasAborted', bool),
         ('exitStatus', int),
         ('resourceUsage', dict),
     )
     for attr, expected in expected_types:
         assert hasattr(jinfo, attr), 'job info has no %r attribute' % attr
         actual = type(getattr(jinfo, attr))
         # Exact type comparison on purpose: isinstance() would let a bool
         # pass where an int is required.
         assert actual is expected, (
             '%s is %s, expected %s' % (attr, actual.__name__,
                                        expected.__name__))
Beispiel #7
0
 def test_control_terminate(self):
     """control/terminate works

     Terminates the job submitted as ``self.jid``, synchronizes on it without
     disposing of its job info, then waits for it. A failure of the wait is
     accepted only when its message starts with 'code 24'.
     """
     Session.control(self.jid, JobControlAction.TERMINATE)
     # synchronize() takes a list of job ids; a bare string would be
     # iterated character by character.
     Session.synchronize([self.jid],
                         Session.TIMEOUT_WAIT_FOREVER,
                         False)
     try:
         Session.wait(self.jid, Session.TIMEOUT_WAIT_FOREVER)
     except Exception as e:
         # Guard against exceptions without arguments so the assertion
         # reports the real error instead of raising IndexError.
         message = e.args[0] if e.args else ''
         assert str(message).startswith('code 24'), repr(e)  # no rusage
Beispiel #8
0
def _resubmit(session_id, job, temp_dir):
    """
    Try to terminate a job and submit it again within an existing session.

    When ``DRMAA_PRESENT`` is false nothing is resubmitted and an error is
    logged instead.

    :param session_id: Value passed to ``Session(...)`` to open the session.
    :param job: Job to resubmit; its ``id`` is used for the terminate request.
    :param temp_dir: Forwarded to ``_append_job_to_session`` as ``temp_dir``.

    :returns: None; the function has no return value.
              NOTE(review): if callers need the ID of the new job, confirm
              where they are expected to read it from.
    """
    log = logging.getLogger(__name__)
    log.info("starting resubmission process")

    # Local mode: there is no cluster session to resubmit into
    if not DRMAA_PRESENT:
        log.error("Could not restart job because we're in local mode.")
        return

    with Session(session_id) as session:
        # Best effort: a failure to kill the old job is logged, not raised
        try:
            session.control(job.id, JobControlAction.TERMINATE)
            log.info("zombie job killed")
        except Exception:
            log.error("Could not kill job with SGE id %s", job.id,
                      exc_info=True)
        # Submit the job again in the same session
        _append_job_to_session(session, job, temp_dir=temp_dir)
Beispiel #9
0
        return "sge"  # XXX: should probably change to GE
    elif "Platform LSF" in drms_info:  # includes "IBM Platform LSF"
        return "lsf"
    elif drms_info.startswith("SLURM"):
        return "slurm"
    # not sure what PBS and PBS Pro return here.
    elif drms_info.startswith("Torque"):
        return "pbs"
    else:
        msg = ("unsupported distributed resource management system: %s" %
               drms_info)
        raise ValueError(msg)


# non-reentrant code
# A session is opened at import time only to determine the driver name via
# get_driver_name(). NOTE(review): "non-reentrant" presumably means this must
# not run while another session is already open in the process -- confirm.
with Session() as _session:
    driver_name = get_driver_name(_session)

# Relative import (level 1) of the module named after the detected driver.
# The non-empty fromlist makes __import__ return that module itself rather
# than its parent package.
driver = __import__(driver_name, globals(), locals(), [driver_name], 1)
# Expose the driver-specific implementations under driver-neutral names.
JobTemplateFactory = driver.JobTemplateFactory
make_native_spec = driver.make_native_spec
get_job_max_query_lifetime = driver.get_job_max_query_lifetime


class RestartableJob(object):
    def __init__(self, session, job_tmpl_factory, global_mem_usage,
                 mem_usage_key):
        self.session = session
        self.job_tmpl_factory = job_tmpl_factory

        # last trial index tried
Beispiel #10
0
 def test_run_bulk(self):
     """run bulk job

     Submits ``self.jt`` as a bulk job; the result is not inspected, so the
     test only checks that the call does not raise.
     """
     # NOTE(review): the numeric arguments are presumably begin index, end
     # index and step -- confirm against the Session.runBulkJobs signature.
     bulk_job_ids = Session.runBulkJobs(self.jt, 1, 2, 1)
Beispiel #11
0
 def test_environment(self):
     """environment variables are correctly passed to submitted jobs

     Waits for the job submitted as ``self.jid`` and requires that it reports
     the same job id and an exit status of 0.
     """
     job_info = Session.wait(self.jid)
     eq_(job_info.jobId, self.jid)
     assert hasattr(job_info, 'hasExited')
     # Presence first, then value: a missing attribute fails as an assertion
     assert hasattr(job_info, 'exitStatus')
     assert job_info.exitStatus == 0
Beispiel #12
0
 def test_sync(self):
     """sync with a job

     Synchronizes on the job submitted as ``self.jid`` using the default
     timeout and dispose settings.
     """
     # synchronize() takes a list of job ids; a bare string would be
     # iterated character by character.
     Session.synchronize([self.jid])
Beispiel #13
0
def test_allocate():
    """job template allocation

    Creates a job template and deletes it again right away.
    """
    template = Session.createJobTemplate()
    Session.deleteJobTemplate(template)
Beispiel #14
0
def teardown():
    """finalize DRMAA session

    Calls ``Session.exit()``. Presumably picked up by the test runner as the
    module-level teardown fixture -- confirm against the runner in use.
    """
    Session.exit()
Beispiel #15
0
def setup():
    """initialize DRMAA library

    Calls ``Session.initialize()``. Presumably picked up by the test runner
    as the module-level setup fixture -- confirm against the runner in use.
    """
    Session.initialize()
Beispiel #16
0
 def xtest_tmp(self):
     """Scratch test: run the template with a '.colordb' argument.

     Runs ``test_scalar_attributes`` first, replaces the template arguments,
     submits the job, waits for it and prints the resulting job info.
     NOTE(review): the ``x`` prefix presumably keeps the test runner from
     collecting this method -- confirm.
     """
     self.test_scalar_attributes()
     self.jt.args = ['.colordb']
     print(Session.wait(Session.runJob(self.jt)))
Beispiel #17
0
 def setUp(self):
     """Create a fresh job template and store it on ``self.jt``."""
     self.jt = Session.createJobTemplate()
Beispiel #18
0
 def tearDown(self):
     """Delete the job template stored on ``self.jt``."""
     Session.deleteJobTemplate(self.jt)
Beispiel #19
0
 def setUp(self):
     self.s = Session()