Code example #1
def getjobexitcode(lsfJobID):
    job, task = lsfJobID

    # first try bjobs to find out job state
    args = ["bjobs", "-l", str(job)]
    logger.debug("Checking job exit code for job via bjobs: " + str(job))
    process = subprocess.Popen(" ".join(args),
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    started = 0
    for line in process.stdout:
        if line.find("Done successfully") > -1:
            logger.debug("bjobs detected job completed for job: " + str(job))
            return 0
        elif line.find("Completed <exit>") > -1:
            logger.debug("bjobs detected job failed for job: " + str(job))
            return 1
        elif line.find("New job is waiting for scheduling") > -1:
            logger.debug("bjobs detected job pending scheduling for job: " +
                         str(job))
            return None
        elif line.find("PENDING REASONS") > -1:
            logger.debug("bjobs detected job pending for job: " + str(job))
            return None
        elif line.find("Started on ") > -1:
            started = 1

    if started == 1:
        logger.debug("bjobs detected job started but not completed: " +
                     str(job))
        return None

    # if not found in bjobs, then try bacct (slower than bjobs)
    logger.debug("bjobs failed to detect job - trying bacct: " + str(job))

    args = ["bacct", "-l", str(job)]
    logger.debug("Checking job exit code for job via bacct:" + str(job))
    process = subprocess.Popen(" ".join(args),
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    for line in process.stdout:
        if line.find("Completed <done>") > -1:
            logger.debug("Detected job completed for job: " + str(job))
            return 0
        elif line.find("Completed <exit>") > -1:
            logger.debug("Detected job failed for job: " + str(job))
            return 1
    logger.debug("Cant determine exit code for job or job still running: " +
                 str(job))
    return None
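The function above returns 0 for success, 1 for failure, and None while the job is still pending or running. A minimal polling sketch (not part of the original source) showing how a caller might wait on that convention:

import time

def wait_for_lsf_job(lsfJobID, poll_interval=10):
    # Poll getjobexitcode() until it reports a definite exit code.
    while True:
        exit_code = getjobexitcode(lsfJobID)  # (job, task) tuple, as above
        if exit_code is not None:
            return exit_code
        time.sleep(poll_interval)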
Code example #2
File: toilKillTest.py Project: tmooney/toil
    def test_cwl_toil_kill(self):
        """Test "toil kill" on a CWL workflow with a 100 second sleep."""

        run_cmd = [
            'toil-cwl-runner', '--jobStore', self.jobstore, self.cwl, self.yaml
        ]
        kill_cmd = ['toil', 'kill', self.jobstore]

        cwl_process = subprocess.Popen(run_cmd)
        time.sleep(2)
        subprocess.Popen(kill_cmd, stderr=subprocess.PIPE)

        assert cwl_process.poll() is None
Code example #3
        def getRunningJobIDs(self):
            times = {}
            with self.runningJobsLock:
                currentjobs = dict((str(self.batchJobIDs[x][0].strip()), x)
                                   for x in self.runningJobs)
            logger.debug("getRunningJobIDs current jobs are: " +
                         str(currentjobs))
            # Skip running qstat if we don't have any current jobs
            if not currentjobs:
                return times
            # Only query for job IDs to avoid clogging the batch system on heavily loaded clusters
            # PBS plain qstat will return every running job on the system.
            jobids = sorted(currentjobs.keys())
            if self._version == "pro":
                process = subprocess.Popen(['qstat', '-x'] + jobids,
                                           stdout=subprocess.PIPE)
            elif self._version == "oss":
                process = subprocess.Popen(['qstat'] + jobids,
                                           stdout=subprocess.PIPE)

            stdout, stderr = process.communicate()

            # qstat supports XML output which is more comprehensive, but PBSPro does not support it
            # so instead we stick with plain commandline qstat tabular outputs
            for currline in stdout.split('\n'):
                items = currline.strip().split()
                if items:
                    jobid = items[0].strip()
                    if jobid in currentjobs:
                        logger.debug("getRunningJobIDs job status for is: " +
                                     items[4])
                    if jobid in currentjobs and items[4] == 'R':
                        walltime = items[3]
                        logger.debug(
                            "getRunningJobIDs qstat reported walltime is: " +
                            walltime)
                        # normal qstat has a quirk with job time where it reports '0'
                        # when initially running; this catches this case
                        if walltime == '0':
                            walltime = time.mktime(
                                time.strptime(walltime, "%S"))
                        else:
                            walltime = time.mktime(
                                time.strptime(walltime, "%H:%M:%S"))
                        times[currentjobs[jobid]] = walltime

            logger.debug("Job times from qstat are: " + str(times))
            return times
Code example #4
File: slurm.py Project: MarkFilus/toil
 def _getJobDetailsFromSacct(self, slurmJobID):
     # SLURM job exit codes are obtained by running sacct.
     args = ['sacct',
             '-n', # no header
             '-j', str(slurmJobID), # job
             '--format', 'State,ExitCode', # specify output columns
             '-P', # separate columns with pipes
             '-S', '1970-01-01'] # override start time limit
     
     process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
     # communicate() waits for sacct to exit, so returncode is meaningful below
     stdout, _ = process.communicate()
     rc = process.returncode

     if rc != 0:
         # no accounting system or some other error
         return (None, -999)

     for line in stdout.decode('utf-8').splitlines():
         values = line.strip().split('|')
         if len(values) < 2:
             continue
         state, exitcode = values
         logger.debug("sacct job state is %s", state)
         # If Job is in a running state, return None to indicate we don't have an update
         status, _ = exitcode.split(':')
         logger.debug("sacct exit code is %s, returning status %s", exitcode, status)
         return (state, int(status))
     logger.debug("Did not find exit code for job in sacct output")
     return None
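Illustration only (not from the original source): with '-P' and '--format State,ExitCode', each sacct data line is pipe-separated, e.g. 'COMPLETED|0:0' or 'FAILED|1:0', which is what the split('|') / split(':') parsing above expects.

sample_line = "COMPLETED|0:0"
state, exitcode = sample_line.strip().split('|')
status, _signal = exitcode.split(':')       # ExitCode is "<status>:<signal>"
assert (state, int(status)) == ("COMPLETED", 0)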
Code example #5
        def getJobExitCode(self, torqueJobID):
            if self._version == "pro":
                args = ["qstat", "-x", "-f", str(torqueJobID).split('.')[0]]
            elif self._version == "oss":
                args = ["qstat", "-f", str(torqueJobID).split('.')[0]]

            process = subprocess.Popen(args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT)
            for line in process.stdout:
                line = line.strip()
                #logger.debug("getJobExitCode exit status: " + line)
                # Case differences due to PBSPro vs OSS Torque qstat outputs
                if line.startswith("failed") or line.startswith(
                        "FAILED") and int(line.split()[1]) == 1:
                    return 1
                if line.startswith("exit_status") or line.startswith(
                        "Exit_status"):
                    status = line.split(' = ')[1]
                    logger.debug('Exit Status: ' + status)
                    return int(status)
                if 'unknown job id' in line.lower():
                    # some clusters configure Torque to forget everything about just
                    # finished jobs instantly, apparently for performance reasons
                    logger.debug(
                        'Batch system no longer remembers about job {}'.format(
                            torqueJobID))
                    # return assumed success; status files should reveal failure
                    return 0
            return None
Code example #6
 def _runParasol(self, command, autoRetry=True):
     """
     Issues a parasol command using popen to capture the output. If the command fails then it
     will try pinging parasol until it gets a response. When it gets a response it will
     recursively call the issue parasol command, repeating this pattern for a maximum of N
     times. The final exit value will reflect this.
     """
     command = list(concat(self.parasolCommand, command))
     while True:
         logger.debug('Running %r', command)
         process = subprocess.Popen(command,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    bufsize=-1)
         stdout, stderr = process.communicate()
         status = process.wait()
         for line in stderr.split('\n'):
             if line: logger.warn(line)
         if status == 0:
             return 0, stdout.split('\n')
         message = 'Command %r failed with exit status %i' % (command,
                                                              status)
         if autoRetry:
             logger.warn(message)
         else:
             logger.error(message)
             return status, None
         logger.warn('Waiting for a 10s, before trying again')
         time.sleep(10)
Code example #7
    def killBatchJobs(self, jobIDs):
        """Kills the given job IDs."""
        self.killLocalJobs(jobIDs)
        for jobID in jobIDs:
            try:
                lsfID = self.getLsfID(jobID)
                logger.debug("DEL: " + str(lsfID))
                self.currentjobs.remove(jobID)
                subprocess.Popen(["bkill", lsfID])
                del self.jobIDs[self.lsfJobIDs[jobID]]
                del self.lsfJobIDs[jobID]
            except RuntimeError:
                jobIDs.remove(jobID)

        toKill = set(jobIDs)
        while len(toKill) > 0:
            for jobID in list(toKill):
                if getjobexitcode(self.lsfJobIDs[jobID]) is not None:
                    toKill.remove(jobID)

            if len(toKill) > 0:
                logger.warn(
                    "Tried to kill some jobs, but something happened and they are still going, "
                    "so I'll try again")
                time.sleep(5)
Code example #8
File: node.py Project: drjrm3/toil-lsf-bacct-fix
    def coreSSH(self, *args, **kwargs):
        """
        If strict=False, strict host key checking will be temporarily disabled.
        This is provided as a convenience for internal/automated functions and
        ought to be set to True whenever feasible, or whenever the user is directly
        interacting with a resource (e.g. rsync-cluster or ssh-cluster). Assumed
        to be False by default.

        kwargs: input, tty, appliance, collectStdout, sshOptions, strict
        """
        commandTokens = ['ssh', '-t']
        strict = kwargs.pop('strict', False)
        if not strict:
            kwargs['sshOptions'] = ['-oUserKnownHostsFile=/dev/null', '-oStrictHostKeyChecking=no'] \
                                 + kwargs.get('sshOptions', [])
        sshOptions = kwargs.pop('sshOptions', None)
        # Forward ports:
        # 3000 for Grafana dashboard
        # 9090 for Prometheus dashboard
        # 5050 for Mesos dashboard (although to talk to agents you will need a proxy)
        commandTokens.extend(['-L', '3000:localhost:3000', \
                              '-L', '9090:localhost:9090', \
                              '-L', '5050:localhost:5050'])
        if sshOptions:
            # add specified options to ssh command
            assert isinstance(sshOptions, list)
            commandTokens.extend(sshOptions)
        # specify host
        user = kwargs.pop('user', 'core')   # CHANGED: Is this needed?
        commandTokens.append('%s@%s' % (user,str(self.effectiveIP)))
        appliance = kwargs.pop('appliance', None)
        if appliance:
            # run the args in the appliance
            tty = kwargs.pop('tty', None)
            ttyFlag = '-t' if tty else ''
            commandTokens += ['docker', 'exec', '-i', ttyFlag, 'toil_leader']

        inputString = kwargs.pop('input', None)
        if inputString is not None:
            kwargs['stdin'] = subprocess.PIPE
        collectStdout = kwargs.pop('collectStdout', None)
        if collectStdout:
            kwargs['stdout'] = subprocess.PIPE
        kwargs['stderr'] = subprocess.PIPE

        logger.debug('Node %s: %s', self.effectiveIP, ' '.join(args))
        args = list(map(pipes.quote, args))
        commandTokens += args
        logger.debug('Full command %s', ' '.join(commandTokens))
        popen = subprocess.Popen(commandTokens, **kwargs)
        stdout, stderr = popen.communicate(input=inputString)
        # at this point the process has already exited, no need for a timeout
        resultValue = popen.wait()
        # ssh has been throwing random 255 errors - why?
        if resultValue != 0:
            logger.debug('SSH Error (%s) %s' % (resultValue, stderr))
            raise RuntimeError('Executing the command "%s" on the appliance returned a non-zero '
                               'exit code %s with stdout %s and stderr %s'
                               % (' '.join(args), resultValue, stdout, stderr))
        return stdout
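A hypothetical usage sketch (not in the original file); `node` is assumed to be an instance of the class defining coreSSH:

# Run a command on the node and capture its stdout; host key checking is
# relaxed because strict defaults to False.
kernel_info = node.coreSSH('uname', '-a', collectStdout=True)

# Run a command inside the toil_leader appliance container instead.
node.coreSSH('ls', '/var/lib/toil', appliance=True)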
Code example #9
    def getRunningBatchJobIDs(self):
        """Gets a map of jobs (as jobIDs) currently running (not just waiting) 
        and a how long they have been running for (in seconds).
        """
        times = {}
        currentjobs = set()
        for x in self.getIssuedBatchJobIDs():
            if x in self.lsfJobIDs:
                currentjobs.add(self.lsfJobIDs[x])
            else:
                # not yet started
                pass
        process = subprocess.Popen(["bjobs"], stdout=subprocess.PIPE)

        for curline in process.stdout:
            items = curline.strip().split()
            if (len(items) > 9 and
                (items[0]) in currentjobs) and items[2] == 'RUN':
                jobstart = "/".join(items[7:9]) + '/' + str(date.today().year)
                jobstart = jobstart + ' ' + items[9]
                # bjobs reports no year, so the current year was appended above
                jobstart = time.mktime(
                    time.strptime(jobstart, "%b/%d/%Y %H:%M"))
                times[self.jobIDs[(items[0])]] = time.time() - jobstart
        times.update(self.getRunningLocalJobIDs())
        return times
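A worked example (illustration only) of the timestamp reconstruction above: bjobs prints a start time such as 'Oct 10 10:42' with no year, so the code appends the current year before parsing with '%b/%d/%Y %H:%M'.

import time
from datetime import date

items = ['Oct', '10', '10:42']                      # month, day, HH:MM columns
jobstart = "/".join(items[0:2]) + '/' + str(date.today().year) + ' ' + items[2]
jobstart = time.mktime(time.strptime(jobstart, "%b/%d/%Y %H:%M"))
elapsed_seconds = time.time() - jobstart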
Code example #10
File: __init__.py Project: arostamianfar/toil
def needs_appliance(test_item):
    """
    Use as a decorator before test classes or methods to only run them if
    the Toil appliance Docker image is downloaded.
    """
    test_item = _mark_test('appliance', test_item)
    if os.getenv('TOIL_SKIP_DOCKER', '').lower() == 'true':
        return unittest.skip('Skipping docker test.')(test_item)
    if not which('docker'):
        return unittest.skip("Install docker to include this test.")(test_item)

    try:
        image = applianceSelf()
        stdout, stderr = subprocess.Popen(
            ['docker', 'inspect', '--format="{{json .RepoTags}}"', image],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE).communicate()
        if image in stdout.decode("utf-8"):
            return test_item
    except:
        pass

    return unittest.skip(
        f"Cannot find appliance {image}. Use 'make test' target to automatically build appliance, or "
        f"just run 'make push_docker' prior to running this test.")(test_item)
Code example #11
def writeA(job, mkFile):
    '''Runs a program, and writes a string 'A' into A.txt using mkFile.py.'''
    job.fileStore.logToMaster('''writeA''')

    # temp folder for the run
    tempDir = job.fileStore.getLocalTempDir()

    # import files
    try:
        mkFile_fs = job.fileStore.readGlobalFile(mkFile[0],
                                                 userPath=os.path.join(
                                                     tempDir, mkFile[1]))
    except:
        mkFile_fs = os.path.join(tempDir, mkFile[1])

    # make a file (A.txt) and writes a string 'A' into it using 'mkFile.py'
    content = 'A'
    cmd = 'python' + ' ' + mkFile_fs + ' ' + 'A.txt' + ' ' + content
    this_process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    this_process.wait()

    # get the output file and return it as a tuple of location + name
    output_filename = 'A.txt'
    output_file = job.fileStore.writeGlobalFile(output_filename)
    A1 = (output_file, output_filename)
    rvDict = {"A1": A1}
    return rvDict
Code example #12
        def getJobExitCode(self, sgeJobID):
            # the task is set as part of the job ID if using getBatchSystemID()
            job, task = (sgeJobID, None)
            if '.' in sgeJobID:
                job, task = sgeJobID.split('.', 1)

            args = ["qacct", "-j", str(job)]

            if task is not None:
                args.extend(["-t", str(task)])

            logger.debug("Running %r", args)
            process = subprocess.Popen(args,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.STDOUT)
            toil = False
            for line in process.stdout:
                if line.startswith("failed") and int(line.split()[1]) == 1:
                    return 1
                ####start####
                if line.startswith("jobname") and line.find("toil_job") != -1:
                    toil = True
                if toil and line.startswith("exit_status"):
                    logger.debug('Exit Status: %r', line.split()[1])
                    return int(line.split()[1])
                ####end####
                #elif line.startswith("exit_status"):
                #    logger.debug('Exit Status: %r', line.split()[1])
                #    return int(line.split()[1])
            return None
Code example #13
    def _run(cls, command, *args, **kwargs):
        """
        Run a command. Convenience wrapper for subprocess.check_call and subprocess.check_output.

        :param str command: The command to be run.

        :param str args: Any arguments to be passed to the command.

        :param Any kwargs: keyword arguments for subprocess.Popen constructor. Pass capture=True
               to have the process' stdout returned. Pass input='some string' to feed input to the
               process' stdin.

        :rtype: None|str

        :return: The output of the process' stdout if capture=True was passed, None otherwise.
        """
        args = list(concat(command, args))
        log.info('Running %r', args)
        capture = kwargs.pop('capture', False)
        _input = kwargs.pop('input', None)
        if capture:
            kwargs['stdout'] = subprocess.PIPE
        if _input is not None:
            kwargs['stdin'] = subprocess.PIPE
        popen = subprocess.Popen(args, **kwargs)
        stdout, stderr = popen.communicate(input=_input)
        assert stderr is None
        if popen.returncode != 0:
            raise subprocess.CalledProcessError(popen.returncode, args)
        if capture:
            return stdout
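A standalone sketch (not from the original source) of the same capture/input pattern that _run wraps, using `cat` as a stand-in command (assumed to be available on the system):

import subprocess

popen = subprocess.Popen(['cat'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
stdout, _ = popen.communicate(input=b'hello\n')     # feed stdin, collect stdout
assert popen.returncode == 0 and stdout == b'hello\n'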
Code example #14
    def testGetPIDStatus(self):
        """Test that ToilStatus.getPIDStatus() behaves as expected."""
        wf = subprocess.Popen(self.sort_workflow_cmd)
        self.check_status('RUNNING', status_fn=ToilStatus.getPIDStatus)
        wf.wait()
        self.check_status('COMPLETED', status_fn=ToilStatus.getPIDStatus)

        os.remove(os.path.join(self.toilDir, 'pid.log'))
        self.check_status('QUEUED', status_fn=ToilStatus.getPIDStatus)
Code example #15
 def run(self):
     command = self.parasolCommand()
     with self.lock:
         self.popen = subprocess.Popen(command)
     status = self.popen.wait()
     if status != 0 and status != -signal.SIGKILL:
         log.error("Command '%s' failed with %i.", command, status)
         raise subprocess.CalledProcessError(status, command)
     log.info('Exiting %s', self.__class__.__name__)
Code example #16
 def checkExitCode(self, script):
     program = os.path.join(self.directory, "scripts", script)
     process = subprocess.Popen(["python", program, "file:my-jobstore", "--clean=always"],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     stdout, stderr = process.communicate()
     assert process.returncode == 0, stderr
     if isinstance(stdout, bytes):
         return stdout.decode('utf-8') + ' ' + stderr.decode('utf-8')
     return stdout + ' ' + stderr
Code example #17
def bsub(bsubline):
    process = subprocess.Popen(" ".join(bsubline),
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    liney = process.stdout.readline()
    logger.debug("BSUB: " + liney)
    result = int(liney.strip().split()[1].strip('<>'))
    logger.debug("Got the job id: %s" % (str(result)))
    return result
Code example #18
 def submitJob(self, subLine):
     combinedEnv = self.boss.environment
     combinedEnv.update(os.environ)
     process = subprocess.Popen(subLine, stdout=subprocess.PIPE,
                                env=combinedEnv)
     line = process.stdout.readline()
     logger.debug("BSUB: " + line)
     result = int(line.strip().split()[1].strip('<>'))
     logger.debug("Got the job id: {}".format(result))
     return result
Code example #19
    def _addToHosts(self, node, destinationIP=None):
        """
        Add an "privateIP hostname" line to the /etc/hosts file. If destinationIP is given,
        do this on the remote machine.

        Azure VMs sometimes fail to initialize, causing the appliance to fail.
        This error is given:
           Failed to obtain the IP address for 'l7d41a19b-15a6-442c-8ba1-9678a951d824';
           the DNS service may not be able to resolve it: Name or service not known.
        This method is a fix.

        :param node: Node to add to /etc/hosts.
        :param destinationIP: A remote host's address
        """
        cmd = "echo %s %s | sudo tee --append /etc/hosts > /dev/null" % (node.privateIP, node.name)
        logger.debug("Running command %s on %s" % (cmd, destinationIP))
        if destinationIP:
            subprocess.Popen(["ssh", "-oStrictHostKeyChecking=no", "core@%s" % destinationIP, cmd])
        else:
            subprocess.Popen(cmd, shell=True)
Code example #20
    def testGetPIDStatus(self):
        """Test that ToilStatus.getPIDStatus() behaves as expected."""
        wf = subprocess.Popen(self.sort_workflow_cmd)
        self.check_status('RUNNING', status_fn=ToilStatus.getPIDStatus)
        wf.wait()
        self.check_status('COMPLETED', status_fn=ToilStatus.getPIDStatus)

        # TODO: we need to reach into the FileJobStore's files and
        # delete this shared file. We assume we know its internal layout.
        os.remove(os.path.join(self.toilDir, 'files/shared/pid.log'))
        self.check_status('QUEUED', status_fn=ToilStatus.getPIDStatus)
Code example #21
 def testGetStatusSuccessfulCWLWF(self):
     """Test that ToilStatus.getStatus() behaves as expected with a successful CWL workflow."""
     cmd = [
         'toil-cwl-runner', '--jobStore', self.toilDir, '--clean=never',
         'src/toil/test/cwl/sorttool.cwl', '--reverse', '--input',
         'src/toil/test/cwl/whale.txt'
     ]
     wf = subprocess.Popen(cmd)
     self.check_status('RUNNING', status_fn=ToilStatus.getStatus)
     wf.wait()
     self.check_status('COMPLETED', status_fn=ToilStatus.getStatus)
Code example #22
 def testGetStatusFailedCWLWF(self):
     """Test that ToilStatus.getStatus() behaves as expected with a failing CWL workflow."""
     # --badWorker is set to force failure.
     cmd = [
         'toil-cwl-runner', '--jobStore', self.toilDir, '--clean=never',
         '--badWorker=1', 'src/toil/test/cwl/sorttool.cwl', '--reverse',
         '--input', 'src/toil/test/cwl/whale.txt'
     ]
     wf = subprocess.Popen(cmd)
     self.check_status('RUNNING', status_fn=ToilStatus.getStatus)
     wf.wait()
     self.check_status('ERROR', status_fn=ToilStatus.getStatus)
Code example #23
    def testGetStatusFailedToilWF(self):
        """
        Test that ToilStatus.getStatus() behaves as expected with a failing Toil workflow.

        While this workflow could be called by importing and invoking its main function, doing so would remove the
        opportunity to test the 'RUNNING' functionality of getStatus().
        """
        # --badWorker is set to force failure.
        wf = subprocess.Popen(self.sort_workflow_cmd + ['--badWorker=1'])
        self.check_status('RUNNING', status_fn=ToilStatus.getStatus)
        wf.wait()
        self.check_status('ERROR', status_fn=ToilStatus.getStatus)
Code example #24
File: executor.py Project: huyu335/toil_ref
        def runJob(job):
            """
            :type job: toil.batchSystems.mesos.ToilJob

            :rtype: subprocess.Popen
            """
            if job.userScript:
                job.userScript.register()
            log.debug("Invoking command: '%s'", job.command)
            with self.popenLock:
                return subprocess.Popen(job.command,
                                        preexec_fn=lambda: os.setpgrp(),
                                        shell=True, env=dict(os.environ, **job.environment))
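Because the job is started in its own process group via preexec_fn=os.setpgrp, the shell and anything it spawned can later be signalled together. A minimal sketch (not from the original source):

import os
import signal

def kill_job_process_group(popen):
    # The child's process group id equals its pid, since it called setpgrp().
    os.killpg(os.getpgid(popen.pid), signal.SIGKILL)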
Code example #25
File: singleMachine.py Project: diekhans/toil
 def _runWorker(self, jobCommand, jobID, environment):
     """
     Run the jobCommand using the worker and wait for it to finish.
     The worker is forked unless it is a '_toil_worker' job and
     debugWorker is True.
     """
     startTime = time.time()  # Time job is started
     if self.debugWorker and "_toil_worker" in jobCommand:
         # Run the worker without forking
         jobName, jobStoreLocator, jobStoreID = jobCommand.split()[
             1:]  # Parse command
         jobStore = Toil.resumeJobStore(jobStoreLocator)
         # TODO: The following does not yet properly populate self.runningJobs so it is not possible to kill
         # running jobs in forkless mode - see the "None" value in place of popen
         info = Info(time.time(), None, killIntended=False)
         try:
             self.runningJobs[jobID] = info
             try:
                 toil_worker.workerScript(
                     jobStore,
                     jobStore.config,
                     jobName,
                     jobStoreID,
                     redirectOutputToLogFile=not self.debugWorker
                 )  # Call the worker
             finally:
                 self.runningJobs.pop(jobID)
         finally:
             if not info.killIntended:
                 self.outputQueue.put((jobID, 0, time.time() - startTime))
     else:
         with self.popenLock:
             popen = subprocess.Popen(jobCommand,
                                      shell=True,
                                      env=dict(os.environ, **environment))
         info = Info(time.time(), popen, killIntended=False)
         try:
             self.runningJobs[jobID] = info
             try:
                 statusCode = popen.wait()
                 if statusCode != 0 and not info.killIntended:
                     log.error(
                         "Got exit code %i (indicating failure) "
                         "from job %s.", statusCode, self.jobs[jobID])
             finally:
                 self.runningJobs.pop(jobID)
         finally:
             if not info.killIntended:
                 self.outputQueue.put(
                     (jobID, statusCode, time.time() - startTime))
Code example #26
        def getRunningJobIDs(self):
            times = {}
            with self.runningJobsLock:
                currentjobs = dict((str(self.batchJobIDs[x][0]), x) for x in self.runningJobs)
            process = subprocess.Popen(["qstat"], stdout=subprocess.PIPE)
            stdout, stderr = process.communicate()

            for currline in stdout.decode('utf-8').split('\n'):
                items = currline.strip().split()
                if items:
                    if items[0] in currentjobs and items[4] == 'r':
                        jobstart = " ".join(items[5:7])
                        jobstart = time.mktime(time.strptime(jobstart, "%m/%d/%Y %H:%M:%S"))
                        times[currentjobs[items[0]]] = time.time() - jobstart

            return times
Code example #27
 def __enter__(self):
     with self.lock:
         image = applianceSelf()
         # Omitting --rm, it's unreliable, see https://github.com/docker/docker/issues/16575
         args = list(
             concat('docker', 'run',
                    '--entrypoint=' + self._entryPoint(), '--net=host',
                    '-i', '--name=' + self.containerName, [
                        '--volume=%s:%s' % mount
                        for mount in iteritems(self.mounts)
                    ], image, self._containerCommand()))
         log.info('Running %r', args)
         self.popen = subprocess.Popen(args)
     self.start()
     self.__wait_running()
     return self
Code example #28
        def getRunningJobIDs(self):
            times = {}
            with self.runningJobsLock:
                currentjobs = dict(
                    (str(self.batchJobIDs[x][0]), x) for x in self.runningJobs)
            process = subprocess.Popen(
                ["bjobs", "-o", "jobid stat start_time delimiter='|'"],
                stdout=subprocess.PIPE)
            stdout, _ = process.communicate()

            for curline in stdout.split('\n'):
                items = curline.strip().split('|')
                if items[0] in currentjobs and items[1] == 'RUN':
                    jobstart = parse(items[2], default=datetime.now(tzlocal()))
                    times[currentjobs[items[0]]] = datetime.now(tzlocal()) \
                        - jobstart
            return times
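Illustration only (not from the original source): with the bjobs format string above, each data line should look roughly like '1234|RUN|Oct 10 10:42'; dateutil's parse() fills in the missing year and timezone from the `default` argument.

from datetime import datetime
from dateutil.parser import parse
from dateutil.tz import tzlocal

line = "1234|RUN|Oct 10 10:42"                      # assumed sample output line
jobid, stat, start_time = line.split('|')
jobstart = parse(start_time, default=datetime.now(tzlocal()))
running_for = datetime.now(tzlocal()) - jobstart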
Code example #29
    def obtainSystemConstants(cls):
        p = subprocess.Popen(["lshosts"],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)

        line = p.stdout.readline().decode('utf-8')
        items = line.strip().split()
        num_columns = len(items)
        cpu_index = None
        mem_index = None
        for i in range(num_columns):
            if items[i] == 'ncpus':
                cpu_index = i
            elif items[i] == 'maxmem':
                mem_index = i

        if cpu_index is None or mem_index is None:
            RuntimeError("lshosts command does not return ncpus or maxmem "
                         "columns")

        # p.stdout.readline().decode('utf-8')

        maxCPU = 0
        maxMEM = MemoryString("0")
        for line in p.stdout:
            split_items = line.strip().split()
            items = [
                item.decode('utf-8') for item in split_items
                if isinstance(item, bytes)
            ]
            if len(items) < num_columns:
                RuntimeError("lshosts output has a varying number of "
                             "columns")
            if items[cpu_index] != '-' and int(items[cpu_index]) > int(maxCPU):
                maxCPU = items[cpu_index]
            if (items[mem_index] != '-'
                    and MemoryString(items[mem_index]) > maxMEM):
                maxMEM = MemoryString(items[mem_index])

        if maxCPU == 0 or maxMEM == MemoryString("0"):
            raise RuntimeError("lshosts returns null ncpus or maxmem info")
        logger.debug("Got the maxMEM: {}".format(maxMEM))
        logger.debug("Got the maxCPU: {}".format(maxCPU))

        return maxCPU, maxMEM
Code example #30
File: utilsTest.py Project: MarkFilus/toil
    def testGetPIDStatus(self):
        """Test that ToilStatus.getPIDStatus() behaves as expected."""
        jobstoreName = 'pidStatusTest'
        jobstoreLoc = os.path.join(os.getcwd(), jobstoreName)

        cmd = [
            'python', '-m', 'toil.test.sort.sort', 'file:' + jobstoreName,
            '--clean', 'never'
        ]
        wf = subprocess.Popen(cmd)
        time.sleep(2)  # Need to let the jobstore be created before checking its contents.
        self.assertEqual(ToilStatus.getPIDStatus(jobstoreLoc), 'RUNNING')
        wf.wait()
        self.assertEqual(ToilStatus.getPIDStatus(jobstoreLoc), 'COMPLETED')
        os.remove(os.path.join(jobstoreLoc, 'pid.log'))
        self.assertEqual(ToilStatus.getPIDStatus(jobstoreLoc), 'QUEUED')
        shutil.rmtree(jobstoreLoc)