def _condorJobsInQueue(self, cdata):
    '''Return the number of jobs still in the queue for a cluster.

    Runs condor_q with a constraint that excludes jobs in the Completed (3)
    or Removed (4) states, then counts the output lines that begin with the
    cluster ID.

    cdata -- mapping with 'clusterid' and optionally 'queue' (schedd name).

    Returns the job count as an int, or None if condor_q reported an error.
    '''
    jobCount = None
    # Example: condor_q -name q1@`hostname` -f "%d\n" ClusterID 11292
    # We don't care if jobs are in the queue in the C or X state, so filter
    # those out with a constraint.
    # NOTE: the command is joined into one string before being run, so the
    # constraint must be quoted or the '&&' gets interpreted by the shell.
    # The original left it unquoted in the no-queue branch; both branches
    # now use the identical quoted constraint.
    constraint = '"JobStatus != 3 && JobStatus != 4"'
    if cdata.get('queue'):
        cmd = ['condor_q', '-name', cdata.get('queue'),
               '-f', '"%d\\n"', 'ClusterID',
               '-c', constraint,
               cdata.get('clusterid')]
    else:
        cmd = ['condor_q',
               '-f', '"%d\\n"', 'ClusterID',
               '-c', constraint,
               cdata.get('clusterid')]
    logging.info('[cleaner] ...running: %s' % ' '.join(cmd))
    (return_code, stdout_value, stderr_value) = util.runCommand2(' '.join(cmd))
    # Case #8380: Job directories are being deleted when jobs remain in the
    # queue. Pre Condor 7.2.2 it's not enough to just check the return code:
    # Condor < 7.2.2 would often set the return code to 0 and write error
    # notes to stderr. So we have to check that stderr is empty as well.
    if len(stderr_value) > 0:
        logging.error('[cleaner] ...got error running command: %s' % stderr_value)
    else:
        if return_code == 0:
            # Count the lines in the output that start with the cluster ID.
            # That's the number of jobs still in the queue.
            repat = re.compile(r"^\s*%s\s*" % cdata.get('clusterid'), re.M)
            matches = re.findall(repat, stdout_value)
            jobCount = len(matches)
    return jobCount
def doCondorSubmit(submitFile, queueName):
    '''Submit a job file with condor_submit and return the new cluster ID.

    submitFile -- path to the Condor submission file; condor_submit runs in
                  the directory that contains it.
    queueName  -- optional schedd name passed via "-name"; falsy to skip.

    Returns the cluster ID parsed from condor_submit's output (a string).
    Raises on submission or parse failure, after cleaning up the submission
    directory.
    '''
    clusterId = None
    # Run condor_submit from the directory holding the submission file.
    (submission_directory, filename) = os.path.split(submitFile)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submission file: %s" % submitFile)
    # TODO: make sure condor_submit is in the path and is available.
    submit_cmd = ['condor_submit']
    if queueName:
        submit_cmd.append('-name')
        submit_cmd.append('%s' % queueName)
    # Case 7108: Add a new configuration option that allows users to pass along
    # custom command line arguments to insert in to the condor_submit call made
    # by Condor Agent. The syntax for the option is a comma-separated list,
    # with each value in the list being an element in the command line argument.
    additional_arguments = []
    add_str = CondorAgent.util.getCondorConfigVal('CONDOR_AGENT_SUBMIT_PROXY_ADDITIONAL_ARGUMENTS')
    if add_str:
        # Add cleaned up versions of the arguments to our array.
        # (str.split replaces the deprecated string.split() module function.)
        additional_arguments = [i.strip() for i in add_str.split(',')]
    if len(additional_arguments) > 0:
        logging.debug("CondorAgent.post_submit.doCondorSubmit(): Adding additional, user supplied arguments: %s" % ' '.join(additional_arguments))
        submit_cmd.extend(additional_arguments)
    submit_cmd.append('%s' % submitFile)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): condor_submit command: %s" % ' '.join(submit_cmd))
    # Set the umask to be liberal so files that get created can be edited by anyone
    current_umask = os.umask(0)
    # Pre-initialize so the error handler below can reference these safely
    # even when runCommand2 raises before the tuple assignment completes.
    # (The original code hit a NameError on submit_out/submit_err here,
    # masking the real exception.)
    (retcode, submit_out, submit_err) = (None, None, None)
    try:
        (retcode, submit_out, submit_err) = util.runCommand2(cmd=' '.join(submit_cmd), cwd=submission_directory)
    except:
        # Intentionally broad: restore the umask, log whatever output we got,
        # clean up the submission directory, then re-raise the original error.
        # TODO: determine exact error condition check. Possible that there are
        # still warnings we should log.
        logging.error("CondorAgent.post_submit.doCondorSubmit(): Unexpected error: %s, %s" % (sys.exc_info()[0], str(sys.exc_info()[1])))
        if submit_out:
            logging.error("CondorAgent.post_submit.doCondorSubmit(): submit_out: %s" % submit_out)
        if submit_err:
            logging.error("CondorAgent.post_submit.doCondorSubmit(): submit_err: %s" % submit_err)
        os.umask(current_umask)
        cleanSubmissionDir(submission_directory)
        raise
    os.umask(current_umask)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): retcode: %d" % retcode)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submit_out: %s" % submit_out)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submit_err: %s" % submit_err)
    if retcode == 0:
        match = re.search(r"submitted to cluster (\d+)", submit_out)
        if match is None:
            logging.error('CondorAgent.post_submit.doCondorSubmit(): Unable to parse submission details from condor_q output')
            cleanSubmissionDir(submission_directory)
            raise Exception("Failed to parse cluster id from output:\n%s" % submit_out)
        clusterId = match.group(1)
    else:
        # TODO: parse the error to figure out what happened.
        logging.error('CondorAgent.post_submit.doCondorSubmit(): Condor submission failed')
        cleanSubmissionDir(submission_directory)
        raise Exception("Failed to submit jobs to condor with error:\n%s" % submit_err)
    logging.info('CondorAgent.post_submit.doCondorSubmit(): Returning cluster ID: %s' % str(clusterId))
    return clusterId
def _condorJobsInQueue(self, cdata):
    '''Return the number of jobs still in the queue for a cluster.

    Runs condor_q constrained to exclude jobs in the Completed (3) or
    Removed (4) states and counts output lines beginning with the cluster ID.

    cdata -- mapping with 'clusterid' and optionally 'queue' (schedd name).

    Returns the job count as an int, or None if condor_q reported an error.
    '''
    jobCount = None
    # Example: condor_q -name q1@`hostname` -f "%d\n" ClusterID 11292
    # We don't care if jobs are in the queue in the C or X state, so filter
    # those out with a constraint.
    # NOTE: the command is joined into one string before being run, so the
    # constraint must be quoted or the '&&' gets interpreted by the shell.
    # The original left it unquoted in the no-queue branch; both branches
    # now use the identical quoted constraint.
    constraint = '"JobStatus != 3 && JobStatus != 4"'
    if cdata.get('queue'):
        cmd = [
            'condor_q', '-name', cdata.get('queue'),
            '-f', '"%d\\n"', 'ClusterID',
            '-c', constraint,
            cdata.get('clusterid')
        ]
    else:
        cmd = [
            'condor_q',
            '-f', '"%d\\n"', 'ClusterID',
            '-c', constraint,
            cdata.get('clusterid')
        ]
    logging.info('[cleaner] ...running: %s' % ' '.join(cmd))
    (return_code, stdout_value, stderr_value) = util.runCommand2(' '.join(cmd))
    # Case #8380: Job directories are being deleted when jobs remain in the
    # queue. Pre Condor 7.2.2 it's not enough to just check the return code:
    # Condor < 7.2.2 would often set the return code to 0 and write error
    # notes to stderr. So we have to check that stderr is empty as well.
    if len(stderr_value) > 0:
        logging.error('[cleaner] ...got error running command: %s' % stderr_value)
    else:
        if return_code == 0:
            # Count the lines in the output that start with the cluster ID.
            # That's the number of jobs still in the queue.
            repat = re.compile(r"^\s*%s\s*" % cdata.get('clusterid'), re.M)
            matches = re.findall(repat, stdout_value)
            jobCount = len(matches)
    return jobCount
def testDoRunCommand2(self):
    '''Verify runCommand2 executes a simple shell command successfully.

    "ls -al" should exit with status 0, produce some stdout, and write
    nothing at all to stderr.
    '''
    exit_code, stdout_text, stderr_text = util.runCommand2("ls -al")
    self.assertEqual(0, exit_code)
    self.assertTrue(stdout_text != '')
    self.assertEqual('', stderr_text)