Esempio n. 1
0
def getListOfJobStates(jobIds, username):
    """Return a ``JobStatus`` for every locally tracked job that is selected.

    Args:
        jobIds: Collection of job ids to report on. A falsy value (``None``
            or an empty collection) selects every job.
        username: Not used by this implementation.

    Returns:
        List of ``JobStatus`` objects, one per selected entry of the
        module-level ``jobs`` sequence, marked ``kRunning`` when the job's
        ``running`` attribute is truthy and ``kWaiting`` otherwise.
    """
    # ``guard`` is held while the shared ``jobs`` sequence is read.
    with guard:
        return [
            JobStatus(
                job.jobId,
                JobStatus.kRunning if job.running else JobStatus.kWaiting)
            for job in jobs
            if not jobIds or job.jobId in jobIds
        ]
Esempio n. 2
0
def getListOfJobStates(jobName, username=None, detailed=True):
    """Run ``llq`` for the current user and return the status of their jobs.

    Args:
        jobName: Only jobs whose "Job Name" field equals this value are
            returned; ``None`` selects every job.
        username: Accepted for interface compatibility but not used; the
            query is always issued for the user reported by ``whoami``.
        detailed: When truthy, ``-x`` is added to the ``llq`` command line.

    Returns:
        List of ``JobStatus`` objects in the order ``llq`` reports the jobs.
        A job is added when its "Job Name" line is seen; the other fields
        of the same job are applied to that object as they are parsed.

    Raises:
        batchelor.BatchelorException: if ``llq`` exits with a non-zero return
            code or its output cannot be parsed.
    """
    command = "llq -u `whoami` -m -x" if detailed else "llq -u `whoami` -m"
    (returncode, stdout, stderr) = batchelor.runCommand(command)
    if returncode != 0:
        raise batchelor.BatchelorException("llq failed (stderr: '" + stderr + "')")

    handledKeys = ("Job Name", "Step Virtual Memory", "Status", "Step User Time")
    jobStates = []
    currentJobId = -1
    currentJobStatus = None
    for rawLine in stdout.split('\n'):
        if rawLine.startswith("===== Job Step mgmt."):
            # The job id sits between the first and the last dot of the header.
            try:
                currentJobId = int(rawLine[rawLine.find(".") + 1:rawLine.rfind(".")])
            except ValueError:
                raise batchelor.BatchelorException("parsing of llq output to get job id failed.")
            currentJobStatus = JobStatus(currentJobId)
            continue

        # Collapse runs of whitespace, then split "Key: value" at the first
        # separator. Taking the value this way (instead of str.lstrip with the
        # key text) keeps values such as "Submission Error" intact, because
        # lstrip removes a *set of characters*, not a prefix.
        key, separator, value = ' '.join(rawLine.split()).partition(": ")
        if not separator or key not in handledKeys:
            continue
        if currentJobId < 0:
            raise batchelor.BatchelorException(
                "parsing of llq output failed, got " + key.lower() + " before job id.")

        if key == "Job Name":
            if jobName is None or value == jobName:
                jobStates.append(currentJobStatus)
        elif key == "Step Virtual Memory":
            try:
                amount, unit = value.split()[:2]
                memory = float(amount) * _kMemoryUnits[unit]
            except (ValueError, KeyError):
                raise batchelor.BatchelorException(
                    "parsing of llq output to get memory usage failed.")
            # second argument is presumably the task number - TODO confirm
            currentJobStatus.setMemoryUsage(memory, 0)
        elif key == "Status":
            # Record the raw status text first; refine it when it is known.
            currentJobStatus.setStatus(JobStatus.kUnknown, name=value)
            if value == 'Running':
                currentJobStatus.setStatus(JobStatus.kRunning)
            elif value in ('I', 'Idle', 'Pending'):
                currentJobStatus.setStatus(JobStatus.kWaiting)
            elif value in ('Submission Error', 'Terminated', 'Removed', 'Remove Pending'):
                currentJobStatus.setStatus(JobStatus.kError)
        else:  # "Step User Time", formatted as hours:minutes:seconds
            try:
                hours, minutes, seconds = [float(part) for part in value.split(':')[:3]]
            except ValueError:
                raise batchelor.BatchelorException(
                    "parsing of llq output to get cpu time failed.")
            # CPU time is handed over in hours.
            currentJobStatus.setCpuTime(hours + minutes / 60.0 + seconds / 3600.0, 0)

    return jobStates
Esempio n. 3
0
def _applyQstatTaskUsage(job_status, root):
    """Mark ``job_status`` as running and copy per-task usage from qstat XML.

    ``root`` is the parsed output of ``qstat -xml -j <id>``. Every
    ``ulong_sublist`` below a ``JB_ja_tasks`` element that carries a
    ``JAT_task_number`` sets the status to ``kRunning`` and contributes its
    ``cpu`` and ``vmem`` entries for that task number.
    """
    for child in root[0]:
        for sublist in child.findall('JB_ja_tasks/ulong_sublist'):
            task_number = sublist.findall('JAT_task_number')
            if not task_number:
                continue
            task_number = int(task_number[0].text)
            job_status.setStatus(JobStatus.kRunning)
            for scaled in sublist.findall('JAT_scaled_usage_list/scaled'):
                name = scaled.findall('UA_name')[0].text
                value = scaled.findall('UA_value')[0].text
                if name == 'cpu':
                    # presumably seconds -> hours; TODO confirm qstat units
                    job_status.setCpuTime(float(value) / 3600.0, task_number)
                elif name == 'vmem':
                    # presumably bytes -> GiB; TODO confirm qstat units
                    job_status.setMemoryUsage(
                        float(value) / (1024.0)**3, task_number)


def getListOfJobStates(select_jobIDs, username):
    """Query ``qstat`` and return a ``JobStatus`` for each selected job.

    Args:
        select_jobIDs: Container of integer job ids to report on, or ``None``
            to report on every job listed by ``qstat``.
        username: User passed to ``qstat -u``; ``None`` runs plain ``qstat``.

    Returns:
        List of ``JobStatus`` objects in the order ``qstat`` lists the jobs.
        A running job that has vanished by the time its details are queried
        is left out of the list.

    Raises:
        batchelor.BatchelorException: if a ``qstat`` call exits with a
            non-zero return code or its output cannot be parsed.
    """
    if username is None:
        command = "qstat"
    else:
        command = "qstat -u {0}".format(username)

    (returncode, stdout, stderr) = batchelor.runCommand(command)
    if returncode != 0:
        raise batchelor.BatchelorException("qstat failed (stderr: '" + stderr +
                                           "')")
    if stdout == "":
        return []

    # The first two lines are the table header. Blank lines (for example the
    # one produced by a trailing newline) carry no job and are skipped so they
    # cannot trigger an IndexError below.
    rows = [row.split() for row in stdout.split('\n')[2:] if row.strip()]
    try:
        jobs = [(int(fields[0]), fields[4]) for fields in rows]
    except (ValueError, IndexError):
        raise batchelor.BatchelorException(
            "parsing of qstat output to get job id failed.")

    list_of_states = []
    for jobID, state in jobs:
        if select_jobIDs is not None and jobID not in select_jobIDs:
            continue

        job_status = JobStatus(jobID)
        # Keep the raw qstat state as the name; refine it when it is known.
        job_status.setStatus(JobStatus.kUnknown, name=state)

        if state in ('qw', 'hqw'):
            job_status.setStatus(JobStatus.kWaiting)
        elif state == 't':
            job_status.setStatus(JobStatus.kTransmitting)
        elif state in ('d', 'dr', 'dt'):
            job_status.setStatus(JobStatus.kDeletion)
        elif state == 'Eq':
            job_status.setStatus(JobStatus.kError)
        elif state in ('r', 'hr'):
            # get detailed job information
            command = "qstat -xml -j {0}".format(jobID)
            (returncode, stdout, stderr) = batchelor.runCommand(command)
            if returncode != 0:
                raise batchelor.BatchelorException(
                    "qstat failed (stderr: '" + stderr + "')")
            if 'unknown_jobs' in stdout:
                # the job has ended between the first qstat call and now
                continue
            try:
                root = ElementTree.fromstring(stdout)
            except ElementTree.ParseError as e:
                # Use the same module name as for fromstring() so the handler
                # does not depend on a separate ``import xml`` being present.
                raise batchelor.BatchelorException(
                    "xml-parser could not parse output of qstat -xml -j {0}: {1}"
                    .format(jobID, e))
            _applyQstatTaskUsage(job_status, root)

        list_of_states.append(job_status)

    return list_of_states
def getListOfJobStates(select_jobIDs, username):
    """Return the state of the selected jobs as reported by ``qstat``.

    Args:
        select_jobIDs: Container of integer job ids to include, or ``None``
            to include every job ``qstat`` lists.
        username: User handed to ``qstat -u``; with ``None`` plain ``qstat``
            is executed.

    Returns:
        List of ``JobStatus`` objects in ``qstat`` order. Running jobs that
        no longer exist when their details are requested are omitted.

    Raises:
        batchelor.BatchelorException: if ``qstat`` returns a non-zero exit
            code or its plain or XML output cannot be parsed.
    """
    # get list of all jobs
    if username is None:
        command = "qstat"
    else:
        command = "qstat -u {0}".format(username)

    (returncode, stdout, stderr) = batchelor.runCommand(command)
    if returncode != 0:
        raise batchelor.BatchelorException("qstat failed (stderr: '" + stderr + "')")

    if stdout == "":
        return []

    # Drop the two header lines and any blank line; a blank line (such as the
    # one after a trailing newline) would otherwise raise IndexError.
    jobList = [job for job in stdout.split('\n')[2:] if job.strip()]

    try:
        jobIDs = [int(job.split()[0]) for job in jobList]
        jobStates = [job.split()[4] for job in jobList]
    except (ValueError, IndexError):
        raise batchelor.BatchelorException("parsing of qstat output to get job id failed.")

    list_of_states = []

    for jobID, jobState in zip(jobIDs, jobStates):
        if select_jobIDs is not None and jobID not in select_jobIDs:
            continue

        job_status = JobStatus(jobID)
        # Store the raw state text first; overwrite it for recognized states.
        job_status.setStatus(JobStatus.kUnknown, name=jobState)

        if jobState in ('qw', 'hqw'):
            job_status.setStatus(JobStatus.kWaiting)

        elif jobState == 't':
            job_status.setStatus(JobStatus.kTransmitting)

        elif jobState in ('d', 'dr', 'dt'):
            job_status.setStatus(JobStatus.kDeletion)

        elif jobState == 'Eq':
            job_status.setStatus(JobStatus.kError)

        elif jobState in ('r', 'hr'):
            # get detailed job information
            command = "qstat -xml -j {0}".format(jobID)
            (returncode, stdout, stderr) = batchelor.runCommand(command)
            if returncode != 0:
                raise batchelor.BatchelorException("qstat failed (stderr: '" + stderr + "')")
            if 'unknown_jobs' in stdout:
                # the job has ended between the qstat command and now
                continue

            try:
                root = ElementTree.fromstring(stdout)
            except ElementTree.ParseError as e:
                # Same module name as used for fromstring(), so the handler
                # works even when the top-level ``xml`` name is not bound.
                raise batchelor.BatchelorException("xml-parser could not parse output of qstat -xml -j {0}: {1}".format(jobID, e))

            for child in root[0]:
                for task in child.findall('JB_ja_tasks'):
                    for sublist in task.findall('ulong_sublist'):
                        task_number = sublist.findall('JAT_task_number')
                        if not task_number:
                            continue
                        task_number = int(task_number[0].text)
                        job_status.setStatus(JobStatus.kRunning)
                        for usage_list in sublist.findall('JAT_scaled_usage_list'):
                            for scaled in usage_list.findall('scaled'):
                                name = scaled.findall('UA_name')[0].text
                                value = scaled.findall('UA_value')[0].text
                                if name == 'cpu':
                                    # presumably seconds -> hours; TODO confirm
                                    job_status.setCpuTime(float(value) / 3600.0, task_number)
                                elif name == 'vmem':
                                    # presumably bytes -> GiB; TODO confirm
                                    job_status.setMemoryUsage(float(value) / (1024.0)**3, task_number)

        list_of_states.append(job_status)

    return list_of_states
Esempio n. 5
0
def _squeueTimeToHours(time_str):
    """Convert an squeue time field (``[days-][hours:]minutes:seconds``) to hours.

    Raises ``ValueError`` or ``IndexError`` when the field is not in that form.
    """
    hours = 0.0
    if '-' in time_str:
        days, clock = time_str.split('-')[:2]
        hours += float(days) * 24
    else:
        clock = time_str
    parts = clock.split(':')
    seconds = float(parts[-1])
    minutes = float(parts[-2])
    if len(parts) > 2:
        hours += float(parts[-3])
    return hours + minutes / 60.0 + seconds / 3600.0


def getListOfJobStates(jobName, username=None, detailed=True):
    """Run ``squeue`` on the ``serial`` cluster and return the jobs' status.

    Args:
        jobName: Only jobs whose name column equals this value are returned;
            ``None`` selects every job.
        username: Accepted for interface compatibility but not used; the
            query is always issued for the user reported by ``whoami``.
        detailed: Accepted for interface compatibility but not used.

    Returns:
        List of ``JobStatus`` objects in the order ``squeue`` lists the jobs;
        empty when ``squeue`` reports no jobs.

    Raises:
        batchelor.BatchelorException: if ``squeue`` exits with a non-zero
            return code or a line of its output cannot be parsed.
    """
    command = "squeue --clusters=serial -u $(whoami) -l -h"
    (returncode, stdout, stderr) = batchelor.runCommand(command)
    if returncode != 0:
        raise batchelor.BatchelorException("squeue failed (stderr: '" +
                                           stderr + "')")

    jobStates = []
    for line in stdout.split('\n'):
        if line.startswith("CLUSTER: serial"):
            continue
        lineSplit = line.split()
        if not lineSplit:
            # Blank line: empty output (no jobs) or a trailing newline.
            continue
        if len(lineSplit) < 6:
            raise batchelor.BatchelorException(
                "parsing of squeue output failed, unexpected line: '" + line +
                "'")

        # Columns used: 0 = job id, 2 = name, 4 = state, 5 = elapsed time.
        try:
            currentJobId = int(lineSplit[0])
        except ValueError:
            raise batchelor.BatchelorException(
                "parsing of squeue output to get job id failed.")
        currentJobStatus = JobStatus(currentJobId)

        if jobName is None or lineSplit[2] == jobName:
            jobStates.append(currentJobStatus)

        # Record the raw state text first; refine it for recognized states.
        status = lineSplit[4]
        currentJobStatus.setStatus(JobStatus.kUnknown, name=status)
        if status == 'RUNNING':
            currentJobStatus.setStatus(JobStatus.kRunning)
        elif status in ('PENDING', 'SUSPENDED', 'COMPLETING', 'COMPLETED',
                        'COMPLETI'):
            currentJobStatus.setStatus(JobStatus.kWaiting)
        elif status in ('CANCELLED', 'FAILED', 'TIMEOUT', 'NODE_FAIL'):
            currentJobStatus.setStatus(JobStatus.kError)
        else:
            # Single-argument call form prints the same text under
            # Python 2 and Python 3.
            print("Unknown job status " + status)

        try:
            total_time = _squeueTimeToHours(lineSplit[5])
        except (ValueError, IndexError):
            raise batchelor.BatchelorException(
                "parsing of squeue output to get time information failed. ({0})"
                .format(lineSplit[5]))
        currentJobStatus.setCpuTime(total_time, 0)

    return jobStates
def getListOfJobStates(jobName, username=None, detailed=True):
    """Parse ``llq`` output into a list of ``JobStatus`` objects.

    Args:
        jobName: Job name to filter on; ``None`` returns every job.
        username: Accepted for interface compatibility but not used; ``llq``
            is always run for the user reported by ``whoami``.
        detailed: When truthy, ``llq`` is invoked with the extra ``-x`` flag.

    Returns:
        List of ``JobStatus`` objects, in ``llq`` order, for the jobs whose
        "Job Name" matches. Status, memory and CPU-time lines belonging to
        a job are applied to its object as they are encountered.

    Raises:
        batchelor.BatchelorException: if ``llq`` exits with a non-zero return
            code, a field appears before any job header, or a field cannot
            be parsed.
    """
    namePrefix = "Job Name: "
    memoryPrefix = "Step Virtual Memory: "
    statusPrefix = "Status: "
    timePrefix = "Step User Time: "

    if detailed:
        command = "llq -u `whoami` -m -x"
    else:
        command = "llq -u `whoami` -m"
    (returncode, stdout, stderr) = batchelor.runCommand(command)
    if returncode != 0:
        raise batchelor.BatchelorException("llq failed (stderr: '" + stderr +
                                           "')")

    jobStates = []
    currentJobId = -1
    currentJobStatus = None
    for line in stdout.split('\n'):
        if line.startswith("===== Job Step mgmt."):
            # The job id is the text between the first and the last dot.
            try:
                currentJobId = int(line[line.find(".") + 1:line.rfind(".")])
                currentJobStatus = JobStatus(currentJobId)
            except ValueError:
                raise batchelor.BatchelorException(
                    "parsing of llq output to get job id failed.")
        line = ' '.join(line.split())

        # Values are taken by slicing off the known prefix. str.lstrip(prefix)
        # must not be used for this: it strips a set of characters and would
        # turn e.g. "Submission Error" into "bmission Error".
        if line.startswith(namePrefix):
            if currentJobId < 0:
                raise batchelor.BatchelorException(
                    "parsing of llq output failed, got job name before job id."
                )
            name = line[len(namePrefix):]
            if jobName is None or name == jobName:
                jobStates.append(currentJobStatus)

        elif line.startswith(memoryPrefix):
            if currentJobId < 0:
                raise batchelor.BatchelorException(
                    "parsing of llq output failed, got memory usage before job id."
                )
            parsed = line[len(memoryPrefix):].split()
            try:
                memory = float(parsed[0]) * _kMemoryUnits[parsed[1]]
            except (ValueError, IndexError, KeyError):
                raise batchelor.BatchelorException(
                    "parsing of llq output to get memory usage failed.")
            # second argument is presumably the task number - TODO confirm
            currentJobStatus.setMemoryUsage(memory, 0)

        elif line.startswith(statusPrefix):
            if currentJobId < 0:
                raise batchelor.BatchelorException(
                    "parsing of llq output failed, got status before job id.")
            status = line[len(statusPrefix):]
            # Record the raw status text first; refine it when it is known.
            currentJobStatus.setStatus(JobStatus.kUnknown, name=status)
            if status == 'Running':
                currentJobStatus.setStatus(JobStatus.kRunning)
            elif status in ('I', 'Idle', 'Pending'):
                currentJobStatus.setStatus(JobStatus.kWaiting)
            elif status in ('Submission Error', 'Terminated', 'Removed',
                            'Remove Pending'):
                currentJobStatus.setStatus(JobStatus.kError)

        elif line.startswith(timePrefix):
            if currentJobId < 0:
                raise batchelor.BatchelorException(
                    "parsing of llq output failed, got cpu time before job id."
                )
            time_str = line[len(timePrefix):].split(':')
            try:
                hours = float(time_str[0])
                minutes = float(time_str[1])
                seconds = float(time_str[2])
            except (ValueError, IndexError):
                raise batchelor.BatchelorException(
                    "parsing of llq output to get cpu time failed.")
            # CPU time is handed over in hours.
            total_time = hours + minutes / 60.0 + seconds / 3600.0
            currentJobStatus.setCpuTime(total_time, 0)

    return jobStates
def getListOfJobStates(jobName, username=None, detailed=True):
    """Parse ``squeue`` output for the ``serial`` cluster into job states.

    Args:
        jobName: Job name to filter on; ``None`` returns every job.
        username: Accepted for interface compatibility but not used;
            ``squeue`` is always run for the user reported by ``whoami``.
        detailed: Accepted for interface compatibility but not used.

    Returns:
        List of ``JobStatus`` objects, in ``squeue`` order, for the jobs whose
        name matches. The list is empty when no jobs are reported.

    Raises:
        batchelor.BatchelorException: if ``squeue`` exits with a non-zero
            return code, or the job id or time field of a line cannot be
            parsed.
    """
    command = "squeue --clusters=serial -u $(whoami) -l -h"
    (returncode, stdout, stderr) = batchelor.runCommand(command)
    if returncode != 0:
        raise batchelor.BatchelorException("squeue failed (stderr: '" + stderr + "')")

    jobStates = []
    for line in stdout.split('\n'):
        if line.startswith("CLUSTER: serial"):
            continue
        lineSplit = line.split()
        if not lineSplit:
            # Nothing to parse: empty output or the line after a trailing
            # newline. Without this guard lineSplit[0] raises IndexError.
            continue

        try:
            currentJobId = int(lineSplit[0])
            # Columns used: 2 = name, 4 = state, 5 = elapsed time.
            name, status, time_str = lineSplit[2], lineSplit[4], lineSplit[5]
        except (ValueError, IndexError):
            raise batchelor.BatchelorException("parsing of squeue output to get job id failed.")

        currentJobStatus = JobStatus(currentJobId)
        if jobName is None or name == jobName:
            jobStates.append(currentJobStatus)

        # Record the raw state text first; refine it for recognized states.
        currentJobStatus.setStatus(JobStatus.kUnknown, name=status)
        if status == 'RUNNING':
            currentJobStatus.setStatus(JobStatus.kRunning)
        elif status in ('PENDING', 'SUSPENDED', 'COMPLETING', 'COMPLETED', 'COMPLETI'):
            currentJobStatus.setStatus(JobStatus.kWaiting)
        elif status in ('CANCELLED', 'FAILED', 'TIMEOUT', 'NODE_FAIL'):
            currentJobStatus.setStatus(JobStatus.kError)
        else:
            # Single-argument call form prints identically under Python 2
            # and Python 3.
            print("Unknown job status " + status)

        # Time field has the form [days-][hours:]minutes:seconds.
        try:
            hours = 0.0
            clock = time_str
            if '-' in time_str:
                dayAndClock = time_str.split('-')
                hours += float(dayAndClock[0]) * 24
                clock = dayAndClock[1]
            parts = clock.split(':')
            seconds = float(parts[-1])
            minutes = float(parts[-2])
            if len(parts) > 2:
                hours += float(parts[-3])
        except (ValueError, IndexError):
            raise batchelor.BatchelorException("parsing of squeue output to get time information failed. ({0})".format(time_str))
        # CPU time is handed over in hours.
        currentJobStatus.setCpuTime(hours + minutes / 60.0 + seconds / 3600.0, 0)

    return jobStates