Esempio n. 1
0
def submitJob(config, command, outputFile, jobName, arrayStart = None, arrayEnd = None, arrayStep = None):
	"""Submit a job script through ``qsub`` and return its numeric job id.

	The script is a copy of the configured ``header_file`` with ``command``
	appended; it lives in a temporary file that is fed to ``qsub`` on stdin.

	Args:
		config: configuration object; ``header_file``, ``project``, ``queue``
			and ``memory`` are read from the ``submoduleIdentifier()`` section.
		command: shell text appended after the header.
		outputFile: value passed to ``-o``.
		jobName: value passed to ``-N``; the option is omitted when None.
		arrayStart, arrayEnd, arrayStep: when ``arrayStart`` is not None a
			``-t start-end:step`` option is added.

	Returns:
		The job id as an int.

	Raises:
		batchelor.BatchelorException: if ``qsub`` exits non-zero or the job id
			cannot be extracted from its output.
	"""
	section = submoduleIdentifier()
	isArrayJob = arrayStart is not None
	(fileDescriptor, fileName) = tempfile.mkstemp()
	os.close(fileDescriptor)
	try:
		batchelor.runCommand("cp " + batchelor._getRealPath(config.get(section, "header_file")) + " " + fileName)
		with open(fileName, 'a') as scriptFile:
			scriptFile.write(command)
		cmnd = "qsub "
		cmnd += "-j y "
		if jobName is not None:
			cmnd += "-N " + jobName + " "
		if isArrayJob:
			cmnd += "-t " + str(arrayStart) + "-" + str(arrayEnd) + ":" + str(arrayStep) + " "
		cmnd += "-o " + outputFile + " "
		cmnd += "-P " + config.get(section, "project") + " "
		cmnd += "-q " + config.get(section, "queue") + " "
		cmnd += "-l h_vmem=" + config.get(section, "memory") + " "
		cmnd += _getExcludedHostsString(config)
		cmnd += "< " + fileName
		(returncode, stdout, stderr) = batchelor.runCommand(cmnd)
	finally:
		# remove the temporary script on every path, including failures
		batchelor.runCommand("rm -f " + fileName)
	if returncode != 0:
		raise batchelor.BatchelorException("qsub failed (stderr: '" + stderr + "')")
	# example output: "Your job 1601905 ("J2415c980b8") has been submitted"
	# str.lstrip() removes a set of characters, not a prefix, so the expected
	# prefix is cut off explicitly
	prefix = "Your job-array " if isArrayJob else "Your job "
	jobId = stdout.strip()
	if jobId.startswith(prefix):
		jobId = jobId[len(prefix):]
	# for array jobs the id ends at the first '.', otherwise at the first ' ';
	# partition() keeps the full text if the separator is missing, where
	# slicing with find() == -1 would silently drop the last character
	jobId = jobId.partition('.' if isArrayJob else ' ')[0]
	try:
		jobId = int(jobId)
	except ValueError:
		raise batchelor.BatchelorException('parsing of qsub output to get job id failed.')
	return jobId
Esempio n. 2
0
def submitJob(config, command, outputFile, jobName, wd=None):
    """Submit a job script through ``sbatch`` and return its numeric job id.

    If the configuration defines ``max_active_jobs``, the call blocks (polling
    every 90 seconds) until fewer jobs than that are reported as active.

    Args:
        config: configuration object; ``header_file``, ``wall_clock_limit``,
            ``memory`` and optionally ``max_active_jobs`` are read from the
            ``submoduleIdentifier()`` section.
        command: shell text appended after the header file contents.
        outputFile: value of the ``#SBATCH -o`` directive.
        jobName: value of the ``#SBATCH -J`` directive; omitted when None.
        wd: working directory for ``#SBATCH -D``; defaults to the current
            working directory.

    Returns:
        The job id as an int.

    Raises:
        batchelor.BatchelorException: if ``sbatch`` exits non-zero or the job
            id cannot be extracted from its output.
    """
    section = submoduleIdentifier()

    # check if only a certain amount of active jobs is allowed
    if config.has_option(section, "max_active_jobs"):
        max_active_jobs = int(config.get(section, "max_active_jobs"))
        waitTime = 90  # seconds, i.e. 1.5 min between polls
        hasWaited = False
        while len(getListOfActiveJobs(None)) >= max_active_jobs:
            if not hasWaited:
                sys.stdout.write("Waiting for free slots")
                sys.stdout.flush()
                hasWaited = True
            time.sleep(waitTime)
        if hasWaited:
            sys.stdout.write("\r")

    if wd is None:
        wd = os.getcwd()
    (fileDescriptor, fileName) = tempfile.mkstemp()
    os.close(fileDescriptor)
    try:
        headerFileName = batchelor._getRealPath(
            config.get(section, "header_file"))
        with open(fileName, 'w') as tempFile:
            tempFile.write("#!/bin/bash\n\n")
            tempFile.write("#SBATCH -D " + wd + "\n")
            tempFile.write("#SBATCH -o " + outputFile + "\n")
            tempFile.write("#SBATCH --time=" +
                           config.get(section, "wall_clock_limit") + "\n")
            tempFile.write("#SBATCH --mem=" +
                           config.get(section, "memory") + "\n")
            if jobName is not None:
                tempFile.write("#SBATCH -J " + jobName + "\n")
            tempFile.write("#SBATCH --get-user-env \n")
            tempFile.write("#SBATCH --export=NONE \n")
            tempFile.write("#SBATCH --clusters=serial \n\n\n")
            with open(headerFileName, 'r') as headerFile:
                for line in headerFile:
                    # the script already starts with its own shebang
                    if not line.startswith("#!"):
                        tempFile.write(line)
            tempFile.write("\n\n")
            tempFile.write(command)
        (returncode, stdout, stderr) = batchelor.runCommand("sbatch " + fileName)
    finally:
        # remove the temporary script even if writing or submitting raised
        batchelor.runCommand("rm -f " + fileName)
    if returncode != 0:
        raise batchelor.BatchelorException("sbatch failed (stderr: '" +
                                           stderr + "')")
    try:
        # the id is the fourth whitespace-separated token of stdout
        # (presumably "Submitted batch job <id> ..." -- confirm)
        jobId = int(stdout.split()[3])
    except (IndexError, ValueError):
        raise batchelor.BatchelorException(
            'parsing output of sbatch to get job id failed.')
    return jobId
def submitJob(config, command, outputFile, jobName, wd = None):
	"""Submit a job script through ``llsubmit`` and return its numeric job id.

	Args:
		config: configuration object; ``header_file``, ``group``,
			``notification``, ``notify_user``, ``node_usage``,
			``wall_clock_limit``, ``resources`` and ``job_type`` are read from
			the ``submoduleIdentifier()`` section.
		command: shell text appended after the header file contents.
		outputFile: used for both the ``output`` and ``error`` keywords.
		jobName: value of the ``job_name`` keyword; omitted when None.
		wd: not supported here; any truthy value raises.

	Returns:
		The job id as an int.

	Raises:
		batchelor.BatchelorException: if ``wd`` is given, ``llsubmit`` exits
			non-zero, or the job id cannot be extracted from its output.
	"""
	section = submoduleIdentifier()
	if wd:
		raise batchelor.BatchelorException("Choosing the working directory is not jet implemented for {0}".format(section))

	(fileDescriptor, fileName) = tempfile.mkstemp()
	os.close(fileDescriptor)
	try:
		headerFileName = batchelor._getRealPath(config.get(section, "header_file"))
		with open(fileName, 'w') as tempFile:
			tempFile.write("#!/bin/bash\n\n")
			tempFile.write("#@ group = " + config.get(section, "group") + "\n")
			tempFile.write("#@ output = " + outputFile + "\n")
			tempFile.write("#@ error = " + outputFile + "\n")
			tempFile.write("#@ notification = " + config.get(section, "notification") + "\n")
			tempFile.write("#@ notify_user = " + config.get(section, "notify_user") + "\n")
			tempFile.write("#@ node_usage = " + config.get(section, "node_usage") + "\n")
			tempFile.write("#@ wall_clock_limit = " + config.get(section, "wall_clock_limit") + "\n")
			tempFile.write("#@ resources = " + config.get(section, "resources") + "\n")
			tempFile.write("#@ job_type = " + config.get(section, "job_type") + "\n")
			# NOTE(review): "class" is filled from the "job_type" option -- confirm this is intended
			tempFile.write("#@ class = " + config.get(section, "job_type") + "\n")
			if jobName is not None:
				tempFile.write("#@ job_name = " + jobName + "\n")
			tempFile.write("#@ queue\n\n\n")
			with open(headerFileName, 'r') as headerFile:
				for line in headerFile:
					# the script already starts with its own shebang
					if not line.startswith("#!"):
						tempFile.write(line)
			tempFile.write("\n\n")
			tempFile.write("exec 2>&1\n")
			tempFile.write("\n")
			tempFile.write(command)
		(returncode, stdout, stderr) = batchelor.runCommand("llsubmit - < " + fileName)
	finally:
		# single cleanup point for success and all failure paths
		batchelor.runCommand("rm -f " + fileName)
	if returncode != 0:
		raise batchelor.BatchelorException("llsubmit failed (stderr: '" + stderr + "')")
	# example output stdout:
	# llsubmit: The job "mgmt.12309" has been submitted.
	#
	# example output stderr:
	#
	# llsubmit: Stdin job command file written to "/tmp/loadlx_stdin.27558.CdoVxX".
	#
	# INFO: Project: pr83mo
	# INFO: Project's Expiration Date:    2015-01-31
	# INFO: Budget:                     Total [cpuh]        Used [cpuh]      Credit [cpuh]
	# INFO:                                  1350000      1011028 (75%)       338972 (25%)
	#
	# llsubmit: Processed command file through Submit Filter: "/lrz/loadl/filter/submit_filter_c2pap.pl".
	firstLine = stdout.split("\n")[0]
	# take the text between '"mgmt.' and the last double quote of the first line
	jobId = firstLine[firstLine.find('"mgmt.')+6:firstLine.rfind('"')]
	try:
		jobId = int(jobId)
	except ValueError:
		raise batchelor.BatchelorException('parsing of llsubmit output to get job id failed.')
	return jobId
Esempio n. 4
0
def submitJob(config, command, outputFile, jobName, wd = None, arrayStart = None, arrayEnd = None, arrayStep = None, priority=None, ompNumThreads=None):
	"""Submit a job script through ``qsub`` and return its numeric job id.

	The script is a copy of the configured ``header_file``, optionally followed
	by an ``OMP_NUM_THREADS`` export, followed by ``command``.

	Args:
		config: configuration object; ``header_file``, ``memory``, ``arch`` and
			optionally ``shortqueue`` / ``longqueue`` are read from the
			``submoduleIdentifier()`` section.
		command: shell text appended to the script.
		outputFile: value passed to ``-o``; must not be inside the home folder.
		jobName: value passed to ``-N``; the option is omitted when None.
		wd: value passed to ``-wd`` (``/tmp/`` if not given); must not be
			inside the home folder.
		arrayStart, arrayEnd, arrayStep: when ``arrayStart`` is not None a
			``-t start-end:step`` option is added.
		priority: if truthy, mapped linearly so that -1 gives -1024 and 1 gives
			1024, then clamped to -1023..1024 and passed to ``-p``.
		ompNumThreads: if not None, exported as ``OMP_NUM_THREADS`` in the
			script and passed to ``-pe mt``.

	Returns:
		The job id as an int.

	Raises:
		batchelor.BatchelorException: if ``wd`` or ``outputFile`` is inside the
			home folder, ``qsub`` exits non-zero, or the job id cannot be
			extracted from its output.
	"""
	section = submoduleIdentifier()
	isArrayJob = arrayStart is not None
	homeDir = os.path.realpath(os.path.expanduser('~'))

	def _isInsideHome(path):
		# compare on path-component boundaries; a substring test would also
		# reject unrelated paths that merely contain the home path
		resolved = os.path.realpath(path)
		return resolved == homeDir or resolved.startswith(homeDir.rstrip(os.sep) + os.sep)

	# some checks of the job-settings
	if wd and _isInsideHome(wd):
		raise batchelor.BatchelorException("The given working-directory is in your home-folder which is no allowed at E18: '{0}'".format(wd))

	if _isInsideHome(outputFile):
		raise batchelor.BatchelorException("The given output-file is in your home-folder which is no allowed at E18: '{0}'".format(outputFile))

	if priority:
		priority = min(max(int(-1024 + 2048 * (priority+1.0)/2.0), -1023), 1024)

	(fileDescriptor, fileName) = tempfile.mkstemp()
	os.close(fileDescriptor)
	try:
		batchelor.runCommand("cp " + batchelor._getRealPath(config.get(section, "header_file")) + " " + fileName)
		with open(fileName, 'a') as scriptFile:
			if ompNumThreads is not None:
				scriptFile.write("export OMP_NUM_THREADS={0}\n".format(ompNumThreads))
			scriptFile.write(command)
		enabledValues = [1, "1", "TRUE", "true", "True"]
		cmnd = "qsub "
		cmnd += "-j y "
		cmnd += "-b no "
		cmnd += "-m n "
		if jobName is not None:
			cmnd += "-N " + jobName + " "
		if isArrayJob:
			cmnd += "-t " + str(arrayStart) + "-" + str(arrayEnd) + ":" + str(arrayStep) + " "
		cmnd += "-o '" + outputFile + "' "
		cmnd += "-wd '" + (wd if wd else "/tmp/") + "' "
		# queue selection: short wins over long, medium is the fallback
		if config.has_option(section, "shortqueue") and config.get(section, "shortqueue") in enabledValues:
			cmnd += "-l short=1 "
		elif config.has_option(section, "longqueue") and config.get(section, "longqueue") in enabledValues:
			cmnd += "-l long=1 "
		else:
			cmnd += "-l medium=1 "
		cmnd += "-l h_pmem=" + config.get(section, "memory") + " "
		cmnd += "-l arch=" + config.get(section, "arch") + " "
		cmnd += _getExcludedHostsString(config)
		if priority:
			cmnd += "-p {0} ".format(priority)
		if ompNumThreads is not None:
			cmnd += "-pe mt {0} ".format(ompNumThreads)
		cmnd += "< " + fileName
		(returncode, stdout, stderr) = batchelor.runCommand(cmnd)
	finally:
		# remove the temporary script on every path, including failures
		batchelor.runCommand("rm -f " + fileName)
	if returncode != 0:
		raise batchelor.BatchelorException("qsub failed (stderr: '" + stderr + "')")
	# example output: "Your job 1601905 ("J2415c980b8") has been submitted"
	# str.lstrip() removes a set of characters, not a prefix, so the expected
	# prefix is cut off explicitly
	prefix = "Your job-array " if isArrayJob else "Your job "
	jobId = stdout.strip()
	if jobId.startswith(prefix):
		jobId = jobId[len(prefix):]
	# partition() keeps the full text if the separator is missing, where
	# slicing with find() == -1 would silently drop the last character
	jobId = jobId.partition('.' if isArrayJob else ' ')[0]
	try:
		jobId = int(jobId)
	except ValueError:
		raise batchelor.BatchelorException('parsing of qsub output to get job id failed.')
	return jobId
Esempio n. 5
0
def submitJob(config,
              command,
              outputFile,
              jobName,
              wd=None,
              arrayStart=None,
              arrayEnd=None,
              arrayStep=None):
    """Submit a job script through ``bsub`` and return its numeric job id.

    The script is a copy of the configured ``header_file`` with ``command``
    appended; it lives in a temporary file that is fed to ``bsub`` on stdin.

    Args:
        config: configuration object; ``header_file``, ``queue``, ``type``,
            ``pool`` and optionally ``memory`` are read from the
            ``submoduleIdentifier()`` section.
        command: shell text appended after the header.
        outputFile: value passed to ``-o``.
        jobName: value passed to ``-J``; the option is omitted when None and
            no array is requested.
        wd: if truthy, passed to ``-cwd``.
        arrayStart, arrayEnd, arrayStep: when ``arrayStart`` is not None the
            job name gets a ``[start-end:step]`` suffix; a random 7-letter
            name is generated if none was given.

    Returns:
        The job id as an int.

    Raises:
        batchelor.BatchelorException: if ``bsub`` exits non-zero or the job id
            cannot be extracted from its output.
    """
    section = submoduleIdentifier()
    (fileDescriptor, fileName) = tempfile.mkstemp()
    os.close(fileDescriptor)
    try:
        batchelor.runCommand("cp " + batchelor._getRealPath(
            config.get(section, "header_file")) + " " + fileName)
        with open(fileName, 'a') as scriptFile:
            scriptFile.write(command)
        if arrayStart is not None:
            if not jobName:
                # ascii_lowercase is locale independent and exists in
                # Python 2 and 3, unlike string.lowercase
                jobName = ''.join(random.sample(string.ascii_lowercase, 7))
            jobName = "{0}[{1}-{2}:{3}]".format(jobName, arrayStart, arrayEnd,
                                                arrayStep)
        cmnd = "bsub "
        if jobName is not None:
            cmnd += "-J " + jobName + " "
        cmnd += "-o " + outputFile + " "
        cmnd += "-q " + config.get(section, "queue") + " "
        if wd:
            # -cwd is an option of its own; it must not end up inside the
            # quoted -R resource requirement string
            cmnd += "-cwd '{0}' ".format(wd)
        cmnd += "-R '"
        cmnd += " select[type=" + config.get(section, "type") + "]"
        cmnd += " rusage[pool=" + config.get(section, "pool") + "]"
        try:
            memory = config.get(section, "memory")
        except ConfigParser.NoOptionError:
            # memory requirement is optional
            memory = None
        if memory is not None:
            cmnd += " rusage[mem=" + memory + "]"
            cmnd += " select[maxmem>" + memory + "]"
        cmnd += _getExcludedHostsString(config)
        cmnd += "' "
        cmnd += "< " + fileName
        (returncode, stdout, stderr) = batchelor.runCommand(cmnd)
    finally:
        # remove the temporary script on every path, including failures
        batchelor.runCommand('rm -f ' + fileName)
    if returncode != 0:
        raise batchelor.BatchelorException("bsub failed (stderr: '" + stderr +
                                           "')")

    # example output: Job <533476534> is submitted to queue <1nd>.
    # str.lstrip() removes a set of characters, not a prefix, so the expected
    # prefix is cut off explicitly
    prefix = "Job <"
    jobId = stdout.strip()
    if jobId.startswith(prefix):
        jobId = jobId[len(prefix):]
    # partition() keeps the full text if '>' is missing, where slicing with
    # find() == -1 would silently drop the last character
    jobId = jobId.partition(">")[0]
    try:
        jobId = int(jobId)
    except ValueError:
        raise batchelor.BatchelorException(
            'parsing of bsub output to get job id failed.')
    return jobId
Esempio n. 6
0
def submitJob(config, command, outputFile, jobName):
	"""Submit a job script through ``llsubmit`` and return its numeric job id.

	Args:
		config: configuration object; ``header_file``, ``group``,
			``notification``, ``notify_user``, ``node_usage``,
			``wall_clock_limit``, ``resources`` and ``job_type`` are read from
			the ``submoduleIdentifier()`` section.
		command: shell text appended after the header file contents.
		outputFile: value of the ``output`` keyword.
		jobName: value of the ``job_name`` keyword; omitted when None.

	Returns:
		The job id as an int.

	Raises:
		batchelor.BatchelorException: if ``llsubmit`` exits non-zero or the job
			id cannot be extracted from its output.
	"""
	section = submoduleIdentifier()
	(fileDescriptor, fileName) = tempfile.mkstemp()
	os.close(fileDescriptor)
	try:
		headerFileName = batchelor._getRealPath(config.get(section, "header_file"))
		with open(fileName, 'w') as tempFile:
			tempFile.write("#!/bin/bash\n\n")
			tempFile.write("#@ group = " + config.get(section, "group") + "\n")
			tempFile.write("#@ output = " + outputFile + "\n")
			tempFile.write("#@ notification = " + config.get(section, "notification") + "\n")
			tempFile.write("#@ notify_user = " + config.get(section, "notify_user") + "\n")
			tempFile.write("#@ node_usage = " + config.get(section, "node_usage") + "\n")
			tempFile.write("#@ wall_clock_limit = " + config.get(section, "wall_clock_limit") + "\n")
			tempFile.write("#@ resources = " + config.get(section, "resources") + "\n")
			tempFile.write("#@ job_type = " + config.get(section, "job_type") + "\n")
			# NOTE(review): "class" is filled from the "job_type" option -- confirm this is intended
			tempFile.write("#@ class = " + config.get(section, "job_type") + "\n")
			if jobName is not None:
				tempFile.write("#@ job_name = " + jobName + "\n")
			tempFile.write("#@ queue\n\n\n")
			with open(headerFileName, 'r') as headerFile:
				for line in headerFile:
					# the script already starts with its own shebang
					if not line.startswith("#!"):
						tempFile.write(line)
			tempFile.write("\n\n")
			tempFile.write(command)
		(returncode, stdout, stderr) = batchelor.runCommand("llsubmit - < " + fileName)
	finally:
		# single cleanup point for success and all failure paths
		batchelor.runCommand("rm -f " + fileName)
	if returncode != 0:
		raise batchelor.BatchelorException("llsubmit failed (stderr: '" + stderr + "')")
	# example output stdout:
	# llsubmit: The job "mgmt.12309" has been submitted.
	#
	# example output stderr:
	#
	# llsubmit: Stdin job command file written to "/tmp/loadlx_stdin.27558.CdoVxX".
	#
	# INFO: Project: pr83mo
	# INFO: Project's Expiration Date:    2015-01-31
	# INFO: Budget:                     Total [cpuh]        Used [cpuh]      Credit [cpuh]
	# INFO:                                  1350000      1011028 (75%)       338972 (25%)
	#
	# llsubmit: Processed command file through Submit Filter: "/lrz/loadl/filter/submit_filter_c2pap.pl".
	firstLine = stdout.split("\n")[0]
	# take the text between '"mgmt.' and the last double quote of the first line
	jobId = firstLine[firstLine.find('"mgmt.')+6:firstLine.rfind('"')]
	try:
		jobId = int(jobId)
	except ValueError:
		raise batchelor.BatchelorException('parsing of llsubmit output to get job id failed.')
	return jobId
def submitJob(config, command, outputFile, jobName, wd = None):
	"""Submit a job script through ``sbatch`` and return its numeric job id.

	If the configuration defines ``max_active_jobs``, the call blocks (polling
	every 90 seconds) until fewer jobs than that are reported as active.

	Args:
		config: configuration object; ``header_file``, ``wall_clock_limit``,
			``memory`` and optionally ``max_active_jobs`` are read from the
			``submoduleIdentifier()`` section.
		command: shell text appended after the header file contents.
		outputFile: value of the ``#SBATCH -o`` directive.
		jobName: value of the ``#SBATCH -J`` directive; omitted when None.
		wd: working directory for ``#SBATCH -D``; defaults to the current
			working directory.

	Returns:
		The job id as an int.

	Raises:
		batchelor.BatchelorException: if ``sbatch`` exits non-zero or the job
			id cannot be extracted from its output.
	"""
	section = submoduleIdentifier()

	# check if only a certain amount of active jobs is allowed
	if config.has_option(section, "max_active_jobs"):
		max_active_jobs = int(config.get(section, "max_active_jobs"))
		waitTime = 90  # seconds, i.e. 1.5 min between polls
		hasWaited = False
		while len(getListOfActiveJobs(None)) >= max_active_jobs:
			if not hasWaited:
				sys.stdout.write("Waiting for free slots")
				sys.stdout.flush()
				hasWaited = True
			time.sleep(waitTime)
		if hasWaited:
			sys.stdout.write("\r")

	if wd is None:
		wd = os.getcwd()
	(fileDescriptor, fileName) = tempfile.mkstemp()
	os.close(fileDescriptor)
	try:
		headerFileName = batchelor._getRealPath(config.get(section, "header_file"))
		with open(fileName, 'w') as tempFile:
			tempFile.write("#!/bin/bash\n\n")
			tempFile.write("#SBATCH -D " + wd + "\n")
			tempFile.write("#SBATCH -o " + outputFile + "\n")
			tempFile.write("#SBATCH --time=" + config.get(section, "wall_clock_limit") + "\n")
			tempFile.write("#SBATCH --mem=" + config.get(section, "memory") + "\n")
			if jobName is not None:
				tempFile.write("#SBATCH -J " + jobName + "\n")
			tempFile.write("#SBATCH --get-user-env \n")
			tempFile.write("#SBATCH --export=NONE \n")
			tempFile.write("#SBATCH --clusters=serial \n\n\n")
			with open(headerFileName, 'r') as headerFile:
				for line in headerFile:
					# the script already starts with its own shebang
					if not line.startswith("#!"):
						tempFile.write(line)
			tempFile.write("\n\n")
			tempFile.write(command)
		(returncode, stdout, stderr) = batchelor.runCommand("sbatch " + fileName)
	finally:
		# remove the temporary script even if writing or submitting raised
		batchelor.runCommand("rm -f " + fileName)
	if returncode != 0:
		raise batchelor.BatchelorException("sbatch failed (stderr: '" + stderr + "')")
	try:
		# the id is the fourth whitespace-separated token of stdout
		# (presumably "Submitted batch job <id> ..." -- confirm)
		jobId = int(stdout.split()[3])
	except (IndexError, ValueError):
		raise batchelor.BatchelorException('parsing output of sbatch to get job id failed.')
	return jobId
def submitJob(config, command, outputFile, jobName, wd = None, arrayStart = None, arrayEnd = None, arrayStep = None):
	"""Submit a job through ``condor_submit`` and return its numeric id.

	A submit description and an executable script (configured ``header_file``
	plus ``command``) are created in ``<cwd>/.log``; both are registered for
	removal at interpreter exit.

	Args:
		config: configuration object; ``header_file``, ``memory``, ``disk`` and
			``flavour`` are read from the ``submoduleIdentifier()`` section.
		command: shell text appended after the header.
		outputFile: if truthy, its absolute path is used for ``output`` and,
			with ``.condor`` / ``.err`` appended, for ``log`` and ``error``.
		jobName: if truthy, passed to ``-batch-name``.
		wd: if truthy, forwarded to ``batchelor.runCommand`` as ``wd``.
		arrayStart, arrayEnd, arrayStep: not supported; any non-None value
			raises.

	Returns:
		The id parsed from the ``condor_submit`` output as an int.

	Raises:
		batchelor.BatchelorException: for array requests, a submit directory
			containing whitespace, a non-zero ``condor_submit`` exit status, or
			unparsable output.
	"""
	section = submoduleIdentifier()
	if arrayStart is not None or arrayEnd is not None or arrayStep is not None:
		raise batchelor.BatchelorException("Array jobs are not (yet) implementet for CERNs HTCondor system")

	filesDir = os.path.join(os.getcwd(), '.log')
	if " " in filesDir:
		raise batchelor.BatchelorException("Cannot handle submit directories with whitespaces")

	if not os.path.exists(filesDir):
		os.makedirs(filesDir)
	(fileDescriptor, submitFileName) = tempfile.mkstemp(dir=filesDir, prefix='submitFiles_', suffix='.submit')
	os.close(fileDescriptor)
	atexit.register(lambda: os.remove( submitFileName ))
	(fileDescriptor, scriptFileName) = tempfile.mkstemp(dir=filesDir, prefix='scriptFiles_', suffix='.sh')
	os.close(fileDescriptor)
	atexit.register(lambda: os.remove( scriptFileName ))
	# 0o755 is the octal spelling accepted by both Python 2.6+ and Python 3
	os.chmod(scriptFileName, 0o755)

	batchelor.runCommand("cp " + batchelor._getRealPath(config.get(section, "header_file")) + " " + scriptFileName)
	with open(scriptFileName, 'a') as scriptFile:
		scriptFile.write(command)
	with open(submitFileName, 'w') as submitFile:
		submitFile.write("executable = {0}\n".format(scriptFileName))
		# test before abspath(): abspath() of an empty string is the current
		# directory, which would make the check always succeed
		if outputFile:
			outputFile = os.path.abspath(outputFile)
			submitFile.write("output = {0}\n".format(outputFile))
			submitFile.write("log = {0}.condor\n".format(outputFile))
			submitFile.write("error = {0}.err\n".format(outputFile))

		submitFile.write("should_transfer_files = NO\n") # Disable file transport
		submitFile.write("request_cpus  = 1\n")
		submitFile.write("request_memory = {0}\n".format(config.get(section, "memory")))
		submitFile.write("request_disk = {0}\n".format(config.get(section, "disk")))
		submitFile.write("+JobFlavour = \"{0}\"\n".format(config.get(section, "flavour")))
		submitFile.write("queue 1\n")
	cmnd = "condor_submit '{0}'".format(submitFileName)
	if jobName:
		cmnd += " -batch-name {0} ".format(jobName)
	kwargs = {}
	if wd:
		kwargs['wd'] = wd
	(returncode, stdout, stderr) = batchelor.runCommand(cmnd, **kwargs)
	if returncode != 0:
		raise batchelor.BatchelorException("condor_submit failed (stderr: '" + stderr + "')")
	try:
		# the id is the sixth token of the second stdout line, minus a trailing '.'
		jobId = int(stdout.split('\n')[1].split()[5].rstrip("."))
	except (IndexError, ValueError):
		raise batchelor.BatchelorException('parsing of condor_submit output to get job id failed.')
	return jobId
def submitJob(config,
              command,
              outputFile,
              jobName,
              wd=None,
              arrayStart=None,
              arrayEnd=None,
              arrayStep=None):
    """Submit a job script through ``qsub`` and return its numeric job id.

    The script is a copy of the configured ``header_file`` with ``command``
    appended; it lives in a temporary file that is fed to ``qsub`` on stdin.

    Args:
        config: configuration object; ``header_file``, ``project``, ``queue``
            and ``memory`` are read from the ``submoduleIdentifier()`` section.
        command: shell text appended after the header.
        outputFile: value passed to ``-o``.
        jobName: value passed to ``-N``; the option is omitted when None.
        wd: not supported here; any truthy value raises.
        arrayStart, arrayEnd, arrayStep: when ``arrayStart`` is not None a
            ``-t start-end:step`` option is added.

    Returns:
        The job id as an int.

    Raises:
        batchelor.BatchelorException: if ``wd`` is given, ``qsub`` exits
            non-zero, or the job id cannot be extracted from its output.
    """
    section = submoduleIdentifier()
    if wd:
        raise batchelor.BatchelorException(
            "Choosing the working directory is not jet implemented for {0}".
            format(section))

    isArrayJob = arrayStart is not None
    (fileDescriptor, fileName) = tempfile.mkstemp()
    os.close(fileDescriptor)
    try:
        batchelor.runCommand("cp " + batchelor._getRealPath(
            config.get(section, "header_file")) + " " + fileName)
        with open(fileName, 'a') as scriptFile:
            scriptFile.write(command)
        cmnd = "qsub "
        cmnd += "-j y "
        if jobName is not None:
            cmnd += "-N " + jobName + " "
        if isArrayJob:
            cmnd += "-t " + str(arrayStart) + "-" + str(arrayEnd) + ":" + str(
                arrayStep) + " "
        cmnd += "-o " + outputFile + " "
        cmnd += "-P " + config.get(section, "project") + " "
        cmnd += "-q " + config.get(section, "queue") + " "
        cmnd += "-l h_vmem=" + config.get(section, "memory") + " "
        cmnd += _getExcludedHostsString(config)
        cmnd += "< " + fileName
        (returncode, stdout, stderr) = batchelor.runCommand(cmnd)
    finally:
        # remove the temporary script on every path, including failures
        batchelor.runCommand("rm -f " + fileName)
    if returncode != 0:
        raise batchelor.BatchelorException("qsub failed (stderr: '" + stderr +
                                           "')")
    # example output: "Your job 1601905 ("J2415c980b8") has been submitted"
    # str.lstrip() removes a set of characters, not a prefix, so the expected
    # prefix is cut off explicitly
    prefix = "Your job-array " if isArrayJob else "Your job "
    jobId = stdout.strip()
    if jobId.startswith(prefix):
        jobId = jobId[len(prefix):]
    # partition() keeps the full text if the separator is missing, where
    # slicing with find() == -1 would silently drop the last character
    jobId = jobId.partition('.' if isArrayJob else ' ')[0]
    try:
        jobId = int(jobId)
    except ValueError:
        raise batchelor.BatchelorException(
            'parsing of qsub output to get job id failed.')
    return jobId
def submitArrayJobs(config, commands, outputFile, jobName, wd=None):
    """Submit ``commands`` in chunks, each chunk as one multi-task job.

    Every chunk of up to ``n_tasks_per_job`` commands becomes one job script
    that writes one ``<k>.sh`` per command plus an ``srun.conf`` into a fresh
    temporary directory below ``$SCRATCH/tmp`` and runs them with
    ``srun --multi-prog``. The job itself is submitted via ``_submitJob``.

    Args:
        config: configuration object; ``n_tasks_per_job`` and ``header_file``
            are read from the ``submoduleIdentifier()`` section.
        commands: sequence of shell command strings, one per task.
        outputFile: job output file; unless it is ``/dev/null``, a
            ``.<first>_<last>`` suffix is appended when more than one job is
            needed.
        jobName: forwarded to ``_submitJob``.
        wd: forwarded to ``_submitJob``.

    Returns:
        A list with one entry per command holding the id of the job that runs
        it.

    Raises:
        batchelor.BatchelorException: if ``n_tasks_per_job`` is smaller than 1.
        KeyError: if ``commands`` is non-empty and ``SCRATCH`` is not set in
            the environment.
    """
    section = submoduleIdentifier()
    nTasksPerJob = int(config.get(section, "n_tasks_per_job"))
    if nTasksPerJob < 1:
        # a non-positive chunk size would never advance through the commands
        raise batchelor.BatchelorException(
            "n_tasks_per_job must be a positive integer")
    headerFileName = batchelor._getRealPath(config.get(section, "header_file"))
    with open(headerFileName, 'r') as headerFile:
        # the header ends up inside a double-quoted echo argument
        header = headerFile.read().replace(r'"', r'\"')
    jids = []
    if not commands:
        return jids

    # the scratch directory does not depend on the chunk, so set it up once
    tmpDir = os.path.join(os.environ['SCRATCH'], 'tmp')
    if not os.path.isdir(tmpDir):
        os.makedirs(tmpDir)
    splitOutput = outputFile != "/dev/null" and len(commands) > nTasksPerJob

    for first in range(0, len(commands), nTasksPerJob):
        last = min(len(commands), first + nTasksPerJob)
        nTasks = last - first
        fullCmd = 'tmpDir=$(mktemp -d -p {TMPDIR})\ntrap "rm -rf \'${{tmpDir}}\'" EXIT\n'.format(
            TMPDIR=tmpDir)
        fullCmd += 'echo "{srun}" > ${{tmpDir}}/srun.conf\n'.format(
            srun='\n'.join([
                "{i} bash ${{tmpDir}}/{i}.sh".format(i=k)
                for k in range(nTasks)
            ]))
        for k, cmd in enumerate(commands[first:last]):
            fullCmd += 'echo "#!/bin/bash\n{header}\n{cmd}" > ${{tmpDir}}/{i}.sh\n'.format(
                header=header, cmd=cmd.replace(r'"', r'\"'), i=k)
        fullCmd += 'srun -n {nTasks} --multi-prog ${{tmpDir}}/srun.conf'.format(
            nTasks=nTasks)
        if splitOutput:
            chunkOutputFile = outputFile + ".{0}_{1}".format(first, last)
        else:
            chunkOutputFile = outputFile
        jid = _submitJob(config,
                         fullCmd,
                         chunkOutputFile,
                         jobName,
                         wd,
                         nTasks=nTasks)
        jids += [jid] * nTasks
    return jids
Esempio n. 11
0
def submitJob(config, command, outputFile, jobName, wd = None, arrayStart = None, arrayEnd = None, arrayStep = None):
	"""Submit a job script through ``bsub`` and return its numeric job id.

	The script is a copy of the configured ``header_file`` with ``command``
	appended; it lives in a temporary file that is fed to ``bsub`` on stdin.

	Args:
		config: configuration object; ``header_file``, ``queue``, ``type``,
			``pool`` and optionally ``memory`` are read from the
			``submoduleIdentifier()`` section.
		command: shell text appended after the header.
		outputFile: value passed to ``-o``.
		jobName: value passed to ``-J``; the option is omitted when None and
			no array is requested.
		wd: if truthy, passed to ``-cwd``.
		arrayStart, arrayEnd, arrayStep: when ``arrayStart`` is not None the
			job name gets a ``[start-end:step]`` suffix; a random 7-letter
			name is generated if none was given.

	Returns:
		The job id as an int.

	Raises:
		batchelor.BatchelorException: if ``bsub`` exits non-zero or the job id
			cannot be extracted from its output.
	"""
	section = submoduleIdentifier()
	(fileDescriptor, fileName) = tempfile.mkstemp()
	os.close(fileDescriptor)
	try:
		batchelor.runCommand("cp " + batchelor._getRealPath(config.get(section, "header_file")) + " " + fileName)
		with open(fileName, 'a') as scriptFile:
			scriptFile.write(command)
		if arrayStart is not None:
			if not jobName:
				# ascii_lowercase is locale independent and exists in
				# Python 2 and 3, unlike string.lowercase
				jobName = ''.join(random.sample(string.ascii_lowercase, 7))
			jobName = "{0}[{1}-{2}:{3}]".format(jobName, arrayStart, arrayEnd, arrayStep)
		cmnd = "bsub "
		if jobName is not None:
			cmnd += "-J " + jobName + " "
		cmnd += "-o " + outputFile + " "
		cmnd += "-q " + config.get(section, "queue") + " "
		if wd:
			# -cwd is an option of its own; it must not end up inside the
			# quoted -R resource requirement string
			cmnd += "-cwd '{0}' ".format(wd)
		cmnd += "-R '"
		cmnd += " select[type=" + config.get(section, "type") + "]"
		cmnd += " rusage[pool=" + config.get(section, "pool") + "]"
		try:
			memory = config.get(section, "memory")
		except ConfigParser.NoOptionError:
			# memory requirement is optional
			memory = None
		if memory is not None:
			cmnd += " rusage[mem=" + memory + "]"
			cmnd += " select[maxmem>" + memory + "]"
		cmnd += _getExcludedHostsString(config)
		cmnd += "' "
		cmnd += "< " + fileName
		(returncode, stdout, stderr) = batchelor.runCommand(cmnd)
	finally:
		# remove the temporary script on every path, including failures
		batchelor.runCommand('rm -f ' + fileName)
	if returncode != 0:
		raise batchelor.BatchelorException("bsub failed (stderr: '" + stderr + "')")
	# example output: Job <533476534> is submitted to queue <1nd>.
	# str.lstrip() removes a set of characters, not a prefix, so the expected
	# prefix is cut off explicitly
	prefix = "Job <"
	jobId = stdout.strip()
	if jobId.startswith(prefix):
		jobId = jobId[len(prefix):]
	# partition() keeps the full text if '>' is missing, where slicing with
	# find() == -1 would silently drop the last character
	jobId = jobId.partition(">")[0]
	try:
		jobId = int(jobId)
	except ValueError:
		raise batchelor.BatchelorException('parsing of bsub output to get job id failed.')
	return jobId
Esempio n. 12
0
def submitJob(config,
              command,
              outputFile,
              jobName,
              wd=None,
              arrayStart=None,
              arrayEnd=None,
              arrayStep=None,
              priority=None,
              ompNumThreads=None):
    """Submit a job script through ``qsub`` and return its numeric job id.

    The script is a copy of the configured ``header_file``, optionally followed
    by an ``OMP_NUM_THREADS`` export, followed by ``command``.

    Args:
        config: configuration object; ``header_file``, ``memory``, ``arch`` and
            optionally ``shortqueue`` / ``longqueue`` are read from the
            ``submoduleIdentifier()`` section.
        command: shell text appended to the script.
        outputFile: value passed to ``-o``; must not be inside the home folder.
        jobName: value passed to ``-N``; the option is omitted when None.
        wd: value passed to ``-wd`` (``/tmp/`` if not given); must not be
            inside the home folder.
        arrayStart, arrayEnd, arrayStep: when ``arrayStart`` is not None a
            ``-t start-end:step`` option is added.
        priority: if truthy, mapped linearly so that -1 gives -1024 and 1 gives
            1024, then clamped to -1023..1024 and passed to ``-p``.
        ompNumThreads: if not None, exported as ``OMP_NUM_THREADS`` in the
            script and passed to ``-pe mt``.

    Returns:
        The job id as an int.

    Raises:
        batchelor.BatchelorException: if ``wd`` or ``outputFile`` is inside the
            home folder, ``qsub`` exits non-zero, or the job id cannot be
            extracted from its output.
    """
    section = submoduleIdentifier()
    isArrayJob = arrayStart is not None
    homeDir = os.path.realpath(os.path.expanduser('~'))

    def _isInsideHome(path):
        # compare on path-component boundaries; a substring test would also
        # reject unrelated paths that merely contain the home path
        resolved = os.path.realpath(path)
        return (resolved == homeDir
                or resolved.startswith(homeDir.rstrip(os.sep) + os.sep))

    # some checks of the job-settings
    if wd and _isInsideHome(wd):
        raise batchelor.BatchelorException(
            "The given working-directory is in your home-folder which is no allowed at E18: '{0}'"
            .format(wd))

    if _isInsideHome(outputFile):
        raise batchelor.BatchelorException(
            "The given output-file is in your home-folder which is no allowed at E18: '{0}'"
            .format(outputFile))

    if priority:
        priority = min(
            max(int(-1024 + 2048 * (priority + 1.0) / 2.0), -1023), 1024)

    (fileDescriptor, fileName) = tempfile.mkstemp()
    os.close(fileDescriptor)
    try:
        batchelor.runCommand("cp " + batchelor._getRealPath(
            config.get(section, "header_file")) + " " + fileName)
        with open(fileName, 'a') as scriptFile:
            if ompNumThreads is not None:
                scriptFile.write(
                    "export OMP_NUM_THREADS={0}\n".format(ompNumThreads))
            scriptFile.write(command)
        enabledValues = [1, "1", "TRUE", "true", "True"]
        cmnd = "qsub "
        cmnd += "-j y "
        cmnd += "-b no "
        cmnd += "-m n "
        if jobName is not None:
            cmnd += "-N " + jobName + " "
        if isArrayJob:
            cmnd += "-t " + str(arrayStart) + "-" + str(arrayEnd) + ":" + str(
                arrayStep) + " "
        cmnd += "-o '" + outputFile + "' "
        cmnd += "-wd '" + (wd if wd else "/tmp/") + "' "
        # queue selection: short wins over long, medium is the fallback
        if (config.has_option(section, "shortqueue")
                and config.get(section, "shortqueue") in enabledValues):
            cmnd += "-l short=1 "
        elif (config.has_option(section, "longqueue")
              and config.get(section, "longqueue") in enabledValues):
            cmnd += "-l long=1 "
        else:
            cmnd += "-l medium=1 "
        cmnd += "-l h_pmem=" + config.get(section, "memory") + " "
        cmnd += "-l arch=" + config.get(section, "arch") + " "
        cmnd += _getExcludedHostsString(config)
        if priority:
            cmnd += "-p {0} ".format(priority)
        if ompNumThreads is not None:
            cmnd += "-pe mt {0} ".format(ompNumThreads)
        cmnd += "< " + fileName
        (returncode, stdout, stderr) = batchelor.runCommand(cmnd)
    finally:
        # remove the temporary script on every path, including failures
        batchelor.runCommand("rm -f " + fileName)
    if returncode != 0:
        raise batchelor.BatchelorException("qsub failed (stderr: '" + stderr +
                                           "')")
    # example output: "Your job 1601905 ("J2415c980b8") has been submitted"
    # str.lstrip() removes a set of characters, not a prefix, so the expected
    # prefix is cut off explicitly
    prefix = "Your job-array " if isArrayJob else "Your job "
    jobId = stdout.strip()
    if jobId.startswith(prefix):
        jobId = jobId[len(prefix):]
    # partition() keeps the full text if the separator is missing, where
    # slicing with find() == -1 would silently drop the last character
    jobId = jobId.partition('.' if isArrayJob else ' ')[0]
    try:
        jobId = int(jobId)
    except ValueError:
        raise batchelor.BatchelorException(
            'parsing of qsub output to get job id failed.')
    return jobId
Esempio n. 13
0
def submitJob(config,
              command,
              outputFile,
              jobName,
              wd=None,
              arrayStart=None,
              arrayEnd=None,
              arrayStep=None):
    """Submit a job through ``condor_submit`` and return its numeric id.

    A submit description and an executable script (configured ``header_file``
    plus ``command``) are created in ``<cwd>/.log``; both are registered for
    removal at interpreter exit.

    Args:
        config: configuration object; ``header_file``, ``memory``, ``disk`` and
            ``flavour`` are read from the ``submoduleIdentifier()`` section.
        command: shell text appended after the header.
        outputFile: if truthy, its absolute path is used for ``output`` and,
            with ``.condor`` / ``.err`` appended, for ``log`` and ``error``.
        jobName: if truthy, passed to ``-batch-name``.
        wd: if truthy, forwarded to ``batchelor.runCommand`` as ``wd``.
        arrayStart, arrayEnd, arrayStep: not supported; any non-None value
            raises.

    Returns:
        The id parsed from the ``condor_submit`` output as an int.

    Raises:
        batchelor.BatchelorException: for array requests, a submit directory
            containing whitespace, a non-zero ``condor_submit`` exit status, or
            unparsable output.
    """
    section = submoduleIdentifier()
    if arrayStart is not None or arrayEnd is not None or arrayStep is not None:
        raise batchelor.BatchelorException(
            "Array jobs are not (yet) implementet for CERNs HTCondor system")

    filesDir = os.path.join(os.getcwd(), '.log')
    if " " in filesDir:
        raise batchelor.BatchelorException(
            "Cannot handle submit directories with whitespaces")

    if not os.path.exists(filesDir):
        os.makedirs(filesDir)
    (fileDescriptor, submitFileName) = tempfile.mkstemp(dir=filesDir,
                                                        prefix='submitFiles_',
                                                        suffix='.submit')
    os.close(fileDescriptor)
    atexit.register(lambda: os.remove(submitFileName))
    (fileDescriptor, scriptFileName) = tempfile.mkstemp(dir=filesDir,
                                                        prefix='scriptFiles_',
                                                        suffix='.sh')
    os.close(fileDescriptor)
    atexit.register(lambda: os.remove(scriptFileName))
    # 0o755 is the octal spelling accepted by both Python 2.6+ and Python 3
    os.chmod(scriptFileName, 0o755)

    batchelor.runCommand("cp " + batchelor._getRealPath(
        config.get(section, "header_file")) + " " + scriptFileName)
    with open(scriptFileName, 'a') as scriptFile:
        scriptFile.write(command)
    with open(submitFileName, 'w') as submitFile:
        submitFile.write("executable = {0}\n".format(scriptFileName))
        # test before abspath(): abspath() of an empty string is the current
        # directory, which would make the check always succeed
        if outputFile:
            outputFile = os.path.abspath(outputFile)
            submitFile.write("output = {0}\n".format(outputFile))
            submitFile.write("log = {0}.condor\n".format(outputFile))
            submitFile.write("error = {0}.err\n".format(outputFile))

        submitFile.write(
            "should_transfer_files = NO\n")  # Disable file transport
        submitFile.write("request_cpus  = 1\n")
        submitFile.write("request_memory = {0}\n".format(
            config.get(section, "memory")))
        submitFile.write("request_disk = {0}\n".format(
            config.get(section, "disk")))
        submitFile.write("+JobFlavour = \"{0}\"\n".format(
            config.get(section, "flavour")))
        submitFile.write("queue 1\n")
    cmnd = "condor_submit '{0}'".format(submitFileName)
    if jobName:
        cmnd += " -batch-name {0} ".format(jobName)
    kwargs = {}
    if wd:
        kwargs['wd'] = wd
    (returncode, stdout, stderr) = batchelor.runCommand(cmnd, **kwargs)
    if returncode != 0:
        raise batchelor.BatchelorException("condor_submit failed (stderr: '" +
                                           stderr + "')")
    try:
        # the id is the sixth token of the second stdout line, minus a
        # trailing '.'
        jobId = int(stdout.split('\n')[1].split()[5].rstrip("."))
    except (IndexError, ValueError):
        raise batchelor.BatchelorException(
            'parsing of condor_submit output to get job id failed.')
    return jobId
def _submitJob(config, command, outputFile, jobName, wd=None, nTasks=None):
    """Write an ``sbatch`` script, submit it and return the numeric job id.

    If the configuration defines ``max_active_jobs``, the call blocks (polling
    every 90 seconds) until fewer jobs than that are reported as active; a
    failing job query counts as "no free slot".

    Args:
        config: configuration object; ``header_file``, ``wall_clock_limit``,
            ``clusters`` and, depending on the cluster, ``memory``,
            ``partition``, ``n_tasks_per_node`` and ``max_active_jobs`` are
            read from the ``submoduleIdentifier()`` section.
        command: shell text appended after the header file contents.
        outputFile: value of the ``#SBATCH -o`` directive.
        jobName: value of the ``#SBATCH -J`` directive; omitted when None.
        wd: working directory for ``#SBATCH -D``; defaults to the current
            working directory.
        nTasks: if not None, task/node directives are added for that many
            tasks.

    Returns:
        The job id as an int.

    Raises:
        batchelor.BatchelorException: if ``sbatch`` exits non-zero or the job
            id cannot be extracted from its output.
    """
    section = submoduleIdentifier()

    # check if only a certain amount of active jobs is allowed
    if config.has_option(section, "max_active_jobs"):
        max_active_jobs = int(config.get(section, "max_active_jobs"))
        waitTime = 90  # seconds, i.e. 1.5 min between polls
        hasWaited = False
        while True:
            try:
                nRunningJobs = len(getListOfActiveJobs(None))
            except batchelor.BatchelorException:
                # treat a failed query as "all slots busy" and retry later
                nRunningJobs = max_active_jobs
            if nRunningJobs < max_active_jobs:
                break
            if not hasWaited:
                sys.stdout.write("Waiting for free slots")
                sys.stdout.flush()
                hasWaited = True
            time.sleep(waitTime)
        if hasWaited:
            sys.stdout.write("\r")

    if wd is None:
        wd = os.getcwd()
    (fileDescriptor, fileName) = tempfile.mkstemp()
    os.close(fileDescriptor)
    try:
        headerFileName = batchelor._getRealPath(
            config.get(section, "header_file"))
        clusters = config.get(section, "clusters")
        with open(fileName, 'w') as tempFile:
            tempFile.write("#!/bin/bash\n\n")
            tempFile.write("#SBATCH -D " + wd + "\n")
            tempFile.write("#SBATCH -o " + outputFile + "\n")
            tempFile.write("#SBATCH --time=" +
                           config.get(section, "wall_clock_limit") + "\n")
            if clusters != 'mpp3':
                tempFile.write("#SBATCH --mem-per-cpu=" +
                               config.get(section, "memory") + "\n")
            if jobName is not None:
                tempFile.write("#SBATCH -J " + jobName + "\n")
            tempFile.write("#SBATCH --get-user-env \n")
            tempFile.write("#SBATCH --export=NONE \n")
            if nTasks is not None:
                if clusters != 'mpp3':
                    tempFile.write("#SBATCH --ntasks={0:d} \n".format(nTasks))
                else:
                    # request whole nodes: nTasks / 64, rounded up
                    tempFile.write("#SBATCH --nodes={0:d} \n".format(
                        (nTasks + 63) // 64))
                tempFile.write("#SBATCH --ntasks-per-node={0} \n".format(
                    config.get(section, "n_tasks_per_node")))
            tempFile.write("#SBATCH --clusters={0}\n".format(clusters))
            if clusters not in ['cm2_tiny', 'mpp3']:
                tempFile.write("#SBATCH --partition={0}\n\n".format(
                    config.get(section, "partition")))
            if clusters == 'cm2' or clusters == 'c2pap':
                # NOTE(review): --qos is filled from the "partition" option -- confirm this is intended
                tempFile.write("#SBATCH --qos={0}\n\n".format(
                    config.get(section, "partition")))
            tempFile.write("module load slurm_setup \n\n\n")
            with open(headerFileName, 'r') as headerFile:
                for line in headerFile:
                    # the script already starts with its own shebang
                    if not line.startswith("#!"):
                        tempFile.write(line)
            tempFile.write("\n\n")
            tempFile.write(command)
        (returncode, stdout, stderr) = batchelor.runCommand("sbatch " + fileName)
    finally:
        # remove the temporary script even if writing or submitting raised
        batchelor.runCommand("rm -f " + fileName)
    if returncode != 0:
        raise batchelor.BatchelorException("sbatch failed (stderr: '" +
                                           stderr + "')")
    try:
        # the id is the fourth whitespace-separated token of stdout
        # (presumably "Submitted batch job <id> ..." -- confirm)
        jobId = int(stdout.split()[3])
    except (IndexError, ValueError):
        raise batchelor.BatchelorException(
            'parsing output of sbatch to get job id failed.')
    return jobId