def get_master_input_sandbox(job, extra):
    sandbox = job.inputsandbox[:]
    sandbox += extra.master_input_files[:]
    buffers = extra.master_input_buffers
    sandbox += [FileBuffer(n, s) for (n, s) in buffers.items()]
    logger.debug("Master input sandbox: %s", str(sandbox))
    return sandbox
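# Illustrative sketch (not part of the module): what get_master_input_sandbox()
# is fed and returns. The 'extra' object and its field values here are
# hypothetical stand-ins.
#
#   extra.master_input_files   = [File('setup.sh')]
#   extra.master_input_buffers = {'options.txt': 'nEvents = 100\n'}
#
# With job.inputsandbox == [File('analysis.C')], the returned sandbox is
#   [File('analysis.C'), File('setup.sh'), FileBuffer('options.txt', ...)]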
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    job = app.getJobObject()
    version = appsubconfig["version"]
    outDir = appsubconfig["outDir"]
    docDir = appsubconfig["docDir"]
    elementList = docDir.split(os.sep)
    docList = appsubconfig["docList"]
    softwareDir = appsubconfig["softwareDir"]

    lineList = []
    inbox = []
    outbox = []

    headList, headBox = self.head(job=job, version=version,
                                  softwareDir=softwareDir)
    lineList.extend(headList)
    outbox.extend(headBox)

    bodyList, bodyBox = self.body(job=job, docDir=docDir,
                                  docList=docList)
    lineList.extend(bodyList)
    inbox.extend(bodyBox)

    tailList, tailBox = self.tail(job=job)
    lineList.extend(tailList)
    outbox.extend(tailBox)

    jobScript = "\n".join(lineList)
    jobWrapper = FileBuffer("PDF.sh", jobScript, executable=1)

    outbox.extend(job.outputsandbox)

    return StandardJobConfig(exe=jobWrapper, inputbox=inbox, outputbox=outbox)
def configure(self, masterappconfig):
    self.args = convertIntToStringArgs(self.args)

    job = self.getJobObject()

    if self.cmtsetup == None:
        raise ApplicationConfigurationError(None, 'No cmt setup script given.')

    # Need to handle the possibility of multiple output files!
    # Set up the output file(s).
    for arg in self.args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                None,
                'Option "-o" given in args. You must use the outputfile variable instead, even if you have multiple output files.')

    if self.outputfile == None:
        raise ApplicationConfigurationError(
            None, 'No output file given. Fill the outputfile variable.')
    else:
        if type(self.outputfile) == type([]):
            for OutFi in self.outputfile:
                self.args.append('-o')
                self.args.append(OutFi)
        else:
            self.args.append('-o')
            self.args.append(self.outputfile)

    # Get the list of input filenames from get_dataset_filenames() and pass
    # them on the command line.
    if job.inputdata == None:
        raise ApplicationConfigurationError(
            None, 'The inputdata variable is not defined.')
    fileList = job.inputdata.get_dataset_filenames()
    if len(fileList) < 1:
        raise ApplicationConfigurationError(None, 'No input data file given.')
    self.args.extend(fileList)

    argsStr = ' '.join(self.args)
    # Create the bash script and put it in the input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    script += self.exe + ' ' + argsStr + '\n'

    from Ganga.GPIDev.Lib.File import FileBuffer

    if self.exe.find('.exe') > -1:
        scriptname = self.exe.replace('.exe', '.sh')
    else:
        scriptname = self.exe + '.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script), executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
def preparejob(self, jobconfig, master_input_sandbox):

    job = self.getJobObject()
    # print str(job.backend_output_postprocess)
    mon = job.getMonitoringService()
    import Ganga.Core.Sandbox as Sandbox
    subjob_input_sandbox = job.createPackedInputSandbox(
        jobconfig.getSandboxFiles()
        + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules()))

    appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    if self.nice:
        appscriptpath = ['nice', '-n %d' % self.nice] + appscriptpath
    if self.nice < 0:
        logger.warning('increasing process priority is often not allowed, your job may fail due to this')

    sharedoutputpath = job.getOutputWorkspace().getPath()
    ## FIXME DON'T just use the blind list here, request the list of files to be in the output from a method.
    outputpatterns = jobconfig.outputbox
    environment = dict() if jobconfig.env is None else jobconfig.env

    import tempfile
    workdir = tempfile.mkdtemp(dir=config['location'])

    import inspect
    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'LocalHostExec.py')

    from Ganga.GPIDev.Lib.File import FileUtils
    script = FileUtils.loadScript(script_location, '')

    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles, getWNCodeForInputdataListCreation
    from Ganga.Utility.Config import getConfig

    jobidRepr = repr(job.getFQID('.'))

    script = script.replace('###OUTPUTSANDBOXPOSTPROCESSING###',
                            getWNCodeForOutputSandbox(job, ['stdout', 'stderr', '__syslog__'], jobidRepr))
    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ''))
    script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ''))
    script = script.replace('###CREATEINPUTDATALIST###', getWNCodeForInputdataListCreation(job, ''))

    script = script.replace('###APPLICATION_NAME###', repr(getName(job.application)))
    script = script.replace('###INPUT_SANDBOX###', repr(subjob_input_sandbox + master_input_sandbox))
    script = script.replace('###SHAREDOUTPUTPATH###', repr(sharedoutputpath))
    script = script.replace('###APPSCRIPTPATH###', repr(appscriptpath))
    script = script.replace('###OUTPUTPATTERNS###', str(outputpatterns))
    script = script.replace('###JOBID###', jobidRepr)
    script = script.replace('###ENVIRONMENT###', repr(environment))
    script = script.replace('###WORKDIR###', repr(workdir))
    script = script.replace('###INPUT_DIR###', repr(job.getStringInputDir()))

    self.workdir = workdir

    script = script.replace('###GANGADIR###', repr(getConfig('System')['GANGA_PYTHONPATH']))

    wrkspace = job.getInputWorkspace()
    scriptPath = wrkspace.writefile(FileBuffer('__jobscript__', script), executable=1)

    return scriptPath
def downloadWrapper(app):
    from os.path import join, split
    from Ganga.GPIDev.Lib.File import FileBuffer
    import string

    from Ganga.GPIDev.Lib.File import getSharedPath

    rootsys = join('.', 'root')
    rootenv = {'ROOTSYS': rootsys}

    script = app.script
    if script == File():
        if not app.usepython:
            script = File(defaultScript())
        else:
            script = File(defaultPyRootScript())
    else:
        script = File(os.path.join(os.path.join(Ganga.GPIDev.Lib.File.getSharedPath(), app.is_prepared.name),
                                   os.path.basename(app.script.name)))

    commandline = ''
    scriptPath = join('.', script.subdir, split(script.name)[1])
    if not app.usepython:
        # Arguments to the ROOT script need to be a comma-separated list
        # enclosed in (). Strings should be enclosed in escaped double quotes.
        arglist = []
        for arg in app.args:
            if isinstance(arg, str):
                arglist.append('\\\'' + arg + '\\\'')
            else:
                arglist.append(arg)
        rootarg = '\(\"' + string.join([str(s) for s in arglist], ',') + '\"\)'

        # use ROOT
        commandline = 'root.exe -b -q ' + scriptPath + rootarg
    else:
        # use Python
        pyarg = string.join([str(s) for s in app.args], ' ')
        commandline = '\'%(PYTHONCMD)s ' + scriptPath + ' ' + pyarg + ' -b \''

    logger.debug("Command line: %s: ", commandline)

    # Write a wrapper script that installs ROOT and runs the user script.
    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'wrapperScriptTemplate.py')

    from Ganga.GPIDev.Lib.File import FileUtils
    wrapperscript = FileUtils.loadScript(script_location, '')

    wrapperscript = wrapperscript.replace('###COMMANDLINE###', commandline)
    wrapperscript = wrapperscript.replace('###ROOTVERSION###', app.version)
    wrapperscript = wrapperscript.replace('###SCRIPTPATH###', scriptPath)
    wrapperscript = wrapperscript.replace('###USEPYTHON###', str(app.usepython))

    logger.debug('Script to run on worker node\n' + wrapperscript)

    scriptName = "rootwrapper_generated_%s.py" % randomString()
    runScript = FileBuffer(scriptName, wrapperscript, executable=1)

    inputsandbox = app._getParent().inputsandbox + [script]
    return runScript, inputsandbox, rootenv
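# Stand-alone sanity check of the ROOT argument quoting above, on assumed
# example inputs: mixed string/int args become an escaped, comma-separated
# list that survives the shell and reaches root.exe as one bracketed argument.
def _demo_root_arg_quoting():
    args = ['histo.root', 42]
    arglist = []
    for arg in args:
        if isinstance(arg, str):
            arglist.append('\\\'' + arg + '\\\'')
        else:
            arglist.append(arg)
    rootarg = '\(\"' + ','.join([str(s) for s in arglist]) + '\"\)'
    # rootarg is now: \("\'histo.root\',42"\)
    return rootarg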
def configure(self, masterappconfig):
    self.args = convertIntToStringArgs(self.args)

    job = self.getJobObject()

    if self.cmtsetup == None:
        raise ApplicationConfigurationError(None, 'No cmt setup script given.')

    # setup the output file
    for arg in self.args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                None,
                'Option "-o" given in args. You must use the outputfile variable instead.')
    if self.outputfile == None:
        raise ApplicationConfigurationError(
            None, 'No output file given. Fill the outputfile variable.')
    else:
        self.args.append('-o')
        self.args.append(self.outputfile)

    # Get the list of filenames with set_dataset_into_list(), which creates a
    # file containing the list of input files and puts it in the sandbox.
    fileList = job.inputdir + 'FileList'
    if not job.inputdata.set_dataset_into_list(fileList):
        raise ApplicationConfigurationError(
            None, 'Problem with the preparation of the list of input files')
    self.args.append(fileList)

    argsStr = ' '.join(self.args)
    # ANT: Create the bash script here and put it in the input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    script += self.exe + ' ' + argsStr + '\n'

    from Ganga.GPIDev.Lib.File import FileBuffer

    if self.exe.find('.exe') > -1:
        scriptname = self.exe.replace('.exe', '.sh')
    else:
        scriptname = self.exe + '.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script), executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    run_script = self.__create_run_script(app, appsubconfig, appmasterconfig,
                                          jobmasterconfig, inputsandbox, outputsandbox)

    return StandardJobConfig(FileBuffer('gaudi-script.py', run_script, executable=1),
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def writefile(self, fileobj, executable=None):
    from Ganga.GPIDev.Lib.File import FileBuffer
    if not isType(fileobj, FileBuffer):
        try:
            name, contents = fileobj
        except TypeError as err:
            import traceback
            traceback.print_stack()
            logger.debug("TypeError: %s" % str(err))
            pass
        else:
            fileobj = FileBuffer(name, contents)
            logger.warning('file "%s": usage of tuples is deprecated, use FileBuffer instead', name)

    # output file name
    # Added a subdir to files (see Ganga/GPIDev/Lib/File/File.py). This allows
    # copying files into a subdirectory of the workspace.
    # FIXME: make a helper method for os.makedirs
    path_to_build = os.path.join(self.getPath(), fileobj.subdir)
    if not os.path.isdir(path_to_build):
        os.makedirs(path_to_build)
        logger.debug('created %s', self.getPath())
    else:
        logger.debug('already exists: %s', self.getPath())

    outname = expandfilename(self.getPath(fileobj.getPathInSandbox()))
    fileobj.create(outname)

    if executable:
        chmod_executable(outname)

    return outname
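# Usage sketch for writefile() above (the workspace object is assumed to come
# from an existing job): a FileBuffer is materialised inside the workspace and
# optionally chmod'ed executable; the deprecated (name, contents) tuple form
# still works but logs a warning.
#
#   from Ganga.GPIDev.Lib.File import FileBuffer
#   workspace = job.getInputWorkspace()
#   path = workspace.writefile(FileBuffer('run.sh', '#!/bin/bash\necho hi\n'),
#                              executable=1)
#   path = workspace.writefile(('notes.txt', 'plain text'))  # deprecated form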
def createNewJob(self):
    """Create any jobs required for this unit"""
    j = GPI.Job()
    j._impl.backend = self._getParent().backend.clone()
    j._impl.application = self._getParent().application.clone()
    j.inputdata = self.inputdata.clone()

    trf = self._getParent()
    task = trf._getParent()

    # copy across the outputfiles
    for f in trf.outputfiles:
        j.outputfiles += [f.clone()]

    j.inputsandbox = trf.inputsandbox

    if type(self.eventswanted) == type(''):
        subLines = self.eventswanted
    else:
        subLines = '\n'.join(self.eventswanted)

    # Base for the naming of each subjob's CSV file
    incsvfile = j._impl.application.csvfile
    tmpname = os.path.basename(incsvfile)
    if len(tmpname.split('.')) > 1:
        patterncsv = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patterncsv = tmpname + "_sub%d"

    from Ganga.GPIDev.Lib.File import FileBuffer
    thiscsv = patterncsv % self.subpartid

    # Create the CSV file for this Unit
    j._impl.getInputWorkspace().writefile(FileBuffer(thiscsv, subLines), executable=0)
    j._impl.application.csvfile = j._impl.getInputWorkspace().getPath() + thiscsv
    j.inputsandbox.append(j._impl.getInputWorkspace().getPath() + thiscsv)

    # Base for the naming of each subjob's output file
    tmpname = os.path.basename(j._impl.application.outputfile)
    if len(tmpname.split('.')) > 1:
        patternout = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patternout = tmpname + "_sub%d"
    j._impl.application.outputfile = patternout % self.subpartid

    # Sort out the splitter
    if trf.splitter:
        j.splitter = trf.splitter.clone()

    return j
def wrapper(self, regexp, version, timeout, kernel):
    """Write a wrapper Python script that executes the notebooks"""

    wrapperscript = FileUtils.loadScript(self.templatelocation(), '')

    wrapperscript = wrapperscript.replace('###NBFILES###', str(regexp))
    wrapperscript = wrapperscript.replace('###VERSION###', str(version))
    wrapperscript = wrapperscript.replace('###TIMEOUT###', str(timeout))
    wrapperscript = wrapperscript.replace('###KERNEL###', str(kernel))
    wrapperscript = wrapperscript.replace('###UUID###', str(uuid.uuid4()))

    logger.debug('Script to run on worker node\n' + wrapperscript)

    scriptName = "notebook_wrapper_generated.py"
    runScript = FileBuffer(scriptName, wrapperscript, executable=1)

    return runScript
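# Minimal stand-alone illustration of the '###PLACEHOLDER###' substitution
# pattern that wrapper() above (and the other template-based handlers in this
# section) rely on; the template text here is invented for the example.
def _demo_template_substitution():
    template = "nbfiles = ###NBFILES###\ntimeout = ###TIMEOUT###\n"
    for marker, value in [('###NBFILES###', "'*.ipynb'"), ('###TIMEOUT###', '600')]:
        template = template.replace(marker, value)
    # template is now: "nbfiles = '*.ipynb'\ntimeout = 600\n"
    return template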
def writefile(self, fileobj, executable=None):
    from Ganga.GPIDev.Lib.File import FileBuffer
    try:
        name, contents = fileobj
    except TypeError:
        pass
    else:
        fileobj = FileBuffer(name, contents)
        logger.warning('file "%s": usage of tuples is deprecated, use FileBuffer instead', name)

    # output file name
    # Added a subdir to files (see Ganga/GPIDev/Lib/File/File.py). This allows
    # copying files into a subdirectory of the workspace.
    # FIXME: make a helper method for os.makedirs
    try:
        os.makedirs(self.getPath() + fileobj.subdir)
        logger.debug('created %s', self.getPath())
    except OSError as x:
        import errno
        if x.errno == errno.EEXIST:
            logger.debug('EEXIST: %s', self.getPath())
        else:
            raise

    outname = expandfilename(self.getPath(fileobj.getPathInSandbox()))
    fileobj.create(outname)

    if executable:
        chmod_executable(outname)

    return outname
def split(self, job):
    import os
    subjobs = []

    subsets = splitCSVFile(job.application.csvfile, self.nbevents)
    logger.info('Creating %d subjobs ...', len(subsets))

    # Base for the naming of each subjob's CSV file
    tmpname = os.path.basename(job.application.csvfile)
    if len(tmpname.split('.')) > 1:
        patterncsv = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patterncsv = tmpname + "_sub%d"

    # Base for the naming of each subjob's output file
    tmpname = os.path.basename(job.application.outputfile)
    if len(tmpname.split('.')) > 1:
        patternout = '.'.join(tmpname.split('.')[0:-1]) + "_sub%d." + tmpname.split('.')[-1]
    else:
        patternout = tmpname + "_sub%d"

    for s, sub in enumerate(subsets):
        j = addProxy(self.createSubjob(job))
        j.inputdata = job.inputdata

        subLines = '\n'.join(sub)

        from Ganga.GPIDev.Lib.File import FileBuffer
        thiscsv = patterncsv % s

        # Save in the main job's inputdir now; the file will then be moved to
        # the inputdir of each subjob.
        job.getInputWorkspace().writefile(FileBuffer(thiscsv, subLines), executable=0)
        j.application.csvfile = os.path.join(job.inputdir, thiscsv)

        # Prepare the output filenames, which must be unique
        j.application.outputfile = patternout % s

        subjobs.append(stripProxy(j))

    return subjobs
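# The "_sub%d" naming used by split() and createNewJob() above, shown on a
# sample filename (hypothetical input): the suffix is spliced in before the
# last extension, or simply appended when there is no extension.
def _demo_subjob_naming():
    tmpname = 'runlist.csv'        # os.path.basename of the CSV path
    parts = tmpname.split('.')
    if len(parts) > 1:
        patterncsv = '.'.join(parts[0:-1]) + "_sub%d." + parts[-1]
    else:
        patterncsv = tmpname + "_sub%d"
    assert patterncsv % 3 == 'runlist_sub3.csv'
    return patterncsv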
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    job = app.getJobObject()
    version = appsubconfig["version"]
    libList = appsubconfig["libList"]
    classifierDir = appsubconfig["classifierDir"]
    outDir = appsubconfig["outDir"]
    imageDir = appsubconfig["imageDir"]
    elementList = imageDir.split(os.sep)
    imageList = appsubconfig["imageList"]
    tagFile = appsubconfig["tagFile"]

    lineList = []
    inbox = []
    outbox = []

    headList, headBox = self.head(job=job, version=version,
                                  libList=libList, classifierDir=classifierDir)
    lineList.extend(headList)
    outbox.extend(headBox)

    bodyList, bodyBox = self.body(job=job, imageDir=imageDir,
                                  imageList=imageList, tagFile=tagFile)
    lineList.extend(bodyList)
    inbox.extend(bodyBox)

    tailList, tailBox = self.tail(job=job)
    lineList.extend(tailList)
    outbox.extend(tailBox)

    jobScript = "\n".join(lineList)
    jobWrapper = FileBuffer("Classify.sh", jobScript, executable=1)

    outbox.extend(job.outputsandbox)

    return StandardJobConfig(exe=jobWrapper, inputbox=inbox, outputbox=outbox)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    job = app.getJobObject()
    exePath = appsubconfig["exePath"]
    outDir = appsubconfig["outDir"]
    imageDir = appsubconfig["imageDir"]
    elementList = imageDir.split(os.sep)
    imageSubdir = os.sep.join(elementList[elementList.index("images"):])
    urlRoot = os.path.join("http://hovercraft.hep.phy.cam.ac.uk", imageSubdir)
    imageList = appsubconfig["imageList"]

    lineList = []
    outbox = []

    headList, headBox = self.head(job=job, exePath=exePath)
    lineList.extend(headList)
    outbox.extend(headBox)

    bodyList, bodyBox = self.body(job=job, imageDir=imageDir,
                                  urlRoot=urlRoot, imageList=imageList)
    lineList.extend(bodyList)
    outbox.extend(bodyBox)

    tailList, tailBox = self.tail(job=job)
    lineList.extend(tailList)
    outbox.extend(tailBox)

    jobScript = "\n".join(lineList)
    jobWrapper = FileBuffer("VansegLocal.sh", jobScript, executable=1)

    outbox.extend(job.outputsandbox)

    return StandardJobConfig(exe=jobWrapper, outputbox=outbox)
def preparejob(self, jobconfig, master_job_sandbox):
    '''Prepare the JDL'''

    script = self.__jobWrapperTemplate__()

    job = self.getJobObject()
    inpw = job.getInputWorkspace()

    wrapperlog = '__jobscript__.log'

    import Ganga.Core.Sandbox as Sandbox

    # FIXME: check what happens if 'stdout','stderr' are specified here
    script = script.replace('###OUTPUTSANDBOX###', repr(jobconfig.outputbox))
    script = script.replace('###APPLICATION_NAME###', getName(job.application))
    script = script.replace('###APPLICATIONEXEC###', repr(jobconfig.getExeString()))
    script = script.replace('###APPLICATIONARGS###', repr(jobconfig.getArguments()))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles

    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' '))
    script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' '))

    if jobconfig.env:
        script = script.replace('###APPLICATIONENVS###', repr(jobconfig.env))
    else:
        script = script.replace('###APPLICATIONENVS###', repr({}))

    script = script.replace('###WRAPPERLOG###', repr(wrapperlog))
    import inspect
    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    mon = job.getMonitoringService()

    self.monInfo = None

    # set the monitoring file by default to the stdout
    if isinstance(self.monInfo, dict):
        self.monInfo['remotefile'] = 'stdout'

    # try to print out the monitoring service information in debug mode
    try:
        logger.debug('job info of monitoring service: %s' % str(self.monInfo))
    except:
        pass

    # prepare input/output sandboxes
    import Ganga.Utility.files
    from Ganga.GPIDev.Lib.File import File
    from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
    import inspect

    fileutils = File(inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR)
    packed_files = jobconfig.getSandboxFiles() + [fileutils]
    sandbox_files = job.createPackedInputSandbox(packed_files)

    # sandbox of child jobs should include master's sandbox
    sandbox_files.extend(master_job_sandbox)

    # check the input file size and pre-upload larger inputs to the iocache
    lfc_host = ''

    input_sandbox_uris = []
    input_sandbox_names = []

    ick = True

    max_prestaged_fsize = 0
    for f in sandbox_files:
        idx = self.__check_and_prestage_inputfile__(f)

        if not idx:
            logger.error('input sandbox preparation failed: %s' % f)
            ick = False
            break
        else:
            if idx['lfc_host']:
                lfc_host = idx['lfc_host']

            if idx['remote']:
                abspath = os.path.abspath(f)
                fsize = os.path.getsize(abspath)

                if fsize > max_prestaged_fsize:
                    max_prestaged_fsize = fsize

                input_sandbox_uris.append(idx['remote'][os.path.basename(f)])
                input_sandbox_names.append(os.path.basename(urlparse(f)[2]))

            if idx['local']:
                input_sandbox_uris += idx['local']
                input_sandbox_names.append(os.path.basename(f))

    if not ick:
        logger.error('stop job submission')
        return None

    # determine the lcg-cp timeout according to the max_prestaged_fsize,
    # using the assumption of a 1 MB/sec transfer rate
    transfer_timeout = config['SandboxTransferTimeout']
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))

    if predict_timeout > transfer_timeout:
        transfer_timeout = predict_timeout

    if transfer_timeout < 60:
        transfer_timeout = 60

    script = script.replace('###TRANSFERTIMEOUT###', '%d' % transfer_timeout)

    # update the job wrapper with the inputsandbox list
    script = script.replace('###INPUTSANDBOX###',
                            repr({'remote': {}, 'local': input_sandbox_names}))

    # write out the job wrapper and put job wrapper into job's inputsandbox
    scriptPath = inpw.writefile(
        FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1)
    input_sandbox = input_sandbox_uris + [scriptPath]

    for isb in input_sandbox:
        logger.debug('ISB URI: %s' % isb)

    # compose output sandbox to include by default the following files:
    # - gzipped stdout (transferred only when the JobLogHandler is WMS)
    # - gzipped stderr (transferred only when the JobLogHandler is WMS)
    # - __jobscript__.log (job wrapper's log)
    output_sandbox = [wrapperlog]

    from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
    for outputSandboxPattern in getOutputSandboxPatterns(job):
        output_sandbox.append(outputSandboxPattern)

    if config['JobLogHandler'] in ['WMS']:
        output_sandbox += ['stdout.gz', 'stderr.gz']

    if len(jobconfig.outputbox):
        output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

    # compose ARC XRSL
    xrsl = {
        #'VirtualOrganisation': config['VirtualOrganisation'],
        'executable': os.path.basename(scriptPath),
        'environment': {'GANGA_LCG_VO': config['VirtualOrganisation'],
                        'GANGA_LOG_HANDLER': config['JobLogHandler'],
                        'LFC_HOST': lfc_host},
        #'stdout': 'stdout',
        #'stderr': 'stderr',
        'inputFiles': input_sandbox,
        'outputFiles': output_sandbox,
        #'OutputSandboxBaseDestURI': 'gsiftp://localhost'
    }

    xrsl['environment'].update({'GANGA_LCG_CE': self.CE})
    #xrsl['Requirements'] = self.requirements.merge(jobconfig.requirements).convert()

    # if self.jobtype.upper() in ['NORMAL', 'MPICH']:
    #     xrsl['JobType'] = self.jobtype.upper()
    #     if self.jobtype.upper() == 'MPICH':
    #         xrsl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
    #         xrsl['Requirements'].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    #         xrsl['NodeNumber'] = self.requirements.nodenumber
    # else:
    #     logger.warning('JobType "%s" not supported' % self.jobtype)
    #     return

    # additional settings from the job
    if jobconfig.env:
        xrsl['environment'].update(jobconfig.env)

    xrslText = Grid.expandxrsl(xrsl)

    # append any additional requirements from the requirements object
    xrslText += '\n'.join(self.requirements.other)

    logger.debug('subjob XRSL: %s' % xrslText)
    return inpw.writefile(FileBuffer('__xrslfile__', xrslText))
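# The transfer-timeout rule used above, stand-alone and with example values:
# assume a 1 MB/s transfer rate, and never go below the configured timeout
# or a 60-second floor.
def _demo_transfer_timeout():
    import math
    max_prestaged_fsize = 250 * 1000000   # e.g. 250 MB of pre-staged input
    configured_timeout = 120              # e.g. config['SandboxTransferTimeout']
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))
    transfer_timeout = max(configured_timeout, predict_timeout, 60)
    assert transfer_timeout == 250        # the predicted 250 s wins here
    return transfer_timeout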
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    # outputdata, outputdata_path = dirac_ouputdata(app)

    job = stripProxy(app).getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    commandline = []
    commandline.append(app.exe)
    if isType(app.exe, File):
        #logger.info("app: %s" % str(app.exe.name))
        #fileName = os.path.join(get_share_path(app), os.path.basename(app.exe.name))
        #logger.info("EXE: %s" % str(fileName))
        #inputsandbox.append(File(name=fileName))
        inputsandbox.append(app.exe)
        commandline[0] = os.path.join('.', os.path.basename(app.exe.name))
    commandline.extend([str(arg) for arg in app.args])
    logger.debug('Command line: %s: ', commandline)

    #exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
    exe_script_name = 'exe-script.py'

    logger.info("Setting Command to be: '%s'" % repr(commandline))

    inputsandbox.append(FileBuffer(
        name=exe_script_name,
        contents=script_generator(
            exe_script_template(),
            #remove_unreplaced=False,
            COMMAND=repr(commandline),
            OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' ')),
        executable=True))

    contents = script_generator(
        exe_script_template(),
        COMMAND=repr(commandline),
        OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' '))
    #logger.info("Script is: %s" % str(contents))

    from os.path import abspath, expanduser

    for this_file in job.inputfiles:
        if isinstance(this_file, LocalFile):
            for name in this_file.getFilenameList():
                inputsandbox.append(File(abspath(expanduser(name))))
        elif isinstance(this_file, DiracFile):
            name = this_file.lfn
            if isinstance(input_data, list):
                input_data.append(name)
            else:
                input_data = [name]

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles, config['RequireDefaultSE'])

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        diracAPI_script_template(),
        DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
        DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
        DIRAC_OBJECT='Dirac()',
        JOB_OBJECT='Job()',
        NAME=mangle_job_name(app),
        EXE=exe_script_name,  # os.path.basename(exe_script_path),
        EXE_ARG_STR='',  # ' '.join([str(arg) for arg in app.args]),
        EXE_LOG_FILE='Ganga_Executable.log',
        ENVIRONMENT=None,  # app.env,
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=dirac_outputfiles,
        OUTPUT_PATH="",  # job.fqid,
        SETTINGS=diracAPI_script_settings(app),
        DIRAC_OPTS=job.backend.diracOpts,
        REPLICATE='True' if config['ReplicateOutputData'] else '',
        # leave the sandbox for altering later as needs
        # to be done in backend.submit to combine master.
        # Note only using 2 #s as auto-remove 3
        INPUT_SANDBOX='##INPUT_SANDBOX##')

    #logger.info("dirac_script: %s" % dirac_script)
    #logger.info("inbox: %s" % str(unique(inputsandbox)))
    #logger.info("outbox: %s" % str(unique(outputsandbox)))

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def configure(self, masterappconfig):
    if self.cmtsetup == None:
        raise ApplicationConfigurationError(None, 'No cmt setup script given.')

    # __________ TREx first ____________
    trex_args = convertIntToStringArgs(self.trex_args)

    job = self.getJobObject()

    # Need to handle the possibility of multiple output files!
    # setup the output file
    for arg in trex_args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                None,
                'Option "-o" given in trex_args. The module will define the output filename.')

    # Get the list of input filenames from get_dataset_filenames() and pass
    # them on the command line.
    if job.inputdata == None:
        raise ApplicationConfigurationError(
            None, 'The inputdata variable is not defined.')
    fileList = job.inputdata.get_dataset_filenames()
    if len(fileList) < 1:
        raise ApplicationConfigurationError(None, 'No input data file given.')
    trex_args.extend(fileList)

    firstFile = fileList[0].split('/')[-1]
    # Define the output
    trex_args.append('-o')
    if self.filenamesubstr == None:
        trex_outputfile = 'recoOutput.root'
    else:
        trex_outputfile = firstFile.replace(self.filenamesubstr, "trex")
    trex_args.append(trex_outputfile)

    # __________ Now oaAnalysis ____________
    oaana_args = convertIntToStringArgs(self.oaana_args)

    job = self.getJobObject()

    # setup the output file
    for arg in oaana_args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                None,
                'Option "-o" given in oaana_args. You must use the oaana_outputfile variable instead.')

    oaana_args.append('-o')
    if self.filenamesubstr == None:
        oaana_outputfile = 'recoOutput.root'
    else:
        oaana_outputfile = firstFile.replace(self.filenamesubstr, "anal")
        # protection against failed substitution
        if oaana_outputfile == trex_outputfile:
            oaana_outputfile = oaana_outputfile.replace(".root", "_anal.root")
    oaana_args.append(oaana_outputfile)

    # Use the TREx output as input for the oaAnalysis processing,
    # or the raw input file list when running oaAnalysis only.
    if self.oaana_only:
        oaana_args.extend(fileList)
    else:
        oaana_args.append(trex_outputfile)

    trex_argsStr = ' '.join(trex_args)
    oaana_argsStr = ' '.join(oaana_args)

    # Create the bash script and put it in the input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    if not self.oaana_only:
        script += 'RunTREx.exe ' + trex_argsStr + '\n'
    script += 'RunOAAnalysis.exe ' + oaana_argsStr + '\n'

    from Ganga.GPIDev.Lib.File import FileBuffer
    scriptname = 'TRExPlusOAAnalysis.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script), executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
def preparejob(self, jobconfig, master_input_sandbox):
    """Method for preparing job script"""

    job = self.getJobObject()

    from Ganga.GPIDev.Lib.File import File
    from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
    import Ganga.Utility.files
    import inspect

    fileutils = File(inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR)

    inputfiles = jobconfig.getSandboxFiles() + [fileutils]
    inbox = job.createPackedInputSandbox(inputfiles)

    inbox.extend(master_input_sandbox)
    inpDir = job.getInputWorkspace(create=True).getPath()
    outDir = job.getOutputWorkspace(create=True).getPath()
    workdir = tempfile.mkdtemp()
    self.workdir = workdir
    exeString = jobconfig.getExeString()
    argList = jobconfig.getArgStrings()
    argString = " ".join(map(lambda x: " %s " % x, argList))

    outputSandboxPatterns = jobconfig.outputbox
    patternsToZip = []
    wnCodeForPostprocessing = ''
    wnCodeToDownloadInputFiles = ''

    if (len(job.outputfiles) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatternsForInteractive, getWNCodeForOutputPostprocessing
        (outputSandboxPatterns, patternsToZip) = getOutputSandboxPatternsForInteractive(job)
        wnCodeForPostprocessing = 'def printError(message):pass\ndef printInfo(message):pass' + \
            getWNCodeForOutputPostprocessing(job, '')

    all_inputfiles = [this_file for this_file in job.inputfiles]
    if job.master is not None:
        all_inputfiles.extend([this_file for this_file in job.master.inputfiles])

    if (len(all_inputfiles) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import outputFilePostProcessingOnWN
        for inputFile in all_inputfiles:
            inputfileClassName = getName(inputFile)

            logger.debug("name: %s" % inputfileClassName)
            logger.debug("result: %s" % str(outputFilePostProcessingOnWN(job, inputfileClassName)))

            getFromFile = False
            if outputFilePostProcessingOnWN(job, inputfileClassName):
                inputFile.processWildcardMatches()
                if inputFile.subfiles:
                    for subfile in inputFile.subfiles:
                        wnCodeToDownloadInputFiles += subfile.getWNScriptDownloadCommand('')
                else:
                    getFromFile = True
            else:
                getFromFile = True

            if getFromFile:
                wnCodeToDownloadInputFiles += inputFile.getWNScriptDownloadCommand('')

    wnCodeToDownloadInputData = ''
    if job.inputdata and (len(job.inputdata) > 0):
        from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForDownloadingInputFiles
        wnCodeToDownloadInputData = getWNCodeForDownloadingInputFiles(job, '')

    import inspect

    replace_dict = {
        '###CONSTRUCT_TIME###': (time.strftime("%c")),
        '###WNSANDBOX_SOURCE###': inspect.getsource(Sandbox.WNSandbox),
        '###GANGA_PYTHONPATH###': getConfig("System")["GANGA_PYTHONPATH"],
        '###OUTPUTDIR###': outDir,
        '###WORKDIR###': workdir,
        '###IN_BOX###': inbox,
        '###WN_INPUTFILES###': wnCodeToDownloadInputFiles,
        '###WN_INPUTDATA###': wnCodeToDownloadInputData,
        '###JOBCONFIG_ENV###': jobconfig.env if jobconfig.env is not None else dict(),
        '###EXE_STRING###': exeString,
        '###ARG_STRING###': argString,
        '###WN_POSTPROCESSING###': wnCodeForPostprocessing,
        '###PATTERNS_TO_ZIP###': patternsToZip,
        '###OUTPUT_SANDBOX_PATTERNS###': outputSandboxPatterns
    }

    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'InteractiveScriptTemplate.py.template')

    from Ganga.GPIDev.Lib.File import FileUtils
    commandString = FileUtils.loadScript(script_location, '')

    for k, v in replace_dict.iteritems():
        commandString = commandString.replace(str(k), str(v))

    return job.getInputWorkspace().writefile(FileBuffer("__jobscript__", commandString), executable=1)
def configure(self, masterappconfig):
    if self.cmtsetup == None:
        raise ApplicationConfigurationError(None, 'No cmt setup script given.')

    # __________ Reco first ____________
    reco_args = convertIntToStringArgs(self.reco_args)

    job = self.getJobObject()

    # Need to handle the possibility of multiple output files!
    # setup the output file
    for arg in reco_args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                None,
                'Option "-o" given in reco_args. You must use the filenamesubstr and reconewstr variables instead to define an output.')

    # Get the list of input filenames from get_dataset_filenames() and pass
    # them on the command line.
    if job.inputdata == None:
        raise ApplicationConfigurationError(
            None, 'The inputdata variable is not defined.')
    fileList = job.inputdata.get_dataset_filenames()
    if len(fileList) < 1:
        raise ApplicationConfigurationError(None, 'No input data file given.')

    firstFile = fileList[0].split('/')[-1]
    # Define the output
    reco_args.append('-o')
    if self.filenamesubstr == None:
        reco_outputfile = 'recoOutput.root'
    else:
        reco_outputfile = firstFile.replace(self.filenamesubstr, self.reconewstr)
    reco_args.append(reco_outputfile)

    # Just to define the output before the potentially long list of input files
    reco_args.extend(fileList)

    # __________ Now VFT ____________
    vft_args = convertIntToStringArgs(self.vft_args)

    job = self.getJobObject()

    # setup the output file
    for arg in vft_args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                None,
                'Option "-o" given in vft_args. You must use the filenamesubstr and reconewstr variables instead to define an output.')

    # Define the output
    vft_args.append('-o')
    if self.filenamesubstr == None:
        vft_outputfile = 'vftOutput.root'
    else:
        vft_outputfile = firstFile.replace(self.filenamesubstr, self.vftnewstr)
    vft_args.append(vft_outputfile)

    # Use the reco output as an input for the VFT processing,
    # or use the input file list if running in VFT only mode.
    if self.vft_only:
        vft_args.extend(fileList)
    else:
        vft_args.append(reco_outputfile)

    reco_argsStr = ' '.join(reco_args)
    vft_argsStr = ' '.join(vft_args)

    # Create the bash script and put it in the input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    if not self.vft_only:
        script += self.reco_exe + ' ' + reco_argsStr + '\n'
    script += self.vft_exe + ' ' + vft_argsStr + '\n'

    from Ganga.GPIDev.Lib.File import FileBuffer
    scriptname = 'RecoPlusVFT.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script), executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
def preparejob(self, jobconfig, master_input_sandbox):

    job = self.getJobObject()
    mon = job.getMonitoringService()
    import Ganga.Core.Sandbox as Sandbox
    from Ganga.GPIDev.Lib.File import File
    from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR
    import inspect

    fileutils = File(inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR)

    subjob_input_sandbox = job.createPackedInputSandbox(jobconfig.getSandboxFiles() + [fileutils])

    appscriptpath = [jobconfig.getExeString()] + jobconfig.getArgStrings()
    sharedoutputpath = job.getOutputWorkspace().getPath()
    ## FIXME Check this isn't a GangaList
    outputpatterns = jobconfig.outputbox
    environment = jobconfig.env if not jobconfig.env is None else {}

    import inspect
    script_location = os.path.join(
        os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))),
        'BatchScriptTemplate.py')

    from Ganga.GPIDev.Lib.File import FileUtils
    text = FileUtils.loadScript(script_location, '')

    import Ganga.Core.Sandbox as Sandbox
    import Ganga.Utility as Utility
    from Ganga.Utility.Config import getConfig
    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputSandbox, getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles

    jobidRepr = repr(self.getJobObject().getFQID('.'))

    replace_dict = {
        '###OUTPUTSANDBOXPOSTPROCESSING###': getWNCodeForOutputSandbox(job, ['__syslog__'], jobidRepr),
        '###OUTPUTUPLOADSPOSTPROCESSING###': getWNCodeForOutputPostprocessing(job, ''),
        '###DOWNLOADINPUTFILES###': getWNCodeForDownloadingInputFiles(job, ''),
        '###INLINEMODULES###': inspect.getsource(Sandbox.WNSandbox),
        '###INLINEHOSTNAMEFUNCTION###': inspect.getsource(Utility.util.hostname),
        '###APPSCRIPTPATH###': repr(appscriptpath),
        #'###SHAREDINPUTPATH###': repr(sharedinputpath),
        '###INPUT_SANDBOX###': repr(subjob_input_sandbox + master_input_sandbox),
        '###SHAREDOUTPUTPATH###': repr(sharedoutputpath),
        '###OUTPUTPATTERNS###': repr(outputpatterns),
        '###JOBID###': jobidRepr,
        '###ENVIRONMENT###': repr(environment),
        '###PREEXECUTE###': self.config['preexecute'],
        '###POSTEXECUTE###': self.config['postexecute'],
        '###JOBIDNAME###': self.config['jobid_name'],
        '###QUEUENAME###': self.config['queue_name'],
        '###HEARTBEATFREQUENCE###': self.config['heartbeat_frequency'],
        '###INPUT_DIR###': repr(job.getStringInputDir()),
        '###GANGADIR###': repr(getConfig('System')['GANGA_PYTHONPATH'])
    }

    for k, v in replace_dict.iteritems():
        text = text.replace(str(k), str(v))

    logger.debug('subjob input sandbox %s ', subjob_input_sandbox)
    logger.debug('master input sandbox %s ', master_input_sandbox)

    from Ganga.GPIDev.Lib.File import FileBuffer

    return job.getInputWorkspace().writefile(FileBuffer('__jobscript__', text), executable=1)
def configure(self, masterappconfig):
    exefile = 'skimFromCSV.exe'
    exe = 'skimFromCSV.exe'
    # exe = '/'.join([os.getenv("RECONUTILSROOT"), os.getenv("CMTCONFIG"), exefile])
    # if not isfile(exe):
    #     raise ApplicationConfigurationError(None, 'Cannot find executable ' + exe)

    job = self.getJobObject()

    if self.cmtsetup == None:
        raise ApplicationConfigurationError('No cmt setup script given.')
    if not isfile(self.cmtsetup):
        raise ApplicationConfigurationError('Cannot find cmt setup script ' + self.cmtsetup)

    # Copy the CSV file to inputdir. Done in the splitter for subjobs.
    if not isfile(self.csvfile):
        raise ApplicationConfigurationError('Cannot find CSV file ' + self.csvfile)
    from shutil import copy
    tmpcsv = os.path.join(job.inputdir, os.path.basename(self.csvfile))
    if not os.path.exists(tmpcsv):
        copy(self.csvfile, job.inputdir)
    self.csvfile = tmpcsv

    args = []
    args.append('-O')
    args.append('file=' + self.csvfile)

    if self.outputfile == None:
        raise ApplicationConfigurationError(
            'No output file given. Fill the outputfile variable.')
    args.append('-o')
    args.append(self.outputfile)

    # Read the CSV file and collect the run/subrun pairs it contains.
    csvfile = open(self.csvfile, 'rb')
    run_subrun = []
    for line in csvfile:
        if line[0] == '#':
            continue
        row = line.split(",")
        if len(row) < 3:
            print "Ignoring badly-formatted line:", ",".join(row)
            continue
        r_sr = "%(run)08d-%(subrun)04d" % {"run": int(row[0]), "subrun": int(row[1])}
        if r_sr not in run_subrun:
            run_subrun.append(r_sr)

    # Get the list of filenames from get_dataset_filenames() and keep only
    # the files matching a run/subrun listed in the CSV file.
    if job.inputdata == None:
        raise ApplicationConfigurationError('The inputdata variable is not defined.')
    rawFileList = job.inputdata.get_dataset_filenames()
    if len(rawFileList) < 1:
        raise ApplicationConfigurationError('No input data file given.')

    fileList = []
    for r_sr in run_subrun:
        for rfile in rawFileList:
            if rfile.find(r_sr) > -1:
                fileList.append(rfile)
                continue
    if not len(fileList):
        raise ApplicationConfigurationError(
            'No file matching the run_subrun in the CSV file %s.' % self.csvfile)
    args.extend(fileList)

    argsStr = ' '.join(args)
    # Create the bash script and put it in the input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    script += '${RECONUTILSROOT}/${CMTCONFIG}/' + exe + ' ' + argsStr + '\n'
    # Little trick to be able to control the final destination
    # of the subjob's CSV file with SandboxFile or MassStorageFile
    if job.master is not None:
        script += 'cp %s .' % self.csvfile

    from Ganga.GPIDev.Lib.File import FileBuffer

    if exefile.find('.exe') > -1:
        scriptname = exefile.replace('.exe', '.sh')
    else:
        scriptname = exefile + '.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script), executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
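# The run/subrun key built above, stand-alone and with example values:
# zero-padded to 8 and 4 digits so it can be matched against file names.
def _demo_run_subrun_key():
    row = ['90410', '7', 'some,other,fields']
    r_sr = "%(run)08d-%(subrun)04d" % {"run": int(row[0]), "subrun": int(row[1])}
    assert r_sr == '00090410-0007'
    return r_sr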
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("Prepare")

    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = stripProxy(app).getJobObject()

    if job.inputdata:
        if not job.splitter:
            if len(job.inputdata) > 100:
                raise BackendError(
                    "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!")

    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    data_str = 'import os\n'
    data_str += 'execfile(\'data.py\')\n'

    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer('data-wrapper.py', data_str))

    input_data = []

    # Can't wait to get rid of this when people no longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from the job over that
    # in optsfiles, but we need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app), 'inputdata', 'options_data.pkl')

        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()

    #######################################################################
    # Can't wait to get rid of this when people no longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app), 'output', 'options_parser.pkl')

    if os.path.exists(share_path):
        # if not os.path.exists(share_path):
        #     raise GangaException('could not find the parser')
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters

        fileTransform = allComponentFilters['gangafiles']
        outdata_files = [fileTransform(this_file, None) for this_file in outdata
                         if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend([output_file for output_file in outdata_files
                                             if not isType(output_file, DiracFile)])
        outbox_files = [fileTransform(this_file, None) for this_file in outbox
                        if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend([outbox_file for outbox_file in outbox_files
                                             if not isType(outbox_file, DiracFile)])

        outputsandbox = [f.namePattern for f in job.non_copyable_outputfiles]

        outputsandbox.extend([f.namePattern for f in job.outputfiles
                              if not isType(f, DiracFile)])
        outputsandbox = unique(outputsandbox)  # + outbox[:]
    #######################################################################

    input_data_dirac, parametricinput_data = dirac_inputdata(job.application)

    if input_data_dirac is not None:
        for f in input_data_dirac:
            if isType(f, DiracFile):
                input_data.append(f.lfn)
            elif isType(f, str):
                input_data.append(f)
            else:
                raise ApplicationConfigurationError(
                    "Don't know how to handle anything other than DiracFiles or strings to LFNs!")

    commandline = "python ./gaudipython-wrapper.py"
    if is_gaudi_child(app):
        commandline = 'gaudirun.py '
        commandline += ' '.join([str(arg) for arg in app.args])
        commandline += ' options.pkl data-wrapper.py'
    logger.debug('Command line: %s: ', commandline)

    gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")

    script_generator(
        gaudi_script_template(),
        #remove_unreplaced=False,
        outputfile_path=gaudi_script_path,
        PLATFORM=app.platform,
        COMMAND=commandline,
        XMLSUMMARYPARSING=getXMLSummaryScript()
        #OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
    )

    #logger.debug("input_data %s" % str(input_data))

    # We want to propagate the ancestor depth to DIRAC when we have
    # inputdata set
    if job.inputdata is not None and isType(job.inputdata, LHCbDataset):

        # As the RT Handler we already know we have a Dirac backend
        if type(job.backend.settings) is not dict:
            raise ApplicationConfigurationError(
                None, 'backend.settings should be a dict')

        if 'AncestorDepth' in job.backend.settings:
            ancestor_depth = job.backend.settings['AncestorDepth']
        else:
            ancestor_depth = job.inputdata.depth
    else:
        ancestor_depth = 0

    lhcbdirac_script_template = lhcbdiracAPI_script_template()

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # not necessary to use lhcbdiracAPI_script_template any more as doing our
    # own uploads to Dirac; remove after Ganga6 release
    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        lhcbdirac_script_template,
        DIRAC_IMPORT='from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
        DIRAC_JOB_IMPORT='from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
        DIRAC_OBJECT='DiracLHCb()',
        JOB_OBJECT='LHCbJob()',
        NAME=mangle_job_name(app),
        APP_NAME=stripProxy(app).appname,
        APP_VERSION=app.version,
        APP_SCRIPT=gaudi_script_path,
        APP_LOG_FILE='Ganga_%s_%s.log' % (stripProxy(app).appname, app.version),
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
        OUTPUT_PATH="",  # job.fqid, #outputdata_path,
        OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
        SETTINGS=diracAPI_script_settings(job.application),
        DIRAC_OPTS=job.backend.diracOpts,
        PLATFORM=app.platform,
        REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
        ANCESTOR_DEPTH=ancestor_depth,
        ## This is to be modified in the final 'submit' function in the backend.
        ## The backend also handles the inputfiles DiracFiles as appropriate.
        INPUT_SANDBOX='##INPUT_SANDBOX##')

    logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def get_input_sandbox(extra):
    sandbox = []
    sandbox += extra.input_files[:]
    sandbox += [FileBuffer(n, s) for (n, s) in extra.input_buffers.items()]
    logger.debug("Input sandbox: %s", str(sandbox))
    return sandbox
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("Prepare")

    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = app.getJobObject()

    logger.debug("Loading pickle files")

    #outputfiles = set([file.namePattern for file in job.outputfiles]).difference(set(getOutputSandboxPatterns(job)))
    # Can't wait to get rid of this when people no longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from the job over that
    # in optsfiles, but we need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app), 'inputdata', 'options_data.pkl')

        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()

    #######################################################################
    # Can't wait to get rid of this when people no longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app), 'output', 'options_parser.pkl')

    logger.debug("Adding info from pickle files")

    if os.path.exists(share_path):
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters

        fileTransform = allComponentFilters['gangafiles']
        job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outdata
                                             if not FileUtils.doesFileExist(this_file, job.outputfiles)])
        job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outbox
                                             if not FileUtils.doesFileExist(this_file, job.outputfiles)])

        outputsandbox.extend([f.namePattern for f in job.non_copyable_outputfiles])

        outputsandbox.extend([f.namePattern for f in job.outputfiles])
        outputsandbox = unique(outputsandbox)
    #######################################################################

    logger.debug("Doing XML Catalog stuff")

    data = job.inputdata
    data_str = ''
    if data:
        logger.debug("Returning options String")
        data_str = data.optionsString()
        if data.hasLFNs():
            logger.debug("Returning Catalogue")
            inputsandbox.append(FileBuffer('catalog.xml', data.getCatalog()))
            cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
            data_str += cat_opts

    logger.debug("Doing splitter_data stuff")
    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data

    inputsandbox.append(FileBuffer('data.py', data_str))

    logger.debug("Doing GaudiPython stuff")

    cmd = 'python ./gaudipython-wrapper.py'
    opts = ''
    if is_gaudi_child(job.application):
        opts = 'options.pkl'
        cmd = 'gaudirun.py ' + ' '.join(job.application.args) + ' %s data.py' % opts

    logger.debug("Setting up script")

    script = script_generator(
        create_runscript(job.application.newStyleApp),
        remove_unreplaced=False,
        OPTS=opts,
        PROJECT_OPTS=job.application.setupProjectOptions,
        APP_NAME=job.application.appname,
        APP_VERSION=job.application.version,
        APP_PACKAGE=job.application.package,
        PLATFORM=job.application.platform,
        CMDLINE=cmd,
        XMLSUMMARYPARSING=getXMLSummaryScript())
        # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')

    logger.debug("Returning StandardJobConfig")

    return StandardJobConfig(FileBuffer('gaudi-script.py', script, executable=1),
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def makesagajobdesc(self, job, jobconfig):
    ## We need a unique subdirectory per job to avoid input/output file clashes.
    ## The easiest way to do this is with a UUID-style directory name.
    wd_uuid = "ganga-job-"
    #if job.name == '':
    #    wd_uuid += "noname-"
    #else:
    #    wd_uuid += job.name + "-"

    import uuid
    wd_uuid += str(uuid.uuid4())

    job.backend.workdir_uuid = wd_uuid

    ## Now we need to create a wrapper script on the fly. The wrapper
    ## script will be transferred to the execution host and takes care
    ## of the archive unpacking as well as job monitoring / reporting.
    ws = SAGAWrapperScript()

    import inspect
    import Ganga.Core.Sandbox as Sandbox
    import Ganga.Utility as Utility

    ws.setInlineModules(inspect.getsource(Sandbox.WNSandbox))

    ws.setExecutable(jobconfig.getExeString())
    ws.setArguments(jobconfig.getArgStrings())
    ws.setOutputPatterns(jobconfig.outputbox)
    ws.setInputSandbox("_input_sandbox_" + str(job.id) + ".tgz")

    text = ws.getScript()

    jd = saga.job.description()
    logger.debug("setting up new saga job with id: %s", job.id)

    # create jobscript in input sandbox
    from Ganga.GPIDev.Lib.File import FileBuffer
    jobscript = job.getInputWorkspace().writefile(FileBuffer('__jobscript__', text), executable=1)
    logger.debug("  * created new jobscript wrapper: %s", jobscript)

    # workdir
    if len(job.backend.workdir) != 0:
        jd.working_directory = job.backend.workdir
        logger.debug("  * backend.workdir -> saga.workdir: %s", jd.working_directory)
    else:
        # default to the remote filesystem path component
        jd.working_directory = saga.url(self.filesystem_url + "/" + self.workdir_uuid + "/").path
        logger.debug("  * saga.workdir: %s (not given - extracted from 'filesystem_url')",
                     jd.working_directory)

    # executable
    exe = jd.working_directory + '__jobscript__'
    jd.executable = exe  # jobconfig.getExeString()
    logger.debug("  * application.exe -> saga.executable: %s", jd.executable)

    # arguments
    argList = jobconfig.getArgStrings()
    #for arg in job.application.args:
    #    argList.append(arg)  # "\\'%s\\'" % arg
    if len(argList) != 0:
        jd.arguments = argList
        logger.debug("  * application.args -> saga.arguments: %s", jd.arguments)

    # environment
    envList = []
    for k, v in job.application.env.items():
        envList.append(k + "=" + v)
    if len(envList) != 0:
        jd.environment = envList
        logger.debug("  * application.env -> saga.environment: %s", jd.environment)

    # queue
    if len(job.backend.queue) != 0:
        jd.queue = job.backend.queue
        logger.debug("  * backend.queue -> saga.queue: %s", jd.queue)

    # allocation
    if len(job.backend.allocation) != 0:
        jd.job_project = [job.backend.allocation]
        logger.debug("  * backend.allocation -> saga.job_project: %s", jd.job_project)

    # spmd_variation
    if len(job.backend.spmd_variation) != 0:
        jd.spmd_variation = job.backend.spmd_variation
        logger.debug("  * backend.spmd_variation -> saga.spmd_variation: %s", jd.spmd_variation)

    # number_of_processes
    if len(job.backend.number_of_processes) != 0:
        jd.number_of_processes = job.backend.number_of_processes
        logger.debug("  * backend.number_of_processes -> saga.number_of_processes: %s",
                     jd.number_of_processes)

    ## We have to create special filenames for stdout/stderr redirection.
    ## To avoid name clashes, we append a UUID to the filename.
    path_component = saga.url(self.filesystem_url + "/" + self.workdir_uuid + "/")

    try:
        d = saga.filesystem.directory(path_component, saga.filesystem.Create)
        logger.debug("  * created output/working directory on the remote system: %s", path_component)
    except saga.exception as e:
        logger.error('exception caught while creating output/working directory: %s',
                     e.get_all_messages())
        self.getJobObject().updateStatus("failed")

    ## STDOUT
    self.saga_job_out = path_component.url + "/out.log"
    #jd.output = saga.url(self.saga_job_out).path
    logger.debug("  * stdout should become available here: %s", saga.url(self.saga_job_out).url)

    ## STDERR
    self.saga_job_err = path_component.url + "/err.log"
    #jd.error = saga.url(self.saga_job_err).path
    logger.debug("  * stderr should become available here: %s", saga.url(self.saga_job_err).url)

    return jd
def configure(self, masterappconfig):
    self.ana_useropt = convertIntToStringArgs(self.ana_useropt)

    args = []
    args.append('$RECONUTILSROOT/macros/grtf_VFT/make_ana.py')

    job = self.getJobObject()

    if self.cmtsetup == None:
        raise ApplicationConfigurationError('No cmt setup script given.')

    if not self.tree == None:
        args.append('-t')
        args.append(self.tree)

    if not self.ana_custom == None:
        args.append('-c')
        args.append(self.ana_custom)

    if not self.ana_useropt == None:
        for UsrOpt in self.ana_useropt:
            args.append('-O')
            args.append(UsrOpt)

    if self.ana_output == None:
        raise ApplicationConfigurationError(
            'No output file given. Fill the ana_output variable.')
    else:
        args.append('-o')
        args.append(self.ana_output)

    # Get the list of input filenames from get_dataset_filenames() and pass
    # them on the command line.
    if job.inputdata == None:
        raise ApplicationConfigurationError('The inputdata variable is not defined.')
    fileList = job.inputdata.get_dataset_filenames()
    if len(fileList) < 1:
        raise ApplicationConfigurationError('No input data file given.')
    args.extend(fileList)

    if self.run_pdf:
        args.append('&&')
        args.append('$ND280ANALYSISTOOLSROOT/macros/grtf/pdfgen/make_pdf.py')

        if not 'ana_output' in [self.pdf_rdp, self.pdf_mcp, self.pdf_oldrdp, self.pdf_oldmcp]:
            raise ApplicationConfigurationError(
                'None of the pdf inputs is set to use the make_ana.py output. Please set "pdf_rdp", "pdf_mcp", "pdf_oldrdp", or "pdf_oldmcp" to the value "ana_output"')
        for key in ['pdf_rdp', 'pdf_mcp', 'pdf_oldrdp', 'pdf_oldmcp']:
            if getattr(self, key) == 'ana_output':
                setattr(self, key, self.ana_output)

        argDict = {'--custom': 'pdf_custom',
                   '--title': 'pdf_title',
                   '--rdp': 'pdf_rdp',
                   '--mcp': 'pdf_mcp',
                   '--oldrdp': 'pdf_oldrdp',
                   '--oldmcp': 'pdf_oldmcp',
                   '--rdptitle': 'pdf_rdptitle',
                   '--mcptitle': 'pdf_mcptitle',
                   '--oldrdptitle': 'pdf_oldrdptitle',
                   '--oldmcptitle': 'pdf_oldmcptitle',
                   '--out': 'pdf_output'}
        # argDict = {'--custom': self.pdf_custom, '--title': self.pdf_title,
        #            '--rdp': self.pdf_rdp, '--mcp': self.pdf_mcp,
        #            '--oldrdp': self.pdf_oldrdp, '--oldmcp': self.pdf_oldmcp,
        #            '--rdptitle': self.pdf_rdptitle, '--mcptitle': self.pdf_mcptitle,
        #            '--oldrdptitle': self.pdf_oldrdptitle, '--oldmcptitle': self.pdf_oldmcptitle,
        #            '--out': self.pdf_output}

        for key in argDict:
            if not getattr(self, argDict[key]) == None:
                args.append(key + '=' + getattr(self, argDict[key]))

        for opt in self.pdf_options:
            for key in argDict:
                if opt.find(key) > -1 and not getattr(self, argDict[key]) == None:
                    raise ApplicationConfigurationError(
                        'The make_pdf.py command line argument %s was set through both the ganga application variable "%s" and pdf_options "%s". Use only one of them.' % (key, argDict[key], opt))
            args.append(opt)

    # Create the bash script and put it in the input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    script += ' '.join(args) + '\n'

    from Ganga.GPIDev.Lib.File import FileBuffer
    scriptname = 'make_ana.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script), executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
def configure(self, masterappconfig):
    if self.cmtsetup == None:
        raise ApplicationConfigurationError('No cmt setup script given.')

    # __________ Reco first ____________
    reco_args = convertIntToStringArgs(self.reco_args)

    job = self.getJobObject()

    # Need to handle the possibility of multiple output files!
    # setup the output file
    for arg in reco_args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                'Option "-o" given in reco_args. You must use the filenamesubstr and reconewstr variables instead to define an output.')

    # Get the list of input filenames from get_dataset_filenames() and pass
    # them on the command line.
    if job.inputdata == None:
        raise ApplicationConfigurationError(
            'The inputdata variable is not defined.')
    fileList = job.inputdata.get_dataset_filenames()
    if len(fileList) < 1:
        raise ApplicationConfigurationError('No input data file given.')

    firstFile = fileList[0].split('/')[-1]
    # Define the output
    reco_args.append('-o')
    if self.filenamesubstr == None:
        reco_outputfile = 'recoOutput.root'
    else:
        reco_outputfile = firstFile.replace(self.filenamesubstr, self.reconewstr)
    reco_args.append(reco_outputfile)

    # Just to define the output before the potentially long list of input files
    reco_args.extend(fileList)

    # __________ Now oaAnalysis ____________
    anal_args = convertIntToStringArgs(self.anal_args)

    job = self.getJobObject()

    # setup the output file
    for arg in anal_args:
        if arg == '-o':
            raise ApplicationConfigurationError(
                'Option "-o" given in anal_args. You must use the filenamesubstr and reconewstr variables instead to define an output.')

    # Define the output
    anal_args.append('-o')
    if self.filenamesubstr == None:
        anal_outputfile = 'analOutput.root'
    else:
        anal_outputfile = firstFile.replace(self.filenamesubstr, self.analnewstr)
    anal_args.append(anal_outputfile)

    # Now add the input file
    anal_args.append(reco_outputfile)

    reco_argsStr = ' '.join(reco_args)
    anal_argsStr = ' '.join(anal_args)

    # Create the bash script and put it in the input dir.
    script = '#!/bin/bash\n'
    script += 'source ' + self.cmtsetup + '\n'
    script += 'RunOARecon.exe ' + reco_argsStr + '\n'
    script += 'RunOAAnalysis.exe ' + anal_argsStr + '\n'

    from Ganga.GPIDev.Lib.File import FileBuffer
    scriptname = 'oaReconPlusoaAnalysis.sh'
    job.getInputWorkspace().writefile(FileBuffer(scriptname, script), executable=1)

    self._scriptname = job.inputdir + scriptname

    return (None, None)
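# For reference, the wrapper generated by configure() above for a two-stage
# job would look roughly like this (paths and file names are example values):
#
#   #!/bin/bash
#   source /path/to/cmt/setup.sh
#   RunOARecon.exe -o input_reco.root /data/input_raw.root
#   RunOAAnalysis.exe -o input_anal.root input_reco.root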
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """
    This function prepares the application of the actual job being submitted, master or not
    Args:
        app (IApplication): This is the application actually being submitted belonging to the master or sub job being configured
        appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
        appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
        jobmasterconfig (StandardJobConfig): This is the configuration of the master job which may or may not be the same job as owning the app
    """

    # Construct some common objects used in job submission here
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)

    job = app.getJobObject()

    # Construct the im3shape-script which is used by this job, i.e. the script
    # and full command line to be used in this job
    exe_script_name = 'im3shape-script.py'
    output_filename = os.path.basename(job.inputdata[0].lfn) + '.' + str(app.rank) + '.' + str(app.size)

    im3shape_args = ' '.join([os.path.basename(job.inputdata[0].lfn),
                              os.path.basename(app.ini_location.namePattern),  # input.fz, config.ini
                              app.catalog,
                              output_filename,  # catalog, output
                              str(app.rank),
                              str(app.size)])

    full_cmd = app.exe_name + ' ' + im3shape_args

    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    inputsandbox.append(FileBuffer(
        name=exe_script_name,
        contents=script_generator(
            Im3Shape_script_template(),
            ## ARGS for app from job.app
            RUN_DIR=app.run_dir,
            BLACKLIST=os.path.basename(app.blacklist.namePattern),
            COMMAND=full_cmd,
            ## Stuff for Ganga
            OUTPUTFILES=repr([this_file.namePattern for this_file in job.outputfiles]),
            OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' ')),
        executable=True))

    # TODO once there is a common IApplication.getMeFilesForThisApp function,
    # replace this list with a getter; it shouldn't really be hard-coded
    app_file_list = [app.im3_location, app.ini_location, app.blacklist]

    app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]
    job.inputfiles.extend(app_file_list)

    # Slightly mis-using this here but it would be nice to have these files
    #job.inputfiles.extend(job.inputdata)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        diracAPI_script_template(),
        DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
        DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
        DIRAC_OBJECT='Dirac()',
        JOB_OBJECT='Job()',
        NAME=mangle_job_name(app),
        EXE=exe_script_name,
        EXE_ARG_STR='',
        EXE_LOG_FILE='Ganga_Executable.log',
        ENVIRONMENT=None,
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=dirac_outputfile_jdl(outputfiles, False),
        OUTPUT_PATH="",  # job.fqid,
        SETTINGS=diracAPI_script_settings(app),
        DIRAC_OPTS=job.backend.diracOpts,
        REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
        # leave the sandbox for altering later as needs
        # to be done in backend.submit to combine master.
        # Note only using 2 #s as auto-remove 3
        INPUT_SANDBOX='##INPUT_SANDBOX##')

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))