def master_prepare(self, app, appmasterconfig):
    """Master-prepare an Executable application.

    Builds the master input/output sandboxes and, when the executable is a
    File object, verifies that the prepared copy exists in the shared
    directory and is executable.

    Raises:
        ApplicationConfigurationError: if the prepared executable is missing.

    Returns:
        StandardJobConfig with de-duplicated input/output sandboxes.
    """
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)
    # isinstance (not type ==) so File subclasses are handled too.
    if isinstance(app.exe, File):
        exefile = os.path.join(get_share_path(app),
                               os.path.basename(app.exe.name))
        if not os.path.exists(exefile):
            msg = 'Executable must exist!'
            raise ApplicationConfigurationError(None, msg)
        # os.chmod instead of `os.system('chmod +x %s' % ...)`: no shell
        # involved, so paths containing spaces/metacharacters are safe.
        os.chmod(exefile, os.stat(exefile).st_mode | 0o111)
    return StandardJobConfig(inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    """Master-prepare a Root application.

    Validates that a script is attached to the application and that its
    prepared copy is present in the shared directory, then returns the
    master-level StandardJobConfig.

    Raises:
        ApplicationConfigurationError: if no script is set or the prepared
            script file is absent.
    """
    in_box, out_box = master_sandbox_prepare(app, appmasterconfig)
    # A Root application is unusable without a script attached.
    if not app.script.name:
        raise ApplicationConfigurationError(None, 'Root.script.name must be set.')
    script_on_share = os.path.join(
        get_share_path(app), os.path.basename(app.script.name))
    if not os.path.exists(script_on_share):
        raise ApplicationConfigurationError(None, 'Script must exist!')
    return StandardJobConfig(inputbox=unique(in_box),
                             outputbox=unique(out_box))
def master_prepare(self, app, appmasterconfig):
    """Master-prepare a Root application.

    Ensures the application has a script set and that the prepared script
    exists in the share directory before building the job configuration.

    Raises:
        ApplicationConfigurationError: on a missing script name or file.
    """
    sandbox_in, sandbox_out = master_sandbox_prepare(app, appmasterconfig)
    if not app.script.name:
        # No script configured at all — fail early with a clear message.
        raise ApplicationConfigurationError('Root.script.name must be set.')
    shared_script = os.path.join(get_share_path(app),
                                 os.path.basename(app.script.name))
    if not os.path.exists(shared_script):
        raise ApplicationConfigurationError('Script must exist!')
    return StandardJobConfig(inputbox=unique(sandbox_in),
                             outputbox=unique(sandbox_out))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare a Gaudi(Python) subjob for submission to the Dirac backend.

    Builds the data-wrapper and gaudi-script worker-node files, resolves
    input data into LFN strings, reconciles sandbox/output-file lists coming
    from a pickled options parser, and renders the LHCbDirac API submission
    script.

    Returns:
        StandardJobConfig wrapping the dirac script plus de-duplicated
        input/output sandboxes.
    """
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    job = app.getJobObject()

    # Refuse huge unsplit datasets: without a splitter this would become a
    # single unmanageable Dirac job.
    if job.inputdata:
        if not job.splitter:
            if len(job.inputdata) > 100:
                raise BackendError(
                    "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!"
                )

    # Only DiracFile outputs are uploaded via the Dirac API script.
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    # Worker-node wrapper that pulls in the per-subjob data.py.
    data_str = "import os\n"
    data_str += "execfile('data.py')\n"
    if hasattr(job, "_splitter_data"):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer("data-wrapper.py", data_str))

    input_data = []

    # Cant wait to get rid of this when people no-longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from job over that in
    # optsfiles but need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app),
                                  "inputdata",
                                  "options_data.pkl")
        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, "r+b")
                job.inputdata = pickle.load(f)
                f.close()
    #######################################################################
    # Cant wait to get rid of this when people no-longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app),
                              "output",
                              "options_parser.pkl")
    if os.path.exists(share_path):
        # if not os.path.exists(share_path):
        # raise GangaException('could not find the parser')
        f = open(share_path, "r+b")
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters
        fileTransform = allComponentFilters["gangafiles"]

        # Convert options-file output declarations into Ganga file objects,
        # skipping anything the user already listed on the job.
        outdata_files = [
            fileTransform(this_file, None) for this_file in outdata
            if not FileUtils.doesFileExist(this_file, job.outputfiles)
        ]
        job.non_copyable_outputfiles.extend(
            [output_file for output_file in outdata_files if not isType(output_file, DiracFile)]
        )
        outbox_files = [
            fileTransform(this_file, None) for this_file in outbox
            if not FileUtils.doesFileExist(this_file, job.outputfiles)
        ]
        job.non_copyable_outputfiles.extend(
            [outbox_file for outbox_file in outbox_files if not isType(outbox_file, DiracFile)]
        )

        # Output sandbox = everything that is NOT uploaded as a DiracFile.
        outputsandbox = [f.namePattern for f in job.non_copyable_outputfiles]
        outputsandbox.extend([f.namePattern for f in job.outputfiles if not isType(f, DiracFile)])
        outputsandbox = unique(outputsandbox)  # + outbox[:])
    #######################################################################

    input_data_dirac, parametricinput_data = dirac_inputdata(job.application)

    # Flatten DiracFiles/strings into plain LFN strings for the template.
    if input_data_dirac is not None:
        for f in input_data_dirac:
            if isType(f, DiracFile):
                input_data.append(f.lfn)
            elif isType(f, str):
                input_data.append(f)
            else:
                raise ApplicationConfigurationError(
                    "Don't know How to handle anythig other than DiracFiles or strings to LFNs!"
                )

    commandline = "python ./gaudipython-wrapper.py"
    if is_gaudi_child(app):
        commandline = "gaudirun.py "
        commandline += " ".join([str(arg) for arg in app.args])
        commandline += " options.pkl data-wrapper.py"
    logger.debug("Command line: %s: ", commandline)

    # Render the worker-node gaudi wrapper directly into the input workspace.
    gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")
    script_generator(
        gaudi_script_template(),
        # remove_unreplaced = False,
        outputfile_path=gaudi_script_path,
        PLATFORM=app.platform,
        COMMAND=commandline,
        XMLSUMMARYPARSING=getXMLSummaryScript()  # ,
        # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
    )

    # logger.debug( "input_data %s" % str( input_data ) )

    # We want to propogate the ancestor depth to DIRAC when we have
    # inputdata set; an explicit backend setting wins over dataset depth.
    if job.inputdata is not None and isType(job.inputdata, LHCbDataset):
        # As the RT Handler we already know we have a Dirac backend
        if type(job.backend.settings) is not dict:
            raise ApplicationConfigurationError(None, "backend.settings should be a dict")
        if "AncestorDepth" in job.backend.settings:
            ancestor_depth = job.backend.settings["AncestorDepth"]
        else:
            ancestor_depth = job.inputdata.depth
    else:
        ancestor_depth = 0

    lhcbdirac_script_template = lhcbdiracAPI_script_template()
    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
    # remove after Ganga6 release
    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        lhcbdirac_script_template,
        DIRAC_IMPORT="from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb",
        DIRAC_JOB_IMPORT="from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob",
        DIRAC_OBJECT="DiracLHCb()",
        JOB_OBJECT="LHCbJob()",
        NAME=mangle_job_name(app),
        APP_NAME=app.appname,
        APP_VERSION=app.version,
        APP_SCRIPT=gaudi_script_path,
        APP_LOG_FILE="Ganga_%s_%s.log" % (app.appname, app.version),
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
        # job.fqid,#outputdata_path,
        OUTPUT_PATH="",
        OUTPUT_SE=getConfig("DIRAC")["DiracOutputDataSE"],
        SETTINGS=diracAPI_script_settings(job.application),
        DIRAC_OPTS=job.backend.diracOpts,
        PLATFORM=app.platform,
        REPLICATE="True" if getConfig("DIRAC")["ReplicateOutputData"] else "",
        ANCESTOR_DEPTH=ancestor_depth,
        ## This is to be modified in the final 'submit' function in the backend
        ## The backend also handles the inputfiles DiracFiles ass appropriate
        INPUT_SANDBOX="##INPUT_SANDBOX##",
    )
    logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare a Root subjob for the Dirac backend.

    Writes a 'script_wrapper.py' into the job input workspace that runs the
    user's Root script on the worker node (either through python or through
    the root CLI), then renders the LHCbDirac API submission script from the
    collected template parameters.

    Returns:
        StandardJobConfig wrapping the dirac script plus de-duplicated
        input/output sandboxes.
    """
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    logger.debug("input_data: " + str(input_data))
    job = app.getJobObject()

    # Only DiracFile outputs are uploaded through the Dirac API script.
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    params = {'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
              'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
              'DIRAC_OBJECT': 'DiracLHCb()',
              'JOB_OBJECT': 'LHCbJob()',
              'NAME': mangle_job_name(app),
              'INPUTDATA': input_data,
              'PARAMETRIC_INPUTDATA': parametricinput_data,
              'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
              'OUTPUTFILESSCRIPT' : lhcb_dirac_outputfiles,
              'OUTPUT_PATH': "",  # job.fqid,
              'OUTPUT_SE': getConfig('DIRAC')['DiracOutputDataSE'],
              'SETTINGS': diracAPI_script_settings(app),
              'DIRAC_OPTS': job.backend.diracOpts,
              'PLATFORM': getConfig('ROOT')['arch'],
              'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
              # leave the sandbox for altering later as needs
              # to be done in backend.submit to combine master.
              # Note only using 2 #s as auto-remove 3
              'INPUT_SANDBOX': '##INPUT_SANDBOX##'
              }
    # NOTE(review): scriptpath is computed but never used below — candidate
    # for removal; kept here to avoid any behavior change.
    scriptpath = os.path.join(get_share_path(app),
                              os.path.basename(app.script.name))
    wrapper_path = os.path.join(job.getInputWorkspace(create=True).getPath(),
                                'script_wrapper.py')
    # Template executed on the worker node; ###TOKENS### are substituted by
    # script_generator below before the file is written.
    python_wrapper =\
"""#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)
script_args = '###SCRIPT_ARGS###'
del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []:
    script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""
    python_wrapper = python_wrapper.replace('###SCRIPT_ARGS###',
                                            str('###JOINER###'.join([str(a) for a in app.args])))

    params.update({
        'APP_NAME' : 'Root',
        'APP_VERSION' : app.version,
        'APP_SCRIPT' : wrapper_path,
        'APP_LOG_FILE' : 'Ganga_Root.log'
    })
    #params.update({'ROOTPY_SCRIPT': wrapper_path,
    #               'ROOTPY_VERSION': app.version,
    #               'ROOTPY_LOG_FILE': 'Ganga_Root.log',
    #               'ROOTPY_ARGS': [str(a) for a in app.args]})
    f = open(wrapper_path, 'w')
    if app.usepython:
        # Run the script through the python interpreter; args joined by space.
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='',
                                          COMMAND='/usr/bin/env python %s %s' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=' ',
                                          #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
                                          )
    else:
        # Run via the root CLI; args are comma-joined and quoted by formatVar.
        # NOTE(review): 'localhoast' looks like a typo for 'localhost' — but
        # the bogus DISPLAY may be a deliberate way to suppress graphics;
        # confirm before changing.
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='script_args=[formatVar(v) for v in script_args]',
                                          COMMAND='export DISPLAY=\"localhoast:0.0\" && root -l -q \"%s(%s)\"' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=',',
                                          #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
                                          )
    f.write(python_wrapper)
    f.close()

    dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)
    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare a Root subjob for the Dirac backend.

    Writes a 'script_wrapper.py' into the job input workspace that runs the
    user's Root script on the worker node (either through python or through
    the root CLI), then renders the LHCbDirac API submission script from the
    collected template parameters.

    Returns:
        StandardJobConfig wrapping the dirac script plus de-duplicated
        input/output sandboxes.
    """
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    logger.debug("input_data: " + str(input_data))
    job = app.getJobObject()

    # Only DiracFile outputs are uploaded through the Dirac API script.
    outputfiles = [
        this_file for this_file in job.outputfiles
        if isType(this_file, DiracFile)
    ]

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    params = {
        'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
        'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
        'DIRAC_OBJECT': 'DiracLHCb()',
        'JOB_OBJECT': 'LHCbJob()',
        'NAME': mangle_job_name(app),
        'INPUTDATA': input_data,
        'PARAMETRIC_INPUTDATA': parametricinput_data,
        'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
        'OUTPUTFILESSCRIPT': lhcb_dirac_outputfiles,
        'OUTPUT_PATH': "",  # job.fqid,
        # Fix: OUTPUT_SE was missing from this copy of the handler although
        # the same template is rendered and the sibling Root/Dirac prepare
        # supplies it; without it the template token is left unresolved.
        'OUTPUT_SE': getConfig('DIRAC')['DiracOutputDataSE'],
        'SETTINGS': diracAPI_script_settings(app),
        'DIRAC_OPTS': job.backend.diracOpts,
        'PLATFORM': getConfig('ROOT')['arch'],
        'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
        # leave the sandbox for altering later as needs
        # to be done in backend.submit to combine master.
        # Note only using 2 #s as auto-remove 3
        'INPUT_SANDBOX': '##INPUT_SANDBOX##'
    }

    wrapper_path = os.path.join(
        job.getInputWorkspace(create=True).getPath(), 'script_wrapper.py')
    # Template executed on the worker node; ###TOKENS### are substituted by
    # script_generator below before the file is written.
    python_wrapper =\
"""#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)
script_args = '###SCRIPT_ARGS###'
del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []:
    script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""
    python_wrapper = python_wrapper.replace(
        '###SCRIPT_ARGS###',
        str('###JOINER###'.join([str(a) for a in app.args])))

    params.update({
        'APP_NAME': 'Root',
        'APP_VERSION': app.version,
        'APP_SCRIPT': wrapper_path,
        'APP_LOG_FILE': 'Ganga_Root.log'
    })
    #params.update({'ROOTPY_SCRIPT': wrapper_path,
    #               'ROOTPY_VERSION': app.version,
    #               'ROOTPY_LOG_FILE': 'Ganga_Root.log',
    #               'ROOTPY_ARGS': [str(a) for a in app.args]})
    if app.usepython:
        # Run the script through the python interpreter; args joined by space.
        python_wrapper = script_generator(
            python_wrapper,
            remove_unreplaced=False,
            FIXARGS='',
            COMMAND='/usr/bin/env python %s %s' %
            (os.path.basename(app.script.name), '%s'),
            JOINER=' ',
            #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
        )
    else:
        # Run via the root CLI; args are comma-joined and quoted by formatVar.
        # NOTE(review): 'localhoast' looks like a typo for 'localhost' — but
        # the bogus DISPLAY may be a deliberate way to suppress graphics;
        # confirm before changing.
        python_wrapper = script_generator(
            python_wrapper,
            remove_unreplaced=False,
            FIXARGS='script_args=[formatVar(v) for v in script_args]',
            COMMAND=
            'export DISPLAY=\"localhoast:0.0\" && root -l -q \"%s(%s)\"' %
            (os.path.basename(app.script.name), '%s'),
            JOINER=',',
            #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
        )
    # Fix: use a context manager so the wrapper file is closed even if a
    # later step raises.
    with open(wrapper_path, 'w') as wrapper_file:
        wrapper_file.write(python_wrapper)

    dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)
    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare a Gaudi(Python) subjob for a local/batch (non-Dirac) backend.

    Merges input data and sandbox/output declarations from pickled options
    files, builds a data.py (plus XML catalog when LFNs are present), and
    renders the gaudi-script.py runscript.

    Returns:
        StandardJobConfig whose executable is the generated gaudi-script.py
        FileBuffer, with de-duplicated input/output sandboxes.
    """
    logger.debug("Prepare")

    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = app.getJobObject()

    logger.debug("Loading pickle files")

    #outputfiles=set([file.namePattern for file in job.outputfiles]).difference(set(getOutputSandboxPatterns(job)))
    # Cant wait to get rid of this when people no-longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from job over that in
    # optsfiles but need to take sare of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app),
                                  'inputdata',
                                  'options_data.pkl')
        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()
    #######################################################################
    # Cant wait to get rid of this when people no-longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app),
                              'output',
                              'options_parser.pkl')

    logger.debug("Adding info from pickle files")

    if os.path.exists(share_path):
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters
        fileTransform = allComponentFilters['gangafiles']

        # Convert options-file output declarations into Ganga file objects,
        # skipping anything the user already listed on the job.
        job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outdata
                                             if not FileUtils.doesFileExist(this_file, job.outputfiles)])
        job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outbox
                                             if not FileUtils.doesFileExist(this_file, job.outputfiles)])

        # Rebuild the output sandbox from the merged file lists.
        outputsandbox = [f.namePattern for f in job.non_copyable_outputfiles]
        outputsandbox.extend([f.namePattern for f in job.outputfiles])
        outputsandbox = unique(outputsandbox)
    #######################################################################

    logger.debug("Doing XML Catalog stuff")

    data = job.inputdata
    data_str = ''
    if data:
        logger.debug("Returning options String")
        data_str = data.optionsString()
        if data.hasLFNs():
            # Ship an XML catalog so Gaudi can resolve the LFNs locally.
            logger.debug("Returning Catalogue")
            inputsandbox.append(FileBuffer('catalog.xml', data.getCatalog()))
            cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
            data_str += cat_opts

    logger.debug("Doing splitter_data stuff")
    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer('data.py', data_str))

    logger.debug("Doing GaudiPython stuff")

    cmd = 'python ./gaudipython-wrapper.py'
    opts = ''
    if is_gaudi_child(job.application):
        opts = 'options.pkl'
        cmd = 'gaudirun.py ' + \
            ' '.join(job.application.args) + ' %s data.py' % opts

    logger.debug("Setting up script")

    script = script_generator(create_runscript(job.application.newStyleApp),
                              remove_unreplaced=False,
                              OPTS=opts,
                              PROJECT_OPTS=job.application.setupProjectOptions,
                              APP_NAME=job.application.appname,
                              APP_VERSION=job.application.version,
                              APP_PACKAGE=job.application.package,
                              PLATFORM=job.application.platform,
                              CMDLINE=cmd,
                              XMLSUMMARYPARSING=getXMLSummaryScript())  # ,
    # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ''))

    logger.debug("Returning StandardJobConfig")

    return StandardJobConfig(FileBuffer('gaudi-script.py', script, executable=1),
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare a Gaudi(Python) subjob for a local/batch (non-Dirac) backend.

    Merges input data and sandbox/output declarations from pickled options
    files, builds a data.py (plus XML catalog when LFNs are present), and
    renders the gaudi-script.py runscript.

    Returns:
        StandardJobConfig whose executable is the generated gaudi-script.py
        FileBuffer, with de-duplicated input/output sandboxes.
    """
    logger.debug("Prepare")

    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = app.getJobObject()

    logger.debug("Loading pickle files")

    #outputfiles=set([file.namePattern for file in job.outputfiles]).difference(set(getOutputSandboxPatterns(job)))
    # Cant wait to get rid of this when people no-longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from job over that in
    # optsfiles but need to take sare of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app),
                                  'inputdata',
                                  'options_data.pkl')
        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()
    #######################################################################
    # Cant wait to get rid of this when people no-longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app),
                              'output',
                              'options_parser.pkl')

    logger.debug("Adding info from pickle files")

    if os.path.exists(share_path):
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters
        fileTransform = allComponentFilters['gangafiles']

        # Convert options-file output declarations into Ganga file objects,
        # skipping anything the user already listed on the job.
        job.non_copyable_outputfiles.extend([
            fileTransform(this_file, None) for this_file in outdata
            if not FileUtils.doesFileExist(this_file, job.outputfiles)
        ])
        job.non_copyable_outputfiles.extend([
            fileTransform(this_file, None) for this_file in outbox
            if not FileUtils.doesFileExist(this_file, job.outputfiles)
        ])

        # NOTE(review): this variant EXTENDS the sandbox returned by
        # sandbox_prepare (the sibling handler reassigns it instead) —
        # presumably intentional; verify against the sibling implementation.
        outputsandbox.extend(
            [f.namePattern for f in job.non_copyable_outputfiles])
        outputsandbox.extend([f.namePattern for f in job.outputfiles])
        outputsandbox = unique(outputsandbox)
    #######################################################################

    logger.debug("Doing XML Catalog stuff")

    data = job.inputdata
    data_str = ''
    if data:
        logger.debug("Returning options String")
        data_str = data.optionsString()
        if data.hasLFNs():
            # Ship an XML catalog so Gaudi can resolve the LFNs locally.
            logger.debug("Returning Catalogue")
            inputsandbox.append(FileBuffer('catalog.xml', data.getCatalog()))
            cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
            data_str += cat_opts

    logger.debug("Doing splitter_data stuff")
    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer('data.py', data_str))

    logger.debug("Doing GaudiPython stuff")

    cmd = 'python ./gaudipython-wrapper.py'
    opts = ''
    if is_gaudi_child(job.application):
        opts = 'options.pkl'
        cmd = 'gaudirun.py ' + \
            ' '.join(job.application.args) + ' %s data.py' % opts

    logger.debug("Setting up script")

    script = script_generator(
        create_runscript(job.application.newStyleApp),
        remove_unreplaced=False,
        OPTS=opts,
        PROJECT_OPTS=job.application.setupProjectOptions,
        APP_NAME=job.application.appname,
        APP_VERSION=job.application.version,
        APP_PACKAGE=job.application.package,
        PLATFORM=job.application.platform,
        CMDLINE=cmd,
        XMLSUMMARYPARSING=getXMLSummaryScript())  # ,
    # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ''))

    logger.debug("Returning StandardJobConfig")

    return StandardJobConfig(FileBuffer('gaudi-script.py', script, executable=1),
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare an Executable subjob for the Dirac backend.

    Assembles the command line, ships the executable (when it is a File)
    plus a generated exe-script.py in the input sandbox, and renders the
    plain Dirac API submission script.

    Returns:
        StandardJobConfig wrapping the dirac script plus de-duplicated
        input/output sandboxes.
    """
    inputsandbox, outputsandbox = sandbox_prepare(
        app, appsubconfig, appmasterconfig, jobmasterconfig)

    input_data, parametricinput_data = dirac_inputdata(app)
    # outputdata, outputdata_path = dirac_ouputdata(app)

    job = app.getJobObject()
    # Only DiracFile outputs are uploaded through the Dirac API script.
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    # NOTE(review): when app.exe is not a File this assumes it is a string
    # command (the += below would fail otherwise) — confirm against callers.
    commandline = app.exe
    if type(app.exe) == File:
        # Ship the prepared copy of the executable and invoke it by basename.
        inputsandbox.append(File(name=os.path.join(get_share_path(app),
                                                   os.path.basename(app.exe.name))))
        commandline = os.path.basename(app.exe.name)
    commandline += ' '
    commandline += ' '.join([str(arg) for arg in app.args])
    logger.debug('Command line: %s: ', commandline)

    #exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
    exe_script_name = 'exe-script.py'

    # Worker-node wrapper that actually runs the user command.
    inputsandbox.append(FileBuffer(name=exe_script_name,
                                   contents=script_generator(exe_script_template(),
                                                             #remove_unreplaced = False,
                                                             # ,
                                                             COMMAND=commandline
                                                             #OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                                                             ),
                                   executable=True))

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    # os.path.basename(exe_script_path),
                                    EXE=exe_script_name,
                                    # ' '.join([str(arg) for arg in app.args]),
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX='##INPUT_SANDBOX##'
                                    )

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare a Gaudi(Python) subjob for submission to the Dirac backend.

    Variant of the Gaudi/Dirac prepare that goes through stripProxy when
    reading application attributes. Builds the data-wrapper and gaudi-script
    worker-node files, resolves input data into LFN strings, reconciles
    sandbox/output-file lists from a pickled options parser, and renders the
    LHCbDirac API submission script.

    Returns:
        StandardJobConfig wrapping the dirac script plus de-duplicated
        input/output sandboxes.
    """
    logger.debug("Prepare")
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = stripProxy(app).getJobObject()

    # Refuse huge unsplit datasets: without a splitter this would become a
    # single unmanageable Dirac job.
    if job.inputdata:
        if not job.splitter:
            if len(job.inputdata) > 100:
                raise BackendError(
                    "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!"
                )

    # Only DiracFile outputs are uploaded via the Dirac API script.
    outputfiles = [
        this_file for this_file in job.outputfiles
        if isType(this_file, DiracFile)
    ]

    # Worker-node wrapper that pulls in the per-subjob data.py.
    data_str = 'import os\n'
    data_str += 'execfile(\'data.py\')\n'
    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer('data-wrapper.py', data_str))

    input_data = []

    # Cant wait to get rid of this when people no-longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from job over that in
    # optsfiles but need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app),
                                  'inputdata',
                                  'options_data.pkl')
        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()
    #######################################################################
    # Cant wait to get rid of this when people no-longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app),
                              'output',
                              'options_parser.pkl')
    if os.path.exists(share_path):
        # if not os.path.exists(share_path):
        # raise GangaException('could not find the parser')
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters
        fileTransform = allComponentFilters['gangafiles']

        # Convert options-file output declarations into Ganga file objects,
        # skipping anything the user already listed on the job.
        outdata_files = [
            fileTransform(this_file, None) for this_file in outdata
            if not FileUtils.doesFileExist(this_file, job.outputfiles)
        ]
        job.non_copyable_outputfiles.extend([
            output_file for output_file in outdata_files
            if not isType(output_file, DiracFile)
        ])
        outbox_files = [
            fileTransform(this_file, None) for this_file in outbox
            if not FileUtils.doesFileExist(this_file, job.outputfiles)
        ]
        job.non_copyable_outputfiles.extend([
            outbox_file for outbox_file in outbox_files
            if not isType(outbox_file, DiracFile)
        ])

        # Output sandbox = everything that is NOT uploaded as a DiracFile.
        outputsandbox = [
            f.namePattern for f in job.non_copyable_outputfiles
        ]
        outputsandbox.extend([
            f.namePattern for f in job.outputfiles
            if not isType(f, DiracFile)
        ])
        outputsandbox = unique(outputsandbox)  # + outbox[:])
    #######################################################################

    input_data_dirac, parametricinput_data = dirac_inputdata(
        job.application)

    # Flatten DiracFiles/strings into plain LFN strings for the template.
    if input_data_dirac is not None:
        for f in input_data_dirac:
            if isType(f, DiracFile):
                input_data.append(f.lfn)
            elif isType(f, str):
                input_data.append(f)
            else:
                raise ApplicationConfigurationError(
                    "Don't know How to handle anythig other than DiracFiles or strings to LFNs!"
                )

    commandline = "python ./gaudipython-wrapper.py"
    if is_gaudi_child(app):
        commandline = 'gaudirun.py '
        commandline += ' '.join([str(arg) for arg in app.args])
        commandline += ' options.pkl data-wrapper.py'
    logger.debug('Command line: %s: ', commandline)

    # Render the worker-node gaudi wrapper directly into the input workspace.
    gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")
    script_generator(
        gaudi_script_template(),
        #remove_unreplaced = False,
        outputfile_path=gaudi_script_path,
        PLATFORM=app.platform,
        COMMAND=commandline,
        XMLSUMMARYPARSING=getXMLSummaryScript()  # ,
        #OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
    )

    #logger.debug( "input_data %s" % str( input_data ) )

    # We want to propogate the ancestor depth to DIRAC when we have
    # inputdata set; an explicit backend setting wins over dataset depth.
    if job.inputdata is not None and isType(job.inputdata, LHCbDataset):
        # As the RT Handler we already know we have a Dirac backend
        if type(job.backend.settings) is not dict:
            raise ApplicationConfigurationError(
                None, 'backend.settings should be a dict')
        if 'AncestorDepth' in job.backend.settings:
            ancestor_depth = job.backend.settings['AncestorDepth']
        else:
            ancestor_depth = job.inputdata.depth
    else:
        ancestor_depth = 0

    lhcbdirac_script_template = lhcbdiracAPI_script_template()
    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
    # remove after Ganga6 release
    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(
        lhcbdirac_script_template,
        DIRAC_IMPORT=
        'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
        DIRAC_JOB_IMPORT=
        'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
        DIRAC_OBJECT='DiracLHCb()',
        JOB_OBJECT='LHCbJob()',
        NAME=mangle_job_name(app),
        APP_NAME=stripProxy(app).appname,
        APP_VERSION=app.version,
        APP_SCRIPT=gaudi_script_path,
        APP_LOG_FILE='Ganga_%s_%s.log' % (stripProxy(app).appname, app.version),
        INPUTDATA=input_data,
        PARAMETRIC_INPUTDATA=parametricinput_data,
        OUTPUT_SANDBOX=API_nullifier(outputsandbox),
        OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
        # job.fqid,#outputdata_path,
        OUTPUT_PATH="",
        OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
        SETTINGS=diracAPI_script_settings(job.application),
        DIRAC_OPTS=job.backend.diracOpts,
        PLATFORM=app.platform,
        REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
        ANCESTOR_DEPTH=ancestor_depth,
        ## This is to be modified in the final 'submit' function in the backend
        ## The backend also handles the inputfiles DiracFiles ass appropriate
        INPUT_SANDBOX='##INPUT_SANDBOX##')
    logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def test_master_prepare(self):
    """Exercise master_prepare for every configured app type.

    Checks that the returned StandardJobConfig contains unique, fully
    accounted-for sandbox entries (job sandboxes + appconfig boxes +
    share-dir files), and that the expected exceptions are raised for a
    missing exe file and for an unprepared application.
    """
    # setup failing app jobs
    g = tempfile.NamedTemporaryFile(mode='w')
    j_fail = Job(application=Executable(exe=File(g.name)))
    j_fail.prepare()
    g.close()  # file now gone
    os.remove(os.path.join(
        get_share_path(j_fail.application._impl),
        os.path.split(g.name)[1]))

    # Fix: this job was commented out, leaving j_prep_fail undefined and
    # making the final assertRaises die with a NameError instead of
    # testing anything. An unprepared app must trigger GangaException.
    j_prep_fail = Job(application=Executable(exe='ls'))

    # Start the testing for each app type
    for app in (j.application._impl for j in self.job_list):
        # check its a known app type
        self.assertTrue(isinstance(app.exe, str) or isinstance(
            app.exe, File._impl), 'Unknown app.exe type! %s' % type(app.exe))

        # run the method we are testing
        jobconfig = self._rthandler.master_prepare(
            app, self.appmasterconfig)

        # check the return value is of the right type
        self.assertTrue(isinstance(jobconfig, StandardJobConfig),
                        'Expected a StandardJobConfig object returned. Instead got %s' % repr(jobconfig))

        # create sets from the text string file names from the inputbox and
        # outputbox
        ipb = set(f.name for f in jobconfig.inputbox)
        opb = set(jobconfig.outputbox)

        # check that inputbox and outputbox contain only unique elements
        self.assertEqual(len(ipb), len(jobconfig.inputbox),
                         'Returned inputsandbox did not contain only unique elements')
        self.assertEqual(len(opb), len(jobconfig.outputbox),
                         'Returned outputsandbox did not contain only unique elements')

        # find the difference between the in/outputbox and those from the
        # defined job in/outputsandbox and appconfig_in/outputbox
        idiff = ipb.symmetric_difference(
            set([f.name for f in self.inputsandbox] +
                [f.name for f in self.appconfig_inputbox]))
        # added __postprocesslocations__
        odiff = opb.symmetric_difference(
            set(self.outputsandbox + self.appconfig_outputbox +
                ['__postprocesslocations__']))

        # expect that things placed in the sharedir on preparation will
        # feature in idiff so check and remove them
        for root, dirs, files in os.walk(get_share_path(app)):
            if files:
                qualified_files = set(
                    [os.path.join(root, f) for f in files])
                self.assertTrue(
                    qualified_files.issubset(idiff),
                    'Could not find the following prepared file(s) in jobconfig.inputbox: %s' % repr(qualified_files.difference(idiff)))
                # once checked that they exist in the idiff then remove
                # them for ultimate check next
                idiff.difference_update(qualified_files)

        # check that no extra files, i.e. those not from the
        # job.in/outputsandbox or appconfig_in/outputbox or sharedir are
        # present
        self.assertEqual(idiff, set(),
                         'jobconfig.inputbox != job.inputsandbox + appconfig.inputbox + prepared_sharedir_files: sym_diff = %s' % idiff)
        self.assertEqual(odiff, set(),
                         'jobconfig.outputbox != job.outputsandbox + appconfig.outputbox: sym_diff = %s' % odiff)

    # check that the proper exception is raised in case of the exe file not
    # existing
    self.assertRaises(ApplicationConfigurationError,
                      self._rthandler.master_prepare,
                      j_fail.application._impl,
                      self.appmasterconfig,
                      msg="Checking that Exception raised if file doesn't exist")

    # check that the proper exception is raised in case of the app not
    # being prepared.
    self.assertRaises(GangaException,
                      self._rthandler.master_prepare,
                      j_prep_fail.application._impl,
                      self.appmasterconfig,
                      msg="Checking exception raised if app not prepared")
def test_prepare(self):
    """Exercise the subjob prepare for every configured app type.

    Verifies sandbox uniqueness and content (appsubconfig inputbox +
    exe-script.py + the shared exe file), and compares the generated Dirac
    API script against an expected template.
    """
    appsubconfig = StandardJobConfig(inputbox=[File('file1.txt')._impl, File('file2.txt')._impl],
                                     outputbox=['file3.txt', 'file4.txt'])
    jobmasterconfig = StandardJobConfig(inputbox=[File('file5.txt')._impl, File('file6.txt')._impl],
                                        outputbox=['file7.txt', 'file8.txt'])

    # Start the testing for each app type
    for app in (j.application._impl for j in self.job_list):
        jobsubconfig = self._rthandler.prepare(
            app, appsubconfig, self.appmasterconfig, jobmasterconfig)

        # create sets from the text string file names from the inputbox and
        # outputbox
        ipb = set(f.name for f in jobsubconfig.inputbox)
        opb = set(jobsubconfig.outputbox)

        # check that inputbox and outputbox contain only unique elements
        self.assertEqual(len(ipb), len(jobsubconfig.inputbox),
                         'Returned inputsandbox did not contain only unique elements')
        self.assertEqual(len(opb), len(jobsubconfig.outputbox),
                         'Returned outputsandbox did not contain only unique elements')

        # find the difference between the in/outputbox and those from the
        # defined job in/outputsandbox and appconfig_in/outputbox
        idiff = ipb.symmetric_difference(
            set([f.name for f in appsubconfig.inputbox] + ['exe-script.py']))
        odiff = opb.symmetric_difference(
            set(appsubconfig.outputbox + jobmasterconfig.outputbox))

        if isinstance(app.exe, File._impl):
            # File-based exes are staged via the share dir and must appear
            # in the inputbox beyond the appsubconfig contents.
            fname = os.path.join(get_share_path(app), self.tmp_filename)
            self.assertTrue(
                fname in idiff, "Couldn't find the exe file in inputsandbox")
            # once checked that they exist in the idiff then remove them
            # for ultimate check next
            idiff.remove(fname)

        # check that no extra files, i.e. those not from the
        # job.in/outputsandbox or appconfig_in/outputbox or sharedir are
        # present
        self.assertEqual(idiff, set(),
                         'jobsubconfig.inputbox != appsubconfig.inputbox + exe-script.py + exe file: sym_diff = %s' % idiff)
        self.assertEqual(odiff, set(),
                         'jobsubconfig.outputbox != appsubconfig.outputbox + jobmasterconfig.outputbox: sym_diff = %s' % odiff)

        # Expected Dirac API script; ###JOB_ID### is substituted with the
        # job fqid before comparison.
        script = \
"""# dirac job created by ganga
from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job
dirac = Dirac()
j = Job()

# default commands added by ganga
j.setName('{Ganga_Executable_(###JOB_ID###)}')
j.setExecutable('exe-script.py','','Ganga_Executable.log')
j.setInputSandbox(##INPUT_SANDBOX##)
j.setOutputSandbox(['file4.txt', 'file3.txt', 'file8.txt', 'file7.txt'])

# <-- user settings
j.setCPUTime(172800)
# user settings -->

# diracOpts added by user

# submit the job to dirac
result = dirac.submit(j)
output(result)"""

        self.assertEqual(
            jobsubconfig.exe,
            script.replace('###JOB_ID###', app._getParent().fqid),
            'Dirac API script does not match, see diff below:\n' +
            '\n'.join(difflib.unified_diff(
                jobsubconfig.exe.splitlines(),
                script.replace('###JOB_ID###', app._getParent().fqid).splitlines(),
                fromfile='Coming from prepare method',
                tofile='What the test expected')))