def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that applications can be submitted
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    if not isinstance(app.uploadedInput, DiracFile):
        generateDiracInput(app)
        assert isinstance(app.uploadedInput, DiracFile), "Failed to upload needed file, aborting submit"

    rep_data = app.uploadedInput.getReplicas()
    assert rep_data != {}, "Failed to find a replica, aborting submit"

    if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
        app.jobScriptArchive = None

    generateDiracScripts(app)

    assert isinstance(app.jobScriptArchive, DiracFile), "Failed to upload needed file, aborting submit"

    rep_data = app.jobScriptArchive.getReplicas()
    assert rep_data != {}, "Failed to find a replica, aborting submit"

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that applications can be submitted
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    if not isinstance(app.uploadedInput, DiracFile):
        generateDiracInput(app)
        assert isinstance(app.uploadedInput, DiracFile), "Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n" % DiracFile.diracLFNBase()

    rep_data = app.uploadedInput.getReplicas()
    assert rep_data != {}, "Failed to find a replica, aborting submit"

    if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
        app.jobScriptArchive = None

    generateDiracScripts(app)

    assert isinstance(app.jobScriptArchive, DiracFile), "Failed to upload needed file, aborting submit"

    rep_data = app.jobScriptArchive.getReplicas()
    assert rep_data != {}, "Failed to find a replica, aborting submit"

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def configure(self, master_appconfig):
    ## strip leading and trailing blanks from arguments
    self.arguments = [a.strip() for a in self.arguments]

    ## strip leading and trailing blanks from the command
    self.commands = [a.strip() for a in self.commands]

    ## the script layout
    the_script = layout.format(
        scripts=[os.path.join(f.subdir, os.path.basename(f.name)) for f in self.scripts],
        arguments=self.arguments,
        command=self.commands)

    # add summary.xml
    outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
    outputsandbox_temp += unique(self.getJobObject().outputsandbox)
    outputsandbox = unique(outputsandbox_temp)

    input_files = []
    input_files += [FileBuffer('gaudipython-wrapper.py', the_script)]
    logger.debug("Returning StandardJobConfig")
    return (None, StandardJobConfig(inputbox=input_files, outputbox=outputsandbox))
def sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("RTUTils sandbox_prepare")

    job = stripProxy(app).getJobObject()

    # Add the job.in/outputsandbox as splitters create subjobs that are
    # separate Job objects and therefore have their own job.in/outputsandbox
    # which is NOT in general copied from the master in/outputsandbox
    # inputsandbox = job.inputsandbox[:]  # copied in splitter
    # outputsandbox = job.outputsandbox[:]
    inputsandbox = []
    outputsandbox = []

    # Here add any sandbox files coming from the appsubconfig
    # currently none. masterjobconfig inputsandbox added automatically
    if appsubconfig:
        inputsandbox += appsubconfig.getSandboxFiles()

    # Strangely NEITHER the master outputsandbox NOR job.outputsandbox
    # are added automatically.
    if jobmasterconfig:
        outputsandbox += jobmasterconfig.getOutputSandboxFiles()
    if appsubconfig:
        outputsandbox += appsubconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that applications can be submitted
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    if isinstance(app.jobScriptArchive, LocalFile):
        app.jobScriptArchive = None

    generateJobScripts(app, appendJobScripts=True)

    scriptArchive = os.path.join(app.jobScriptArchive.localDir, app.jobScriptArchive.namePattern)

    inputsandbox.append(File(name=scriptArchive))

    if app.getMetadata:
        logger.info("Adding options to make the summary.xml")
        inputsandbox.append(FileBuffer('summary.py', "\nfrom Gaudi.Configuration import *\nfrom Configurables import LHCbApp\nLHCbApp().XMLSummary='summary.xml'"))

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def configure(self, master_appconfig):
    # self._configure()
    modulename = split(self.module.name)[-1].split('.')[0]
    script = """
from copy import deepcopy
from Gaudi.Configuration import *
importOptions('data.py')
import %s as USERMODULE
EventSelectorInput = deepcopy(EventSelector().Input)
FileCatalogCatalogs = deepcopy(FileCatalog().Catalogs)
EventSelector().Input=[]
FileCatalog().Catalogs=[]\n""" % modulename

    script_configure = "USERMODULE.configure(EventSelectorInput,FileCatalogCatalogs%s)\n"
    if self.params:
        param_string = ",params=%s" % self.params
    else:
        param_string = ""

    script_configure = script_configure % param_string
    script += script_configure

    script += "USERMODULE.run(%d)\n" % self.events
    script += getXMLSummaryScript()

    # add summary.xml
    outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
    outputsandbox_temp += unique(self.getJobObject().outputsandbox)
    outputsandbox = unique(outputsandbox_temp)

    input_files = []
    input_files += [FileBuffer('gaudipython-wrapper.py', script)]
    logger.debug("Returning StandardJobConfig")
    return (None, StandardJobConfig(inputbox=input_files, outputbox=outputsandbox))
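# Illustration (not part of the handler above): rendering the wrapper that
# configure() assembles, using hypothetical values in place of the application
# attributes ('myanalysis', the params dict and the event count are invented
# for this sketch; the getXMLSummaryScript() output is omitted).
modulename = 'myanalysis'       # hypothetical user module name
params = "{'nEvents': 10}"      # hypothetical self.params
events = -1                     # hypothetical self.events

script = """
from copy import deepcopy
from Gaudi.Configuration import *
importOptions('data.py')
import %s as USERMODULE
EventSelectorInput = deepcopy(EventSelector().Input)
FileCatalogCatalogs = deepcopy(FileCatalog().Catalogs)
EventSelector().Input=[]
FileCatalog().Catalogs=[]\n""" % modulename
script += "USERMODULE.configure(EventSelectorInput,FileCatalogCatalogs,params=%s)\n" % params
script += "USERMODULE.run(%d)\n" % events
print(script)  # the content written to gaudipython-wrapper.py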
def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that applications can be submitted
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    # If we are getting the metadata we need to make sure the summary.xml is added to the output sandbox if not there already.
    if app.getMetadata and 'summary.xml' not in outputsandbox:
        outputsandbox += ['summary.xml']

    if not isinstance(app.uploadedInput, DiracFile):
        generateDiracInput(app)
        assert isinstance(app.uploadedInput, DiracFile), "Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n" % DiracFile.diracLFNBase()

    rep_data = app.uploadedInput.getReplicas()
    assert rep_data != {}, "Failed to find a replica, aborting submit"

    if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
        app.jobScriptArchive = None

    generateDiracScripts(app)

    assert isinstance(app.jobScriptArchive, DiracFile), "Failed to upload needed file, aborting submit"

    rep_data = app.jobScriptArchive.getReplicas()
    assert rep_data != {}, "Failed to find a replica, aborting submit"

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("RTUTils sandbox_prepare")

    job = app.getJobObject()

    # Add the job.in/outputsandbox as splitters create subjobs that are
    # separate Job objects and therefore have their own job.in/outputsandbox
    # which is NOT in general copied from the master in/outputsandbox
    # inputsandbox = job.inputsandbox[:]  # copied in splitter
    # outputsandbox = job.outputsandbox[:]
    inputsandbox = []
    outputsandbox = []

    # Here add any sandbox files coming from the appsubconfig
    # currently none. masterjobconfig inputsandbox added automatically
    if appsubconfig:
        inputsandbox += appsubconfig.getSandboxFiles()

    # Strangely NEITHER the master outputsandbox NOR job.outputsandbox
    # are added automatically.
    if jobmasterconfig:
        outputsandbox += jobmasterconfig.getOutputSandboxFiles()
    if appsubconfig:
        outputsandbox += appsubconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
def configure(self, master_appconfig):
    ## strip leading and trailing blanks from arguments
    self.arguments = [a.strip() for a in self.arguments]

    ## strip leading and trailing blanks from the command
    self.commands = [a.strip() for a in self.commands]

    ## the script layout
    the_script = layout.format(
        scripts=[os.path.join(f.subdir, os.path.basename(f.name)) for f in self.scripts],
        imports=[os.path.join(f.subdir, os.path.basename(f.name)) for f in self.imports],
        arguments=self.arguments,
        command=self.commands)

    logger.debug('SCRIPT:\n%s', the_script)

    # add summary.xml
    outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
    outputsandbox_temp += unique(self.getJobObject().outputsandbox)
    outputsandbox = unique(outputsandbox_temp)

    input_files = []
    input_files += [FileBuffer('gaudipython-wrapper.py', the_script)]
    logger.debug("Returning StandardJobConfig")
    return (None, StandardJobConfig(inputbox=input_files, outputbox=outputsandbox))
def master_prepare(self, app, appmasterconfig):
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ['inputsandbox'])

    # add summary.xml
    outputsandbox += ['summary.xml', '__parsedxmlsummary__']

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    """
    This function prepares the application of a master job during submit.
    A priori we aren't doing anything with this in Im3ShapeApp but until this is understood I'd rather not remove it
    Args:
        app (IApplication): This is the application given in the master job
        appmasterconfig (tuple): This is the configuration which is to prepare the app in the master job  # TODO check type and this interface
    """
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that applications can be submitted
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    cred_req = app.getJobObject().backend.credential_requirements
    check_creds(cred_req)

    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    # If we are getting the metadata we need to make sure the summary.xml is added to the output sandbox if not there already.
    if app.getMetadata and 'summary.xml' not in outputsandbox:
        outputsandbox += ['summary.xml']

    # Check a previously uploaded input is there in case of a job copy
    if isinstance(app.uploadedInput, DiracFile):
        if app.uploadedInput.getReplicas() == {}:
            app.uploadedInput = None
            logger.info("Previously uploaded cmake target missing from Dirac. Uploading it again.")

    if not isinstance(app.uploadedInput, DiracFile):
        generateDiracInput(app)
        try:
            assert isinstance(app.uploadedInput, DiracFile)
        except AssertionError:
            raise ApplicationPrepareError("Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n" % DiracFile.diracLFNBase(cred_req))

    rep_data = app.uploadedInput.getReplicas()
    try:
        assert rep_data != {}
    except AssertionError:
        raise ApplicationPrepareError("Failed to find a replica of uploaded file, aborting submit")

    if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
        app.jobScriptArchive = None

    generateDiracScripts(app)

    try:
        assert isinstance(app.jobScriptArchive, DiracFile)
    except AssertionError:
        raise ApplicationPrepareError("Failed to upload needed file, aborting submit")

    rep_data = app.jobScriptArchive.getReplicas()
    try:
        assert rep_data != {}
    except AssertionError:
        raise ApplicationPrepareError("Failed to find a replica, aborting submit")

    # Create replicas of the job script and uploaded input files
    replicateJobFile(app.jobScriptArchive)
    replicateJobFile(app.uploadedInput)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ["inputsandbox"])

    # add summary.xml
    outputsandbox += ["summary.xml", "__parsedxmlsummary__"]

    logger.debug("Master Prepare LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    if type(app.exe) == File:
        input_dir = app.getJobObject().getInputWorkspace().getPath()
        exefile = os.path.join(input_dir, os.path.basename(app.exe.name))
        if not os.path.exists(exefile):
            msg = 'Executable: "%s" must exist!' % str(exefile)
            raise ApplicationConfigurationError(None, msg)

        os.system('chmod +x %s' % exefile)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that applications can be submitted
    Args:
        app (GaudiRun): This application is only expected to handle GaudiRun Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    WarnUsers()

    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    run_script = self.__create_run_script(app, appsubconfig, appmasterconfig, jobmasterconfig, inputsandbox, outputsandbox)

    return StandardJobConfig(FileBuffer("gaudi-script.py", run_script, executable=1),
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    if type(app.exe) == File:
        exefile = os.path.join(get_share_path(app), os.path.basename(app.exe.name))
        if not os.path.exists(exefile):
            msg = 'Executable must exist!'
            raise ApplicationConfigurationError(None, msg)

        os.system('chmod +x %s' % exefile)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    logger.debug("Master Prepare")

    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ['inputsandbox'])

    # add summary.xml
    outputsandbox += ['summary.xml', '__parsedxmlsummary__']

    logger.debug("Master Prepare LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_sandbox_prepare(app, appmasterconfig, sharedir_roots=None):
    if sharedir_roots is None:
        sharedir_roots = ['']

    logger.debug("RTUTils master_sandbox_prepare")

    # catch errors from not preparing properly
    if not hasattr(stripProxy(app), 'is_prepared') or app.is_prepared is None:
        logger.warning('Application is not prepared properly')
        if hasattr(stripProxy(app), 'is_prepared'):
            logger.warning("app.is_prepared: %s" % str(app.is_prepared))
        import traceback
        traceback.print_stack()
        raise GangaException(None, 'Application not prepared properly')

    # Note EITHER the master inputsandbox OR the job.inputsandbox is added to
    # the subjob inputsandbox depending if the jobmasterconfig object is present
    # or not... Therefore combine the job.inputsandbox with appmasterconfig.
    job = stripProxy(app).getJobObject()

    # user added items from the interactive GPI
    from Ganga.Utility.Config import getConfig
    if not getConfig('Output')['ForbidLegacyInput']:
        inputsandbox = job.inputsandbox[:]
    else:
        if len(job.inputsandbox) > 0:
            from Ganga.GPIDev.Lib.Job import JobError
            raise JobError("InputFiles have been requested but there are objects in the inputSandBox... Aborting Job Prepare!")
        inputsandbox = []
        for filepattern in getInputFilesPatterns(job)[0]:
            inputsandbox.append(File(filepattern))

    if len(inputsandbox) > 100:
        logger.warning('InputSandbox exceeds maximum size (100) supported by the Dirac backend')
        raise GangaException(None, 'InputSandbox exceeds maximum size')

    outputsandbox = getOutputSandboxPatterns(job)  # job.outputsandbox[:]

    # inputsandbox files stored in share_dir from prepare method
    sharedir_handler(app, sharedir_roots, inputsandbox)

    # Here add any sandbox files/data coming from the appmasterconfig
    # from master_configure. Catch the case where None is passed (as in tests)
    if appmasterconfig:
        inputsandbox += appmasterconfig.getSandboxFiles()
        outputsandbox += appmasterconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
def get_output(self, job):
    '''Builds lists of output files and output data.'''
    outputdata = [f.namePattern for f in job.outputfiles
                  if outputFilePostProcessingOnWN(job, getName(f))]
    outsandbox = [f.namePattern for f in job.outputfiles
                  if not outputFilePostProcessingOnWN(job, getName(f))]

    # if the user put any files in both, remove them from the sandbox
    # (iterate over a copy as we remove entries from the list)
    for f in outsandbox[:]:
        if outputdata.count(f) != 0:
            outsandbox.remove(f)
            msg = 'User placed the file %s in both the outputsandbox and '
            msg += 'outputdata. It will be removed from the sandbox.'
            logger.warning(msg, f)

    gaudi_outsandbox, gaudi_outputdata = self.get_output_files()

    # handle (as best we can) any user supplied wildcards
    datalist = []  # files in sandbox that match pattern in data
    for f in outputdata:
        datalist += fnmatch.filter(gaudi_outsandbox, f)

    sandlist = []  # files in data that match sandbox pattern
    for f in outsandbox:
        sandlist += fnmatch.filter(gaudi_outputdata, f)

    datadatalist = []  # files in data that match patterns in data
    for f in outputdata:
        datadatalist += fnmatch.filter(gaudi_outputdata, f)

    # files in sandbox which match patterns in data -> data
    for f in datalist:
        gaudi_outputdata.append(f)
        if f in gaudi_outsandbox:
            gaudi_outsandbox.remove(f)
    # files in data which match patterns in sandbox but not data -> sandbox
    for f in sandlist:
        if datalist.count(f) == 0 and datadatalist.count(f) == 0:
            gaudi_outsandbox.append(f)
            if f in gaudi_outputdata:
                gaudi_outputdata.remove(f)

    outsandbox += gaudi_outsandbox
    outputdata += gaudi_outputdata

    return unique(outsandbox), unique(outputdata)
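# A worked illustration (standalone, with hypothetical file names and a stub
# in place of self.get_output_files()) of the wildcard reconciliation in
# get_output() above: a file declared in the application's sandbox list that
# matches a user 'outputdata' pattern migrates to the data list.
import fnmatch

outputdata = ['*.dst']                            # user data patterns
gaudi_outsandbox = ['output.dst', 'summary.xml']  # stub application sandbox files
gaudi_outputdata = []                             # stub application data files

datalist = []
for pattern in outputdata:
    datalist += fnmatch.filter(gaudi_outsandbox, pattern)

# sandbox files matching a data pattern -> data
for f in datalist:
    gaudi_outputdata.append(f)
    if f in gaudi_outsandbox:
        gaudi_outsandbox.remove(f)

print(gaudi_outsandbox)  # ['summary.xml']
print(gaudi_outputdata)  # ['output.dst']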
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    job = app.getJobObject()
    outputfiles = [this_file.namePattern for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")

    script_generator(gaudi_script_template(),
                     # remove_unreplaced = False,
                     outputfile_path=gaudi_script_path,
                     PLATFORM=app.platform,
                     COMMAND='gaudirun.py'
                     # , OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                     )

    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    EXE=gaudi_script_path,
                                    EXE_ARG_STR=' '.join([str(arg) for arg in app.args]),
                                    EXE_LOG_FILE='Ganga_%s_%s.log' % (app.appname, app.version),
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=outputsandbox,
                                    OUTPUTDATA=list(outputfiles),
                                    OUTPUT_PATH="",  # job.fqid,
                                    OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    PLATFORM=app.platform,
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
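# A minimal sketch of the template convention assumed by the script_generator()
# calls in this section (this is NOT the actual Ganga implementation): keys are
# substituted as ###KEY### markers and, unless remove_unreplaced=False, any
# line still carrying an unreplaced ###...### marker is dropped afterwards.
# This is why INPUT_SANDBOX is set to the two-hash '##INPUT_SANDBOX##' above:
# it survives this pass untouched and is filled in later by backend.submit
# ("Note only using 2 #s as auto-remove 3").
import re

def sketch_script_generator(template, remove_unreplaced=True, **subs):
    script = template
    for key, value in subs.items():
        script = script.replace('###%s###' % key, str(value))
    if remove_unreplaced:
        # drop any line that still contains an unreplaced ###...### marker
        script = '\n'.join(line for line in script.split('\n')
                           if not re.search(r'###\w+###', line))
    return script

template = ("j.setName('###NAME###')\n"
            "j.setInputSandbox(###INPUT_SANDBOX###)\n"
            "j.setDestination('###DESTINATION###')")
print(sketch_script_generator(template, NAME='myjob',
                              INPUT_SANDBOX='##INPUT_SANDBOX##'))
# -> the DESTINATION line is dropped; '##INPUT_SANDBOX##' remains in place.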
def master_prepare(self, app, appmasterconfig):
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    # check file is set OK
    if not app.script.name:
        msg = 'Root.script.name must be set.'
        raise ApplicationConfigurationError(msg)

    sharedir_scriptpath = os.path.join(get_share_path(app), os.path.basename(app.script.name))

    if not os.path.exists(sharedir_scriptpath):
        msg = 'Script must exist!'
        raise ApplicationConfigurationError(msg)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def master_prepare(self, app, appmasterconfig):
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    # check file is set OK
    if not app.script.name:
        msg = 'Root.script.name must be set.'
        raise ApplicationConfigurationError(None, msg)

    sharedir_scriptpath = os.path.join(get_share_path(app), os.path.basename(app.script.name))

    if not os.path.exists(sharedir_scriptpath):
        msg = 'Script must exist!'
        raise ApplicationConfigurationError(None, msg)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def dirac_inputdata(app, hasOtherInputData=False):
    """
    Construct the JDL component which requests the inputdata for a job
    Args:
        app (IApplication): app which belongs to the job of interest
        hasOtherInputData (bool): This is used to stop BannedSites being added to the JDL structure through backend.settings
    """
    job = app.getJobObject()
    input_data = None
    parametricinput_data = None
    inputLFNs = []

    if not job.inputdata and (not job.master or not job.master.inputdata):
        return input_data, parametricinput_data

    wanted_job = job
    if not job.inputdata and job.master and job.master.inputdata is not None and job.master.inputdata:
        wanted_job = job.master

    inputLFNs = ['LFN:' + this_file.lfn for this_file in wanted_job.inputdata if isinstance(this_file, DiracFile)]

    # master job with a splitter reaching prepare, hence bulk submit
    if not job.master and job.splitter:
        parametricinput_data = dirac_parametric_split(app)
        if parametricinput_data is not None and len(parametricinput_data) > getConfig('DIRAC')['MaxDiracBulkJobs']:
            raise BackendError('Dirac', 'Number of bulk submission jobs \'%s\' exceeds the maximum allowed \'%s\'. If more are needed please modify your config. Note there is a hard limit in Dirac of currently 1000.' % (len(parametricinput_data), getConfig('DIRAC')['MaxDiracBulkJobs']))
    # master job with no splitter, or subjob already split: proceed as normal
    else:
        input_data = inputLFNs

    if 'Destination' not in job.backend.settings and not inputLFNs and not hasOtherInputData:
        t1_sites = getConfig('DIRAC')['noInputDataBannedSites']
        logger.info('Job has no inputdata (T1 sites will be banned to help avoid overloading them).')
        if 'BannedSites' in job.backend.settings:
            job.backend.settings['BannedSites'].extend(t1_sites)
            job.backend.settings['BannedSites'] = unique(job.backend.settings['BannedSites'])
        else:
            if t1_sites:
                job.backend.settings['BannedSites'] = t1_sites[:]

    if not input_data and not parametricinput_data:
        input_data = inputLFNs

    return input_data, parametricinput_data
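# Quick standalone illustration of the BannedSites merge above, with a plain
# dict standing in for job.backend.settings and hypothetical site names;
# unique() is assumed to be order-preserving de-duplication (sketched inline).
settings = {'BannedSites': ['LCG.CERN.ch', 'LCG.CNAF.it']}
t1_sites = ['LCG.CERN.ch', 'LCG.GRIDKA.de']

settings['BannedSites'].extend(t1_sites)
deduped = []
for site in settings['BannedSites']:
    if site not in deduped:
        deduped.append(site)
settings['BannedSites'] = deduped
print(settings['BannedSites'])  # ['LCG.CERN.ch', 'LCG.CNAF.it', 'LCG.GRIDKA.de']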
def dirac_inputdata(app):
    job = stripProxy(app).getJobObject()

    input_data = None
    parametricinput_data = None

    inputLFNs = []

    if hasattr(job.inputdata, "getLFNs"):
        inputLFNs = job.inputdata.getLFNs()

    if job.master:
        logger.debug("job.master.inputdata: %s " % str(job.master.inputdata))
    logger.debug("job.inputdata: %s" % str(job.inputdata))
    if hasattr(job.inputdata, "getLFNs"):
        logger.debug("getLFNs(): %s" % job.inputdata.getLFNs())

    has_input_DiracFile = False
    for this_file in job.inputfiles:
        if isType(this_file, DiracFile):
            has_input_DiracFile = True
            break
    if job.master and not has_input_DiracFile:
        for this_file in job.master.inputfiles:
            if isType(this_file, DiracFile):
                has_input_DiracFile = True
                break

    if len(inputLFNs) > 0:
        # master job with a splitter reaching prepare, hence bulk submit
        if not job.master and job.splitter:
            parametricinput_data = dirac_parametric_split(app)
            if parametricinput_data is not None and len(parametricinput_data) > getConfig("DIRAC")["MaxDiracBulkJobs"]:
                raise BackendError(
                    "Dirac",
                    "Number of bulk submission jobs '%s' exceeds the maximum allowed '%s'. If more are needed please modify your config. Note there is a hard limit in Dirac of currently 1000."
                    % (len(parametricinput_data), getConfig("DIRAC")["MaxDiracBulkJobs"]),
                )
        # master job with no splitter, or subjob already split: proceed as normal
        else:
            input_data = job.inputdata.getLFNs()

    elif "Destination" not in job.backend.settings and not has_input_DiracFile:
        ## THIS IS NOT VERY DIRAC CENTRIC
        ## PLEASE WHEN TIME MOVE TO LHCBDIRAC where T1 is more applicable rcurrie
        ## Also editing the settings on the fly is asking for potential problems, should avoid
        t1_sites = getConfig("DIRAC")["noInputDataBannedSites"]
        logger.info("Job has no inputdata (T1 sites will be banned to help avoid overloading them).")
        if "BannedSites" in job.backend.settings:
            job.backend.settings["BannedSites"].extend(t1_sites)
            job.backend.settings["BannedSites"] = unique(job.backend.settings["BannedSites"])
        else:
            job.backend.settings["BannedSites"] = t1_sites[:]

    # import traceback
    # traceback.print_stack()

    return input_data, parametricinput_data
def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that applications can be submitted
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    if isinstance(app.jobScriptArchive, LocalFile):
        app.jobScriptArchive = None

    generateJobScripts(app, appendJobScripts=True)

    scriptArchive = os.path.join(app.jobScriptArchive.localDir, app.jobScriptArchive.namePattern)

    inputsandbox.append(File(name=scriptArchive))

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
def configure(self, master_appconfig):
    # self._configure()
    name = join('.', self.script[0].subdir, split(self.script[0].name)[-1])
    script = "from Gaudi.Configuration import *\n"
    if self.args:
        script += 'import sys\nsys.argv += %s\n' % str(self.args)

    script += "importOptions('data.py')\n"
    script += "execfile(\'%s\')\n" % name

    # add summary.xml
    outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
    outputsandbox_temp += unique(self.getJobObject().outputsandbox)
    outputsandbox = unique(outputsandbox_temp)

    input_files = []
    input_files += [FileBuffer('gaudipython-wrapper.py', script)]
    logger.debug("Returning Job Configuration")
    return (None, StandardJobConfig(inputbox=input_files, outputbox=outputsandbox))
def sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("RTUTils sandbox_prepare")

    inputsandbox = []
    outputsandbox = []

    # Here add any sandbox files coming from the appsubconfig
    # currently none. masterjobconfig inputsandbox added automatically
    if appsubconfig:
        inputsandbox += appsubconfig.getSandboxFiles()

    # Strangely NEITHER the master outputsandbox NOR job.outputsandbox
    # are added automatically.
    if jobmasterconfig:
        outputsandbox += jobmasterconfig.getOutputSandboxFiles()
    if appsubconfig:
        outputsandbox += appsubconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
def configure(self, master_appconfig):
    self._configure()
    name = join('.', self.script[0].subdir, split(self.script[0].name)[-1])
    script = "from Gaudi.Configuration import *\n"
    if self.args:
        script += 'import sys\nsys.argv += %s\n' % str(self.args)

    script += "importOptions('data.py')\n"
    script += "execfile(\'%s\')\n" % name

    self.extra.input_buffers['gaudipython-wrapper.py'] = script
    outsb = self.getJobObject().outputsandbox
    self.extra.outputsandbox = unique(outsb)

    return (None, self.extra)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    logger.debug("input_data: " + str(input_data))

    job = app.getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]
    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    params = {'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
              'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
              'DIRAC_OBJECT': 'DiracLHCb()',
              'JOB_OBJECT': 'LHCbJob()',
              'NAME': mangle_job_name(app),
              'INPUTDATA': input_data,
              'PARAMETRIC_INPUTDATA': parametricinput_data,
              'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
              'OUTPUTFILESSCRIPT': lhcb_dirac_outputfiles,
              'OUTPUT_PATH': "",  # job.fqid,
              'SETTINGS': diracAPI_script_settings(app),
              'DIRAC_OPTS': job.backend.diracOpts,
              'PLATFORM': getConfig('ROOT')['arch'],
              'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
              # leave the sandbox for altering later as needs
              # to be done in backend.submit to combine master.
              # Note only using 2 #s as auto-remove 3
              'INPUT_SANDBOX': '##INPUT_SANDBOX##'}

    scriptpath = os.path.join(get_share_path(app), os.path.basename(app.script.name))

    wrapper_path = os.path.join(job.getInputWorkspace(create=True).getPath(), 'script_wrapper.py')

    python_wrapper = \
        """#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'
del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []:
    script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

    python_wrapper = python_wrapper.replace('###SCRIPT_ARGS###', str('###JOINER###'.join([str(a) for a in app.args])))

    params.update({'APP_NAME': 'Root',
                   'APP_VERSION': app.version,
                   'APP_SCRIPT': wrapper_path,
                   'APP_LOG_FILE': 'Ganga_Root.log'})

    # params.update({'ROOTPY_SCRIPT': wrapper_path,
    #                'ROOTPY_VERSION': app.version,
    #                'ROOTPY_LOG_FILE': 'Ganga_Root.log',
    #                'ROOTPY_ARGS': [str(a) for a in app.args]})

    f = open(wrapper_path, 'w')
    if app.usepython:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='',
                                          COMMAND='/usr/bin/env python %s %s' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=' ',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    else:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='script_args=[formatVar(v) for v in script_args]',
                                          COMMAND='export DISPLAY=\"localhost:0.0\" && root -l -q \"%s(%s)\"' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=',',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    f.write(python_wrapper)
    f.close()

    dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)
    # outputdata, outputdata_path = dirac_ouputdata(app)

    job = stripProxy(app).getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    commandline = []
    commandline.append(app.exe)
    if isType(app.exe, File):
        # logger.info("app: %s" % str(app.exe.name))
        # fileName = os.path.join(get_share_path(app), os.path.basename(app.exe.name))
        # logger.info("EXE: %s" % str(fileName))
        # inputsandbox.append(File(name=fileName))
        inputsandbox.append(app.exe)
        commandline[0] = os.path.join('.', os.path.basename(app.exe.name))
    commandline.extend([str(arg) for arg in app.args])

    logger.debug('Command line: %s: ', commandline)

    # exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
    exe_script_name = 'exe-script.py'

    logger.info("Setting Command to be: '%s'" % repr(commandline))

    inputsandbox.append(FileBuffer(name=exe_script_name,
                                   contents=script_generator(exe_script_template(),
                                                             # remove_unreplaced = False,
                                                             COMMAND=repr(commandline),
                                                             OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' ')),
                                   executable=True))

    contents = script_generator(exe_script_template(),
                                COMMAND=repr(commandline),
                                OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' '))

    # logger.info("Script is: %s" % str(contents))

    from os.path import abspath, expanduser

    for this_file in job.inputfiles:
        if isinstance(this_file, LocalFile):
            for name in this_file.getFilenameList():
                inputsandbox.append(File(abspath(expanduser(name))))
        elif isinstance(this_file, DiracFile):
            name = this_file.lfn
            if isinstance(input_data, list):
                input_data.append(name)
            else:
                input_data = [name]

    dirac_outputfiles = dirac_outputfile_jdl(outputfiles, config['RequireDefaultSE'])

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    # os.path.basename(exe_script_path),
                                    EXE=exe_script_name,
                                    # ' '.join([str(arg) for arg in app.args]),
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,  # app.env,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid,
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if config['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    # logger.info("dirac_script: %s" % dirac_script)
    # logger.info("inbox: %s" % str(unique(inputsandbox)))
    # logger.info("outbox: %s" % str(unique(outputsandbox)))

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def get_user_dlls(appname, version, user_release_area, platform, env):
    user_ra = user_release_area
    update_project_path(user_release_area)
    from Ganga.Utility.files import fullpath
    full_user_ra = fullpath(user_ra)  # expand any symbolic links

    # Work our way through the CMTPROJECTPATH until we find a cmt directory
    if 'CMTPROJECTPATH' not in env:
        return [], [], []

    projectdirs = env['CMTPROJECTPATH'].split(os.pathsep)
    appveruser = os.path.join(appname + '_' + version, 'cmt')
    appverrelease = os.path.join(appname.upper(), appname.upper() + '_' + version, 'cmt')

    for projectdir in projectdirs:
        projectDir = fullpath(os.path.join(projectdir, appveruser))
        logger.debug('Looking for projectdir %s' % projectDir)
        if os.path.exists(projectDir):
            break
        projectDir = fullpath(os.path.join(projectdir, appverrelease))
        logger.debug('Looking for projectdir %s' % projectDir)
        if os.path.exists(projectDir):
            break

    logger.debug('Using the CMT directory %s for identifying projects' % projectDir)
    # rc, showProj, m = shell.cmd1('cd ' + projectDir + ';cmt show projects',
    #                              capture_stderr=True)
    from GangaGaudi.Lib.Applications.GaudiUtils import shellEnv_cmd
    rc, showProj, m = shellEnv_cmd('cmt show projects', env, projectDir)

    logger.debug(showProj)

    libs = []
    merged_pys = []
    subdir_pys = {}
    project_areas = []
    py_project_areas = []

    for line in showProj.split('\n'):
        for entry in line.split():
            if entry.startswith(user_ra) or entry.startswith(full_user_ra):
                tmp = entry.rstrip('\)')
                libpath = fullpath(os.path.join(tmp, 'InstallArea', platform, 'lib'))
                logger.debug(libpath)
                project_areas.append(libpath)
                pypath = fullpath(os.path.join(tmp, 'InstallArea', 'python'))
                logger.debug(pypath)
                py_project_areas.append(pypath)
                pypath = fullpath(os.path.join(tmp, 'InstallArea', platform, 'python'))
                logger.debug(pypath)
                py_project_areas.append(pypath)

    # savannah 47793 (remove multiple copies of the same areas)
    from Ganga.Utility.util import unique
    project_areas = unique(project_areas)
    py_project_areas = unique(py_project_areas)

    ld_lib_path = []
    if 'LD_LIBRARY_PATH' in env:
        ld_lib_path = env['LD_LIBRARY_PATH'].split(':')

    project_areas_dict = {}
    for area in project_areas:
        if area in ld_lib_path:
            project_areas_dict[area] = ld_lib_path.index(area)
        else:
            project_areas_dict[area] = 666

    from operator import itemgetter
    sorted_project_areas = []
    for item in sorted(project_areas_dict.items(), key=itemgetter(1)):
        sorted_project_areas.append(item[0])

    lib_names = []
    for libpath in sorted_project_areas:
        if os.path.exists(libpath):
            for f in os.listdir(libpath):
                if lib_names.count(f) > 0:
                    continue
                fpath = os.path.join(libpath, f)
                if os.path.exists(fpath):
                    lib_names.append(f)
                    libs.append(fpath)
                else:
                    logger.warning("File %s in %s does not exist. Skipping...", str(f), str(libpath))

    for pypath in py_project_areas:
        if os.path.exists(pypath):
            from GangaGaudi.Lib.Applications.GaudiUtils import pyFileCollector
            from Ganga.Utility.Config import getConfig
            configGaudi = getConfig('GAUDI')
            pyFileCollector(pypath, merged_pys, subdir_pys, configGaudi['pyFileCollectionDepth'])

    import pprint
    logger.debug("%s", pprint.pformat(libs))
    logger.debug("%s", pprint.pformat(merged_pys))
    logger.debug("%s", pprint.pformat(subdir_pys))

    return libs, merged_pys, subdir_pys
def getOutputSandboxFiles(self):
    """Get all output sandbox files. The duplicates are removed."""
    from Ganga.Utility.util import unique
    return unique(self.outputbox)
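# unique() is imported from Ganga.Utility.util and relied on throughout this
# section. Its contract, inferred from usage (e.g. the savannah 47793 comment
# and the BannedSites merge), is order-preserving de-duplication. A minimal
# sketch under that assumption -- not the actual implementation:
def unique(seq):
    # list membership rather than a set, so unhashable items such as
    # File/FileBuffer objects are handled too
    out = []
    for item in seq:
        if item not in out:
            out.append(item)
    return out

assert unique(['a.dst', 'b.dst', 'a.dst']) == ['a.dst', 'b.dst']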
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app)

    logger.debug("input_data: " + str(input_data))

    job = app.getJobObject()
    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]
    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    params = {'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
              'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
              'DIRAC_OBJECT': 'DiracLHCb()',
              'JOB_OBJECT': 'LHCbJob()',
              'NAME': mangle_job_name(app),
              'INPUTDATA': input_data,
              'PARAMETRIC_INPUTDATA': parametricinput_data,
              'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
              'OUTPUTFILESSCRIPT': lhcb_dirac_outputfiles,
              'OUTPUT_PATH': "",  # job.fqid,
              'OUTPUT_SE': getConfig('DIRAC')['DiracOutputDataSE'],
              'SETTINGS': diracAPI_script_settings(app),
              'DIRAC_OPTS': job.backend.diracOpts,
              'PLATFORM': getConfig('ROOT')['arch'],
              'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
              # leave the sandbox for altering later as needs
              # to be done in backend.submit to combine master.
              # Note only using 2 #s as auto-remove 3
              'INPUT_SANDBOX': '##INPUT_SANDBOX##'}

    scriptpath = os.path.join(get_share_path(app), os.path.basename(app.script.name))

    wrapper_path = os.path.join(job.getInputWorkspace(create=True).getPath(), 'script_wrapper.py')

    python_wrapper = \
        """#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'
del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []:
    script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

    python_wrapper = python_wrapper.replace('###SCRIPT_ARGS###', str('###JOINER###'.join([str(a) for a in app.args])))

    params.update({'APP_NAME': 'Root',
                   'APP_VERSION': app.version,
                   'APP_SCRIPT': wrapper_path,
                   'APP_LOG_FILE': 'Ganga_Root.log'})

    # params.update({'ROOTPY_SCRIPT': wrapper_path,
    #                'ROOTPY_VERSION': app.version,
    #                'ROOTPY_LOG_FILE': 'Ganga_Root.log',
    #                'ROOTPY_ARGS': [str(a) for a in app.args]})

    f = open(wrapper_path, 'w')
    if app.usepython:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='',
                                          COMMAND='/usr/bin/env python %s %s' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=' ',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    else:
        python_wrapper = script_generator(python_wrapper,
                                          remove_unreplaced=False,
                                          FIXARGS='script_args=[formatVar(v) for v in script_args]',
                                          COMMAND='export DISPLAY=\"localhost:0.0\" && root -l -q \"%s(%s)\"' % (os.path.basename(app.script.name), '%s'),
                                          JOINER=',',
                                          # INJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                                          )
    f.write(python_wrapper)
    f.close()

    dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """
    This function prepares the application of the actual job being submitted, master or not
    Args:
        app (IApplication): This is the application actually being submitted belonging to the master or sub job being configured
        appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
        appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
        jobmasterconfig (StandardJobConfig): This is the configuration of the master job which may or may not be the same job as owning the app
    """
    # Construct some common objects used in job submission here
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)

    job = app.getJobObject()

    # Construct the im3shape-script which is used by this job, i.e. the script and full command line to be used in this job
    exe_script_name = 'im3shape-script.py'
    output_filename = os.path.basename(job.inputdata[0].lfn) + '.' + str(app.rank) + '.' + str(app.size)
    im3shape_args = ' '.join([os.path.basename(job.inputdata[0].lfn),
                              os.path.basename(app.ini_location.namePattern),  # input.fz, config.ini
                              app.catalog,
                              output_filename,  # catalog, output
                              str(app.rank),
                              str(app.size)])

    full_cmd = app.exe_name + ' ' + im3shape_args

    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    inputsandbox.append(FileBuffer(name=exe_script_name,
                                   contents=script_generator(Im3Shape_script_template(),
                                                             # ARGS for app from job.app
                                                             RUN_DIR=app.run_dir,
                                                             BLACKLIST=os.path.basename(app.blacklist.namePattern),
                                                             COMMAND=full_cmd,
                                                             # Stuff for Ganga
                                                             OUTPUTFILES=repr([this_file.namePattern for this_file in job.outputfiles]),
                                                             OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' ')),
                                   executable=True))

    # TODO once there is a common IApplication.getMeFilesForThisApp function, replace this list with a getter and it shouldn't really be hard-coded
    app_file_list = [app.im3_location, app.ini_location, app.blacklist]
    app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]
    job.inputfiles.extend(app_file_list)

    # Slightly mis-using this here but it would be nice to have these files
    # job.inputfiles.extend(job.inputdata)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    EXE=exe_script_name,
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfile_jdl(outputfiles, False),
                                    OUTPUT_PATH="",  # job.fqid,
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
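# Illustration of the command line assembled in prepare() above, using
# hypothetical values (the exe name, file names, rank and size are invented
# for this sketch) to show the argument order: input, config, catalog,
# output, rank, size.
exe_name = 'run-im3shape'   # hypothetical app.exe_name
lfn_base = 'input.fz'       # hypothetical basename of job.inputdata[0].lfn
output_filename = lfn_base + '.3.128'  # rank 3 of size 128
im3shape_args = ' '.join([lfn_base, 'config.ini', 'catalog.txt', output_filename, '3', '128'])
print(exe_name + ' ' + im3shape_args)
# -> run-im3shape input.fz config.ini catalog.txt input.fz.3.128 3 128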
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("Prepare")

    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = stripProxy(app).getJobObject()

    if job.inputdata:
        if not job.splitter:
            if len(job.inputdata) > 100:
                raise BackendError("You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!")

    outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

    data_str = 'import os\n'
    data_str += 'execfile(\'data.py\')\n'

    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer('data-wrapper.py', data_str))

    input_data = []

    # Can't wait to get rid of this when people no longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from job over that in
    # optsfiles but need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app), 'inputdata', 'options_data.pkl')

        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()

    #######################################################################
    # Can't wait to get rid of this when people no longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app), 'output', 'options_parser.pkl')

    if os.path.exists(share_path):
        # if not os.path.exists(share_path):
        #     raise GangaException('could not find the parser')
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters
        fileTransform = allComponentFilters['gangafiles']

        outdata_files = [fileTransform(this_file, None) for this_file in outdata
                         if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend([output_file for output_file in outdata_files
                                             if not isType(output_file, DiracFile)])

        outbox_files = [fileTransform(this_file, None) for this_file in outbox
                        if not FileUtils.doesFileExist(this_file, job.outputfiles)]
        job.non_copyable_outputfiles.extend([outbox_file for outbox_file in outbox_files
                                             if not isType(outbox_file, DiracFile)])

        outputsandbox = [f.namePattern for f in job.non_copyable_outputfiles]
        outputsandbox.extend([f.namePattern for f in job.outputfiles if not isType(f, DiracFile)])
        outputsandbox = unique(outputsandbox)  # + outbox[:]

    #######################################################################

    input_data_dirac, parametricinput_data = dirac_inputdata(job.application)

    if input_data_dirac is not None:
        for f in input_data_dirac:
            if isType(f, DiracFile):
                input_data.append(f.lfn)
            elif isType(f, str):
                input_data.append(f)
            else:
                raise ApplicationConfigurationError("Don't know how to handle anything other than DiracFiles or strings to LFNs!")

    commandline = "python ./gaudipython-wrapper.py"
    if is_gaudi_child(app):
        commandline = 'gaudirun.py '
        commandline += ' '.join([str(arg) for arg in app.args])
        commandline += ' options.pkl data-wrapper.py'

    logger.debug('Command line: %s: ', commandline)

    gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")

    script_generator(gaudi_script_template(),
                     # remove_unreplaced = False,
                     outputfile_path=gaudi_script_path,
                     PLATFORM=app.platform,
                     COMMAND=commandline,
                     XMLSUMMARYPARSING=getXMLSummaryScript()
                     # , OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ' ')
                     )

    # logger.debug("input_data %s" % str(input_data))

    # We want to propagate the ancestor depth to DIRAC when we have
    # inputdata set
    if job.inputdata is not None and isType(job.inputdata, LHCbDataset):
        # As the RT Handler we already know we have a Dirac backend
        if type(job.backend.settings) is not dict:
            raise ApplicationConfigurationError(None, 'backend.settings should be a dict')

        if 'AncestorDepth' in job.backend.settings:
            ancestor_depth = job.backend.settings['AncestorDepth']
        else:
            ancestor_depth = job.inputdata.depth
    else:
        ancestor_depth = 0

    lhcbdirac_script_template = lhcbdiracAPI_script_template()

    lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

    # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
    # remove after Ganga6 release
    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(lhcbdirac_script_template,
                                    DIRAC_IMPORT='from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
                                    DIRAC_JOB_IMPORT='from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
                                    DIRAC_OBJECT='DiracLHCb()',
                                    JOB_OBJECT='LHCbJob()',
                                    NAME=mangle_job_name(app),
                                    APP_NAME=stripProxy(app).appname,
                                    APP_VERSION=app.version,
                                    APP_SCRIPT=gaudi_script_path,
                                    APP_LOG_FILE='Ganga_%s_%s.log' % (stripProxy(app).appname, app.version),
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
                                    OUTPUT_PATH="",  # job.fqid, outputdata_path,
                                    OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
                                    SETTINGS=diracAPI_script_settings(job.application),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    PLATFORM=app.platform,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    ANCESTOR_DEPTH=ancestor_depth,
                                    ## This is to be modified in the final 'submit' function in the backend
                                    ## The backend also handles the inputfiles DiracFiles as appropriate
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
def dirac_inputdata(app):
    job = stripProxy(app).getJobObject()

    input_data = None
    parametricinput_data = None

    inputLFNs = []
    if hasattr(job.inputdata, 'getLFNs'):
        inputLFNs = job.inputdata.getLFNs()

    if job.master:
        logger.debug("job.master.inputdata: %s " % str(job.master.inputdata))
    logger.debug("job.inputdata: %s" % str(job.inputdata))
    if hasattr(job.inputdata, 'getLFNs'):
        logger.debug("getLFNs(): %s" % job.inputdata.getLFNs())

    has_input_DiracFile = False
    for this_file in job.inputfiles:
        if isType(this_file, DiracFile):
            has_input_DiracFile = True
            break
    if job.master and not has_input_DiracFile:
        for this_file in job.master.inputfiles:
            if isType(this_file, DiracFile):
                has_input_DiracFile = True
                break

    if len(inputLFNs) > 0:
        # master job with a splitter reaching prepare, hence bulk submit
        if not job.master and job.splitter:
            parametricinput_data = dirac_parametric_split(app)
            if parametricinput_data is not None and len(parametricinput_data) > getConfig('DIRAC')['MaxDiracBulkJobs']:
                raise BackendError(
                    'Dirac',
                    'Number of bulk submission jobs \'%s\' exceeds the maximum allowed \'%s\'; if more are needed please modify your config. Note there is currently a hard limit of 1000 in Dirac.'
                    % (len(parametricinput_data), getConfig('DIRAC')['MaxDiracBulkJobs']))
        # master job with no splitter or subjob already split; proceed as normal
        else:
            input_data = job.inputdata.getLFNs()
    elif 'Destination' not in job.backend.settings and not has_input_DiracFile:
        ## THIS IS NOT VERY DIRAC CENTRIC
        ## PLEASE WHEN TIME MOVE TO LHCBDIRAC where T1 is more applicable rcurrie
        ## Also editing the settings on the fly is asking for potential problems, should avoid
        t1_sites = getConfig('DIRAC')['noInputDataBannedSites']
        logger.info('Job has no inputdata (T1 sites will be banned to help avoid overloading them).')
        if 'BannedSites' in job.backend.settings:
            job.backend.settings['BannedSites'].extend(t1_sites)
            job.backend.settings['BannedSites'] = unique(job.backend.settings['BannedSites'])
        else:
            job.backend.settings['BannedSites'] = t1_sites[:]

    #import traceback
    #traceback.print_stack()

    return input_data, parametricinput_data
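# ---------------------------------------------------------------------------
# A hedged illustration (not Ganga code) of the BannedSites bookkeeping in
# dirac_inputdata() above: merge the configured T1 ban-list into any existing
# backend setting without duplicates, and slice-copy rather than alias the
# config list. _toy_unique stands in for the Ganga unique() helper.
def _toy_unique(seq):
    seen, out = set(), []
    for item in seq:
        if item not in seen:
            seen.add(item)
            out.append(item)
    return out

def _demo_banned_sites_merge(settings, t1_sites):
    if 'BannedSites' in settings:
        settings['BannedSites'].extend(t1_sites)
        settings['BannedSites'] = _toy_unique(settings['BannedSites'])
    else:
        settings['BannedSites'] = t1_sites[:]  # copy so the config list is untouched
    return settings

# _demo_banned_sites_merge({'BannedSites': ['LCG.CERN.ch']},
#                          ['LCG.CERN.ch', 'LCG.GRIDKA.de'])
# -> {'BannedSites': ['LCG.CERN.ch', 'LCG.GRIDKA.de']}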
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """
    This function prepares the application of the actual job being submitted, master or not
    Args:
        app (IApplication): This is the application actually being submitted belonging to the master or sub job being configured
        appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
        appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
        jobmasterconfig (StandardJobConfig): This is the configuration of the master job which may or may not be the same job as owning the app
    """

    # Construct some common objects used in job submission here
    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
    input_data, parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)

    job = app.getJobObject()

    # Construct the im3shape-script which is used by this job, i.e. the script and full command line to be used in this job
    exe_script_name = 'im3shape-script.py'

    output_filename = os.path.basename(job.inputdata[0].lfn) + '.' + str(app.rank) + '.' + str(app.size)

    im3shape_args = ' '.join([os.path.basename(job.inputdata[0].lfn),
                              os.path.basename(app.ini_location.namePattern),  # input.fz, config.ini
                              app.catalog, output_filename,  # catalog, output
                              str(app.rank), str(app.size)])

    full_cmd = app.exe_name + ' ' + im3shape_args

    outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

    inputsandbox.append(FileBuffer(name=exe_script_name,
                                   contents=script_generator(Im3Shape_script_template(),
                                                             ## ARGS for app from job.app
                                                             RUN_DIR=app.run_dir,
                                                             BLACKLIST=os.path.basename(app.blacklist.namePattern),
                                                             COMMAND=full_cmd,
                                                             ## Stuff for Ganga
                                                             OUTPUTFILES=repr([this_file.namePattern for this_file in job.outputfiles]),
                                                             OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, ' '),
                                                             ),
                                   executable=True))

    # TODO: once there is a common IApplication.getMeFilesForThisApp function, replace this list with a getter, as it shouldn't really be hard-coded
    app_file_list = [app.im3_location, app.ini_location, app.blacklist]

    app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]

    job.inputfiles.extend(app_file_list)

    # Slightly mis-using this here but it would be nice to have these files
    #job.inputfiles.extend(job.inputdata)

    # NOTE special case for replicas: replicate string must be empty for no
    # replication
    dirac_script = script_generator(diracAPI_script_template(),
                                    DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                    DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                    DIRAC_OBJECT='Dirac()',
                                    JOB_OBJECT='Job()',
                                    NAME=mangle_job_name(app),
                                    EXE=exe_script_name,
                                    EXE_ARG_STR='',
                                    EXE_LOG_FILE='Ganga_Executable.log',
                                    ENVIRONMENT=None,
                                    INPUTDATA=input_data,
                                    PARAMETRIC_INPUTDATA=parametricinput_data,
                                    OUTPUT_SANDBOX=API_nullifier(outputsandbox),
                                    OUTPUTFILESSCRIPT=dirac_outputfile_jdl(outputfiles, False),
                                    OUTPUT_PATH="",  # job.fqid,
                                    SETTINGS=diracAPI_script_settings(app),
                                    DIRAC_OPTS=job.backend.diracOpts,
                                    REPLICATE='True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                                    # leave the sandbox for altering later as needs
                                    # to be done in backend.submit to combine master.
                                    # Note only using 2 #s as auto-remove 3
                                    INPUT_SANDBOX='##INPUT_SANDBOX##')

    return StandardJobConfig(dirac_script,
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
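# ---------------------------------------------------------------------------
# Hedged sketch of the worker-node command line assembled in the im3shape
# prepare() above, with stand-in values. The rank/size pair is assumed to be
# this subjob's shard index and total shard count; 'run-im3shape' is a
# hypothetical stand-in for app.exe_name, and the config/catalog names are
# illustrative only.
import os

def _demo_im3shape_cmd(lfn='/grid/user/example/input.fz', rank=0, size=128):
    output_filename = os.path.basename(lfn) + '.' + str(rank) + '.' + str(size)
    args = ' '.join([os.path.basename(lfn), 'config.ini',  # input.fz, config.ini
                     'catalog.txt', output_filename,       # catalog, output
                     str(rank), str(size)])
    return 'run-im3shape ' + args

# _demo_im3shape_cmd() ->
#   'run-im3shape input.fz config.ini catalog.txt input.fz.0.128 0 128'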
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    logger.debug("Prepare")

    inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

    job = app.getJobObject()

    logger.debug("Loading pickle files")

    #outputfiles = set([file.namePattern for file in job.outputfiles]).difference(set(getOutputSandboxPatterns(job)))

    # Can't wait to get rid of this when people no longer specify
    # inputdata in options file
    #######################################################################
    # splitters ensure that subjobs pick up inputdata from job over that in
    # optsfiles but need to take care of unsplit jobs
    if not job.master:
        share_path = os.path.join(get_share_path(app), 'inputdata', 'options_data.pkl')
        if not job.inputdata:
            if os.path.exists(share_path):
                f = open(share_path, 'r+b')
                job.inputdata = pickle.load(f)
                f.close()

    #######################################################################
    # Can't wait to get rid of this when people no longer specify
    # outputsandbox or outputdata in options file
    #######################################################################
    share_path = os.path.join(get_share_path(app), 'output', 'options_parser.pkl')

    logger.debug("Adding info from pickle files")
    if os.path.exists(share_path):
        f = open(share_path, 'r+b')
        parser = pickle.load(f)
        f.close()

        outbox, outdata = parser.get_output(job)

        from Ganga.GPIDev.Lib.File import FileUtils
        from Ganga.GPIDev.Base.Filters import allComponentFilters

        fileTransform = allComponentFilters['gangafiles']
        job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outdata
                                             if not FileUtils.doesFileExist(this_file, job.outputfiles)])
        job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outbox
                                             if not FileUtils.doesFileExist(this_file, job.outputfiles)])

        outputsandbox.extend([f.namePattern for f in job.non_copyable_outputfiles])
        outputsandbox.extend([f.namePattern for f in job.outputfiles])
        outputsandbox = unique(outputsandbox)
    #######################################################################

    logger.debug("Doing XML Catalog stuff")

    data = job.inputdata
    data_str = ''
    if data:
        logger.debug("Returning options String")
        data_str = data.optionsString()
        if data.hasLFNs():
            logger.debug("Returning Catalogue")
            inputsandbox.append(FileBuffer('catalog.xml', data.getCatalog()))
            cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
            data_str += cat_opts

    logger.debug("Doing splitter_data stuff")
    if hasattr(job, '_splitter_data'):
        data_str += job._splitter_data
    inputsandbox.append(FileBuffer('data.py', data_str))

    logger.debug("Doing GaudiPython stuff")

    cmd = 'python ./gaudipython-wrapper.py'
    opts = ''
    if is_gaudi_child(job.application):
        opts = 'options.pkl'
        cmd = 'gaudirun.py ' + ' '.join(job.application.args) + ' %s data.py' % opts

    logger.debug("Setting up script")
    script = script_generator(create_runscript(job.application.newStyleApp),
                              remove_unreplaced=False,
                              OPTS=opts,
                              PROJECT_OPTS=job.application.setupProjectOptions,
                              APP_NAME=job.application.appname,
                              APP_VERSION=job.application.version,
                              APP_PACKAGE=job.application.package,
                              PLATFORM=job.application.platform,
                              CMDLINE=cmd,
                              XMLSUMMARYPARSING=getXMLSummaryScript()
                              #, OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '')
                              )

    logger.debug("Returning StandardJobConfig")
    return StandardJobConfig(FileBuffer('gaudi-script.py', script, executable=1),
                             inputbox=unique(inputsandbox),
                             outputbox=unique(outputsandbox))
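# ---------------------------------------------------------------------------
# Hedged sketch of how prepare() above assembles the 'data.py' buffer:
# dataset options first, then (only when the dataset carries LFNs) a Gaudi
# FileCatalog redirect to the shipped catalog.xml, then any per-subjob text
# the splitter attached. A plain-string stand-in, not the Ganga code path.
def _demo_build_data_py(options_str, has_lfns, splitter_data=None):
    data_str = options_str
    if has_lfns:
        data_str += ('\nfrom Gaudi.Configuration import FileCatalog\n'
                     'FileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n')
    if splitter_data is not None:
        data_str += splitter_data
    return data_str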
def master_prepare(self, app, appmasterconfig):
    """
    Prepare the RTHandler for the master job so that the applications to be submitted are configured correctly
    Args:
        app (GaudiExec): This application is only expected to handle GaudiExec Applications here
        appmasterconfig (unknown): Output passed from the application master configuration call
    """
    cred_req = app.getJobObject().backend.credential_requirements
    check_creds(cred_req)

    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

    # If we are getting the metadata we need to make sure the summary.xml is added to the output sandbox if not there already.
    if app.getMetadata and 'summary.xml' not in outputsandbox:
        outputsandbox += ['summary.xml']

    # Check a previously uploaded input is there in case of a job copy
    if isinstance(app.uploadedInput, DiracFile):
        if app.uploadedInput.getReplicas() == {}:
            app.uploadedInput = None
            logger.info("Previously uploaded cmake target missing from Dirac. Uploading it again.")

    if not isinstance(app.uploadedInput, DiracFile):
        generateDiracInput(app)
        try:
            assert isinstance(app.uploadedInput, DiracFile)
        except AssertionError:
            raise ApplicationPrepareError(
                "Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n"
                % DiracFile.diracLFNBase(cred_req))

    rep_data = app.uploadedInput.getReplicas()
    try:
        assert rep_data != {}
    except AssertionError:
        raise ApplicationPrepareError("Failed to find a replica of uploaded file, aborting submit")

    if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
        app.jobScriptArchive = None

    generateDiracScripts(app)
    try:
        assert isinstance(app.jobScriptArchive, DiracFile)
    except AssertionError:
        raise ApplicationPrepareError("Failed to upload needed file, aborting submit")

    rep_data = app.jobScriptArchive.getReplicas()
    try:
        assert rep_data != {}
    except AssertionError:
        raise ApplicationPrepareError("Failed to find a replica, aborting submit")

    # Create a replica of the job and scripts files
    replicateJobFile(app.jobScriptArchive)
    replicateJobFile(app.uploadedInput)

    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
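# ---------------------------------------------------------------------------
# The master_prepare() above repeats one pattern three times: make sure a
# grid file exists AND has at least one replica, otherwise regenerate and
# re-check. A hedged, generic sketch of that pattern; get_file/regenerate
# mirror the calls above, getReplicas() is duck-typed, and GridUploadError
# is a stand-in for ApplicationPrepareError.
class GridUploadError(Exception):
    pass

def _ensure_replicated(get_file, regenerate, describe):
    """Return a file object that is known to have at least one replica.

    get_file   -- callable returning the current file object (or None)
    regenerate -- callable that re-creates/re-uploads the file
    describe   -- short name used in error messages
    """
    this_file = get_file()
    # a previously uploaded file with no surviving replica is as good as gone
    if this_file is None or this_file.getReplicas() == {}:
        regenerate()
        this_file = get_file()
    if this_file is None:
        raise GridUploadError("Failed to upload %s, aborting submit" % describe)
    if this_file.getReplicas() == {}:
        raise GridUploadError("Failed to find a replica of %s, aborting submit" % describe)
    return this_file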
def get_user_dlls(appname, version, user_release_area, platform, shell):

    user_ra = user_release_area
    update_cmtproject_path(user_release_area)
    full_user_ra = fullpath(user_ra)  # expand any symbolic links

    # Work our way through the CMTPATH until we find a cmt directory
    if not shell.env.has_key('CMTPATH'):
        return [], [], []
    projectdirs = shell.env['CMTPATH'].split(os.pathsep)
    #appveruser = os.path.join(appname + '_' + version, 'cmt')
    appveruser = os.path.join(version, 'cmt')
    #appverrelease = os.path.join(appname.upper(), appname.upper() + '_' + version, 'cmt')
    appverrelease = os.path.join('cmt')

    for projectdir in projectdirs:
        dir = fullpath(os.path.join(projectdir, appveruser))
        logger.debug('Looking for projectdir %s' % dir)
        if os.path.exists(dir):
            break
        dir = fullpath(os.path.join(projectdir, appverrelease))
        logger.debug('Looking for projectdir %s' % dir)
        if os.path.exists(dir):
            break
    logger.debug('Using the CMT directory %s for identifying projects' % dir)

    rc, showProj, m = shell.cmd1('cd ' + dir + ';cmt show projects', capture_stderr=True)
    logger.debug(showProj)

    libs = []
    merged_pys = []
    subdir_pys = {}
    project_areas = []
    py_project_areas = []

    for line in showProj.split('\n'):
        for entry in line.split():
            if entry.startswith(user_ra) or entry.startswith(full_user_ra):
                tmp = entry.rstrip('\)')
                libpath = fullpath(os.path.join(tmp, 'InstallArea', platform, 'lib'))
                logger.debug(libpath)
                project_areas.append(libpath)
                pypath = fullpath(os.path.join(tmp, 'InstallArea', 'python'))
                logger.debug(pypath)
                py_project_areas.append(pypath)
                pypath = fullpath(os.path.join(tmp, 'InstallArea', platform, 'python'))
                logger.debug(pypath)
                py_project_areas.append(pypath)

    # savannah 47793 (remove multiple copies of the same areas)
    project_areas = unique(project_areas)
    py_project_areas = unique(py_project_areas)

    ld_lib_path = []
    if shell.env.has_key('LD_LIBRARY_PATH'):
        ld_lib_path = shell.env['LD_LIBRARY_PATH'].split(':')

    # rank each project area by its position in LD_LIBRARY_PATH; areas not
    # on the path get the sentinel 666 so they sort after all known ones
    project_areas_dict = {}
    for area in project_areas:
        if area in ld_lib_path:
            project_areas_dict[area] = ld_lib_path.index(area)
        else:
            project_areas_dict[area] = 666

    from operator import itemgetter
    sorted_project_areas = []
    for item in sorted(project_areas_dict.items(), key=itemgetter(1)):
        sorted_project_areas.append(item[0])

    lib_names = []
    for libpath in sorted_project_areas:
        if os.path.exists(libpath):
            for f in os.listdir(libpath):
                if lib_names.count(f) > 0:
                    continue
                fpath = os.path.join(libpath, f)
                if os.path.exists(fpath):
                    lib_names.append(f)
                    libs.append(fpath)
                else:
                    logger.warning("File %s in %s does not exist. Skipping...", str(f), str(libpath))

    for pypath in py_project_areas:
        if os.path.exists(pypath):
            for f in os.listdir(pypath):
                confDB_path = os.path.join(pypath, f)
                if confDB_path.endswith('.py'):
                    if os.path.exists(confDB_path):
                        merged_pys.append(confDB_path)
                    else:
                        logger.warning("File %s in %s does not exist. Skipping...", str(f), str(confDB_path))
                elif os.path.isdir(confDB_path):
                    pyfiles = []
                    for g in os.listdir(confDB_path):
                        file_path = os.path.join(confDB_path, g)
                        if file_path.endswith('.py'):
                            if os.path.exists(file_path):
                                pyfiles.append(file_path)
                            else:
                                logger.warning("File %s in %s does not exist. Skipping...", str(g), str(f))
                    subdir_pys[f] = pyfiles

    logger.debug("%s", pprint.pformat(libs))
    logger.debug("%s", pprint.pformat(merged_pys))
    logger.debug("%s", pprint.pformat(subdir_pys))

    return libs, merged_pys, subdir_pys
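# ---------------------------------------------------------------------------
# The project-area ordering in get_user_dlls() above sorts areas by their
# position in LD_LIBRARY_PATH, pushing unknown areas to the back with the
# sentinel 666. A small standalone illustration of that trick (helper name
# is ours):
def _sort_by_search_path(areas, search_path):
    from operator import itemgetter
    rank = {}
    for area in areas:
        # areas missing from the search path sort after all known ones
        rank[area] = search_path.index(area) if area in search_path else 666
    return [area for area, _ in sorted(rank.items(), key=itemgetter(1))]

# _sort_by_search_path(['/b', '/a', '/c'], ['/a', '/b']) -> ['/a', '/b', '/c']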
def master_prepare(self, app, appmasterconfig):
    inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ['inputsandbox'])
    return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
class Gaudi(Francesc):

    _name = 'Gaudi'
    __doc__ = GaudiDocString(_name)
    _category = 'applications'
    _exportmethods = ['getenv', 'getpack', 'make', 'cmt', 'register',
                      'add_output_dir', 'get_output_dir',
                      'add_dataset_name', 'get_dataset_name']

    schema = get_common_gaudi_schema()
    docstr = 'The name of the optionsfile. Import statements in the file ' \
             'will be expanded at submission time and a full copy made'
    schema['optsfile'] = FileItem(sequence=1, strict_sequence=0, defvalue=[], doc=docstr)
    docstr = 'The name of the rec optionsfile. '
    schema['recoptsfile'] = FileItem(sequence=1, strict_sequence=0, defvalue=[], doc=docstr)
    docstr = 'The name of the ana optionsfile. '
    schema['anaoptsfile'] = FileItem(sequence=1, strict_sequence=0, defvalue=[], doc=docstr)
    docstr = 'The name of the Gaudi application (e.g. "DaVinci", "Gauss"...)'
    schema['appname'] = SimpleItem(defvalue=None, typelist=['str', 'type(None)'], hidden=1, doc=docstr)
    schema['configured'] = SimpleItem(defvalue=None, hidden=0, copyable=0, typelist=['str', 'type(None)'])
    docstr = 'A python configurable string that will be appended to the ' \
             'end of the options file. Can be multiline by using a ' \
             'notation like \nHistogramPersistencySvc().OutputFile = ' \
             '\"myPlots.root"\\nEventSelector().PrintFreq = 100\n or by ' \
             'using triple quotes around a multiline string.'
    schema['extraopts'] = SimpleItem(defvalue=None, typelist=['str', 'type(None)'], doc=docstr)
    docstr = 'User metadata'
    schema['metadata'] = SimpleItem(defvalue={}, doc=docstr)
    docstr = 'Task name'
    schema['taskname'] = SimpleItem(defvalue='', doc=docstr)
    docstr = 'Long idle job'
    schema['long_idle'] = SimpleItem(defvalue=False, doc=docstr)
    docstr = 'Create dataset'
    schema['create_dataset'] = SimpleItem(defvalue=True, doc=docstr)
    docstr = 'Auto add stream number'
    schema['auto_stream'] = SimpleItem(defvalue=False, doc=docstr)
    docstr = 'Use local random trigger files'
    schema['local_rantrg'] = SimpleItem(defvalue=True, doc=docstr)
    docstr = 'Patch files'
    schema['patch'] = SimpleItem(defvalue=[], doc=docstr)
    docstr = 'Use patch for BOSS'
    schema['use_boss_patch'] = SimpleItem(defvalue=True, doc=docstr)
    docstr = 'Auto upload files'
    schema['auto_upload'] = SimpleItem(defvalue=[], doc=docstr)
    docstr = 'User workarea'
    schema['user_workarea'] = SimpleItem(defvalue='', doc=docstr)
    docstr = 'Use custom packages'
    schema['use_custom_package'] = SimpleItem(defvalue=False, doc=docstr)
    docstr = 'Output directory'
    schema['output_dir'] = SimpleItem(defvalue='GangaBoss', doc=docstr)
    docstr = 'Output root directory'
    schema['output_rootdir'] = SimpleItem(defvalue='', doc=docstr)
    docstr = 'Output data type'
    schema['output_step'] = SimpleItem(defvalue=[], doc=docstr)
    _schema = Schema(Version(2, 1), schema)

    def _auto__init__(self):
        """Bootstrap Gaudi applications.
        If called via a subclass, set up some basic structure like version, platform..."""
        if not self.appname:
            return
        self._init(self.appname, True)

    def master_configure(self):

        self._validate_version()

        job = self.getJobObject()
        self._master_configure()
        inputs = self._check_inputs()

        optsfiles = [fileitem.name for fileitem in self.optsfile]
        recoptsfiles = [fileitem.name for fileitem in self.recoptsfile]
        anaoptsfiles = [fileitem.name for fileitem in self.anaoptsfile]
        try:
            parser = PythonOptionsParser(optsfiles, self.extraopts, self.shell)
            if recoptsfiles:
                recparser = PythonOptionsParser(recoptsfiles, self.extraopts, self.shell)
            if anaoptsfiles:
                anaparser = PythonOptionsParser(anaoptsfiles, self.extraopts, self.shell)
        except ApplicationConfigurationError, e:
            debug_dir = job.getDebugWorkspace().getPath()
            f = open(debug_dir + '/gaudirun.stdout', 'w')
            f.write(e.message)
            f.close()
            msg = 'Unable to parse job options! Please check options ' \
                  'files and extraopts. The output from gaudirun.py can be ' \
                  'found in %s. You can also view this from within ganga ' \
                  'by doing job.peek(\'../debug/gaudirun.stdout\').' % f.name
            #logger.error(msg)
            raise ApplicationConfigurationError(None, msg)

        self.extra.master_input_buffers['options.opts'] = parser.opts_str
        if recoptsfiles:
            self.extra.master_input_buffers['recoptions.opts'] = recparser.opts_str
        if anaoptsfiles:
            self.extra.master_input_buffers['anaoptions.opts'] = anaparser.opts_str
        inputdata = parser.get_input_data()

        # If user specified a dataset, ignore optsfile data but warn the user.
        if len(inputdata.files) > 0:
            if job.inputdata:
                msg = 'A dataset was specified for this job but one was ' \
                      'also defined in the options file. Data in the options ' \
                      'file will be ignored...hopefully this is OK.'
                logger.warning(msg)
            else:
                logger.info('Using the inputdata defined in the options file.')
                self.extra.inputdata = inputdata

        # only output the last step
        if not self.output_step:
            if anaoptsfiles:
                self.output_step.append('ana')
            elif recoptsfiles:
                self.output_step.append('rec')
            else:
                self.output_step.append('sim')

        # data type for each step
        simoutputsandbox, simoutputdata = parser.get_output(job)
        for temp_output in simoutputdata:
            temp_data_type = os.path.splitext(temp_output)[-1][1:]
            temp_data_type = temp_data_type.strip()
            if temp_data_type in ['rtraw']:
                self.extra.data_type['sim'] = temp_data_type
        if recoptsfiles:
            recoutputsandbox, recoutputdata = recparser.get_output(job)
            for temp_output in recoutputdata:
                temp_data_type = os.path.splitext(temp_output)[-1][1:]
                temp_data_type = temp_data_type.strip()
                if temp_data_type in ['dst', 'rec']:
                    self.extra.data_type['rec'] = temp_data_type
        if anaoptsfiles:
            anaoutputsandbox, anaoutputdata = anaparser.get_output(job)
            self.extra.ana_file_nos = anaparser.get_ana_file_nos()
            for temp_output in anaoutputdata:
                temp_data_type = os.path.splitext(temp_output)[-1][1:]
                temp_data_type = temp_data_type.strip()
                if temp_data_type in ['root']:
                    self.extra.data_type['ana'] = temp_data_type
        logger.debug('The output step : %s' % self.output_step)
        logger.debug('The data_type : %s' % self.extra.data_type)

        # get output file name
        if anaoptsfiles:
            self.extra.outputsandbox, outputdata = anaoutputsandbox, anaoutputdata
        elif recoptsfiles:
            self.extra.outputsandbox, outputdata = recoutputsandbox, recoutputdata
        else:
            self.extra.outputsandbox, outputdata = simoutputsandbox, simoutputdata
        self.extra.outputdata.files += outputdata
        self.extra.outputdata.files = unique(self.extra.outputdata.files)

        self._validate_input()
        self._custom_package()
        self._auto_upload_workarea()

        if self.output_rootdir:
            bdr = BDRegister(self.extra.metadata)
            bdr.setRootDir(self.output_rootdir)

        self._prepare_metadata(parser)
        self._task_info()

        # write env into input dir
        input_dir = job.getInputWorkspace().getPath()
        env_file = gzip.GzipFile(input_dir + '/gaudi-env.py.gz', 'wb')
        env_file.write('gaudi_env = %s' % str(self.shell.env))
        env_file.close()

        return (inputs, self.extra)  # return (changed, extra)
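# ---------------------------------------------------------------------------
# master_configure() above finishes by gzip-writing the shell environment to
# <input workspace>/gaudi-env.py.gz as a single 'gaudi_env = {...}' Python
# assignment. A hedged sketch (the helper name is ours) of reading it back:
import gzip

def _load_gaudi_env(input_dir):
    f = gzip.GzipFile(input_dir + '/gaudi-env.py.gz', 'rb')
    source = f.read()
    f.close()
    namespace = {}
    exec(source, namespace)  # defines namespace['gaudi_env']
    return namespace['gaudi_env']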