def master_sandbox_prepare(app, appmasterconfig, sharedir_roots=None):
    """Build the master input and output sandboxes for a prepared application.

    Args:
        app: the (possibly proxied) application object; must already have been
            prepared, i.e. ``app.is_prepared`` is set.
        appmasterconfig: result of master_configure; may be ``None`` (as in
            tests), in which case its sandbox files are simply not added.
        sharedir_roots: list of sub-directories of the shared directory passed
            to ``sharedir_handler``; defaults to ``['']``.

    Returns:
        tuple: ``(inputsandbox, outputsandbox)``, each de-duplicated via
        ``unique``.

    Raises:
        GangaException: if the application is not prepared, or the input
            sandbox exceeds 100 entries (Dirac backend limit).
        JobError: if legacy input-sandbox use is forbidden by config but
            ``job.inputsandbox`` is non-empty.
    """
    # NOTE(review): formatting reconstructed from a whitespace-mangled source;
    # block structure (notably the placement of the >100 check at function
    # level) follows the upstream layout — confirm against the repository.
    if sharedir_roots is None:
        sharedir_roots = ['']

    logger.debug("RTUTils master_sandbox_prepare")

    # catch errors from not preparing properly
    if not hasattr(stripProxy(app), 'is_prepared') or app.is_prepared is None:
        logger.warning('Application is not prepared properly')
        if hasattr(stripProxy(app), 'is_prepared'):
            logger.warning("app.is_prepared: %s" % str(app.is_prepared))
        import traceback
        traceback.print_stack()
        raise GangaException(None, 'Application not prepared properly')

    # Note EITHER the master inputsandbox OR the job.inputsandbox is added to
    # the subjob inputsandbox depending if the jobmasterconfig object is present
    # or not... Therefore combine the job.inputsandbox with appmasterconfig.
    job = stripProxy(app).getJobObject()

    # user added items from the interactive GPI
    from Ganga.Utility.Config import getConfig
    if not getConfig('Output')['ForbidLegacyInput']:
        # legacy path: take the user-populated inputsandbox as-is (copy)
        inputsandbox = job.inputsandbox[:]
    else:
        # legacy sandbox forbidden: the user must use inputfiles instead
        if len(job.inputsandbox) > 0:
            from Ganga.GPIDev.Lib.Job import JobError
            raise JobError("InputFiles have been requested but there are objects in the inputSandBox... Aborting Job Prepare!")
        inputsandbox = []
        # first element of getInputFilesPatterns(job) holds the file patterns
        for filepattern in getInputFilesPatterns(job)[0]:
            inputsandbox.append(File(filepattern))

    # Dirac backend supports at most 100 input-sandbox entries
    if len(inputsandbox) > 100:
        logger.warning('InputSandbox exceeds maximum size (100) supported by the Dirac backend')
        raise GangaException(None, 'InputSandbox exceed maximum size')

    outputsandbox = getOutputSandboxPatterns(job)  # job.outputsandbox[:]

    # inputsandbox files stored in share_dir from prepare method
    sharedir_handler(app, sharedir_roots, inputsandbox)

    # Here add any sandbox files/data coming from the appmasterconfig
    # from master_configure. Catch the case where None is passed (as in tests)
    if appmasterconfig:
        inputsandbox += appmasterconfig.getSandboxFiles()
        outputsandbox += appmasterconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
def master_sandbox_prepare(app, appmasterconfig, sharedir_roots=None):
    """Assemble the master input/output sandboxes of a prepared application.

    Combines the job-level input sandbox (or, when legacy input is forbidden,
    the patterns coming from the job's inputfiles) with whatever the
    application's master_configure step contributed, and lets
    ``sharedir_handler`` pick up files stored in the shared directory.

    Returns a de-duplicated ``(inputsandbox, outputsandbox)`` pair.
    """
    if sharedir_roots is None:
        sharedir_roots = ['']

    logger.debug("RTUTils master_sandbox_prepare")

    raw_app = stripProxy(app)

    # Guard: refuse to continue when prepare() was never run on the app.
    if not hasattr(raw_app, 'is_prepared') or app.is_prepared is None:
        logger.warning('Application is not prepared properly')
        if hasattr(raw_app, 'is_prepared'):
            logger.warning("app.is_prepared: %s" % str(app.is_prepared))
        import traceback
        traceback.print_stack()
        raise GangaException(None, 'Application not prepared properly')

    # Note EITHER the master inputsandbox OR the job.inputsandbox ends up in
    # the subjob inputsandbox, depending on whether a jobmasterconfig object
    # is present; hence the job.inputsandbox is merged with appmasterconfig.
    job = raw_app.getJobObject()

    # Items added interactively through the GPI.
    from Ganga.Utility.Config import getConfig
    if getConfig('Output')['ForbidLegacyInput']:
        # Legacy sandbox is disabled: the user must use inputfiles instead.
        if len(job.inputsandbox) > 0:
            from Ganga.GPIDev.Lib.Job import JobError
            raise JobError("InputFiles have been requested but there are objects in the inputSandBox... Aborting Job Prepare!")
        inputsandbox = [File(pattern) for pattern in getInputFilesPatterns(job)[0]]
    else:
        inputsandbox = job.inputsandbox[:]

    # The Dirac backend caps the input sandbox at 100 entries.
    if len(inputsandbox) > 100:
        logger.warning('InputSandbox exceeds maximum size (100) supported by the Dirac backend')
        raise GangaException(None, 'InputSandbox exceed maximum size')

    outputsandbox = getOutputSandboxPatterns(job)

    # Pull in the inputsandbox files the prepare step stored in the share dir.
    sharedir_handler(app, sharedir_roots, inputsandbox)

    # Add sandbox files/data produced by master_configure; appmasterconfig
    # may be None (e.g. in tests), in which case nothing is added.
    if appmasterconfig:
        inputsandbox += appmasterconfig.getSandboxFiles()
        outputsandbox += appmasterconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
def preparejob(self, jobconfig, master_job_sandbox):
    """Prepare the LCG JDL and the job wrapper script.

    Fills the job-wrapper template, packs the input sandbox (pre-staging
    oversized files to the iocache), composes the JDL dictionary and writes
    both the wrapper and the JDL file into the job's input workspace.

    Args:
        jobconfig: the application's job configuration (sandbox files,
            executable, arguments, environment, requirements).
        master_job_sandbox: sandbox files of the master job to be shared by
            this (sub)job.

    Returns:
        The path of the written JDL file, or None if input-sandbox
        preparation failed (or, silently, if the jobtype is unsupported).

    FIX: the pre-stage bookkeeping (``max_prestaged_fsize``, ``lfc_host``)
    was previously reset to 0/'' right after the pre-stage loop, so the
    predicted transfer timeout was always 0 and LFC_HOST in the JDL
    environment was always empty. The erroneous resets are removed.
    """
    script = self.__jobWrapperTemplate__()

    job = self.getJobObject()
    inpw = job.getInputWorkspace()

    wrapperlog = '__jobscript__.log'

    import Ganga.Core.Sandbox as Sandbox

    # FIXME: check what happens if 'stdout','stderr' are specified here
    script = script.replace('###OUTPUTSANDBOX###', repr(jobconfig.outputbox))
    script = script.replace('###APPLICATION_NAME###', getName(job.application))
    script = script.replace('###APPLICATIONEXEC###', repr(jobconfig.getExeString()))
    script = script.replace('###APPLICATIONARGS###', repr(jobconfig.getArguments()))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles

    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' '))
    script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' '))

    if jobconfig.env:
        script = script.replace('###APPLICATIONENVS###', repr(jobconfig.env))
    else:
        script = script.replace('###APPLICATIONENVS###', repr({}))

    script = script.replace('###WRAPPERLOG###', repr(wrapperlog))
    import inspect
    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    mon = job.getMonitoringService()

    self.monInfo = None

    # set the monitoring file by default to the stdout
    if isinstance(self.monInfo, dict):
        self.monInfo['remotefile'] = 'stdout'

    # try to print out the monitoring service information in debug mode
    try:
        logger.debug('job info of monitoring service: %s' % str(self.monInfo))
    except:
        pass

    # prepare input/output sandboxes
    packed_files = jobconfig.getSandboxFiles() + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules())
    sandbox_files = job.createPackedInputSandbox(packed_files)

    # sandbox of child jobs should include master's sandbox
    sandbox_files.extend(master_job_sandbox)

    # check the input file size and pre-upload larger inputs to the iocache
    lfc_host = ''

    input_sandbox_uris = []
    input_sandbox_names = []

    ick = True

    max_prestaged_fsize = 0
    for f in sandbox_files:
        idx = self.__check_and_prestage_inputfile__(f)

        if not idx:
            logger.error('input sandbox preparation failed: %s' % f)
            ick = False
            break
        else:
            if idx['lfc_host']:
                lfc_host = idx['lfc_host']

            if idx['remote']:
                # pre-staged file: track the largest size for the timeout
                # prediction below
                abspath = os.path.abspath(f)
                fsize = os.path.getsize(abspath)

                if fsize > max_prestaged_fsize:
                    max_prestaged_fsize = fsize

                input_sandbox_uris.append(idx['remote'][os.path.basename(f)])
                input_sandbox_names.append(os.path.basename(urlparse(f)[2]))

            if idx['local']:
                input_sandbox_uris += idx['local']
                input_sandbox_names.append(os.path.basename(f))

    if not ick:
        logger.error('stop job submission')
        return None

    # determine the lcg-cp timeout according to the max_prestaged_fsize
    # - using the assumption of 1 MB/sec transfer rate
    # (BUGFIX: max_prestaged_fsize and lfc_host are no longer reset here,
    # which previously discarded the values gathered in the loop above)
    transfer_timeout = config['SandboxTransferTimeout']
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))

    if predict_timeout > transfer_timeout:
        transfer_timeout = predict_timeout

    if transfer_timeout < 60:
        transfer_timeout = 60

    script = script.replace('###TRANSFERTIMEOUT###', '%d' % transfer_timeout)

    # update the job wrapper with the inputsandbox list
    script = script.replace('###INPUTSANDBOX###', repr({'remote': {}, 'local': input_sandbox_names}))

    # write out the job wrapper and put job wrapper into job's inputsandbox
    scriptPath = inpw.writefile(FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1)
    input_sandbox = input_sandbox_uris + [scriptPath]

    for isb in input_sandbox:
        logger.debug('ISB URI: %s' % isb)

    # compose output sandbox to include by default the following files:
    # - gzipped stdout (transferred only when the JobLogHandler is WMS)
    # - gzipped stderr (transferred only when the JobLogHandler is WMS)
    # - __jobscript__.log (job wrapper's log)
    output_sandbox = [wrapperlog]

    from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
    for outputSandboxPattern in getOutputSandboxPatterns(job):
        output_sandbox.append(outputSandboxPattern)

    if config['JobLogHandler'] in ['WMS']:
        output_sandbox += ['stdout.gz', 'stderr.gz']

    if len(jobconfig.outputbox):
        output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

    # compose LCG JDL
    jdl = {
        'VirtualOrganisation': config['VirtualOrganisation'],
        'Executable': os.path.basename(scriptPath),
        'Environment': {'GANGA_LCG_VO': config['VirtualOrganisation'],
                        'GANGA_LOG_HANDLER': config['JobLogHandler'],
                        'LFC_HOST': lfc_host},
        'StdOutput': 'stdout',
        'StdError': 'stderr',
        'InputSandbox': input_sandbox,
        'OutputSandbox': output_sandbox,
        'OutputSandboxBaseDestURI': 'gsiftp://localhost'
    }

    jdl['Environment'].update({'GANGA_LCG_CE': self.CE})
    jdl['Requirements'] = self.requirements.merge(jobconfig.requirements).convert()

    if self.jobtype.upper() in ['NORMAL', 'MPICH']:
        jdl['JobType'] = self.jobtype.upper()
        if self.jobtype.upper() == 'MPICH':
            #jdl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
            jdl['Requirements'].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
            jdl['NodeNumber'] = self.requirements.nodenumber
    else:
        logger.warning('JobType "%s" not supported' % self.jobtype)
        return

    # additional settings from the job
    # if jobconfig.env:
    #     jdl['Environment'].update(jobconfig.env)

    jdlText = Grid.expandjdl(jdl)
    logger.debug('subjob JDL: %s' % jdlText)
    return inpw.writefile(FileBuffer('__jdlfile__', jdlText))
def preparejob(self, jobconfig, master_job_sandbox):
    """Prepare the ARC job description (XRSL) and the job wrapper script.

    Fills the job-wrapper template, packs the input sandbox (pre-staging
    oversized files to the iocache), composes the XRSL dictionary and writes
    both the wrapper and the XRSL file into the job's input workspace.

    Args:
        jobconfig: the application's job configuration (sandbox files,
            executable, arguments, environment, requirements).
        master_job_sandbox: sandbox files of the master job to be shared by
            this (sub)job.

    Returns:
        The path of the written XRSL file, or None if input-sandbox
        preparation failed.

    FIX: the pre-stage bookkeeping (``max_prestaged_fsize``, ``lfc_host``)
    was previously reset to 0/'' right after the pre-stage loop, so the
    predicted transfer timeout was always 0 and LFC_HOST in the job
    environment was always empty. The erroneous resets are removed.
    """
    script = self.__jobWrapperTemplate__()

    job = self.getJobObject()
    inpw = job.getInputWorkspace()

    wrapperlog = '__jobscript__.log'

    import Ganga.Core.Sandbox as Sandbox

    # FIXME: check what happens if 'stdout','stderr' are specified here
    script = script.replace('###OUTPUTSANDBOX###', repr(jobconfig.outputbox))
    script = script.replace('###APPLICATION_NAME###', getName(job.application))
    script = script.replace('###APPLICATIONEXEC###', repr(jobconfig.getExeString()))
    script = script.replace('###APPLICATIONARGS###', repr(jobconfig.getArguments()))

    from Ganga.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles

    script = script.replace('###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' '))
    script = script.replace('###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' '))

    if jobconfig.env:
        script = script.replace('###APPLICATIONENVS###', repr(jobconfig.env))
    else:
        script = script.replace('###APPLICATIONENVS###', repr({}))

    script = script.replace('###WRAPPERLOG###', repr(wrapperlog))
    import inspect
    script = script.replace('###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox))

    mon = job.getMonitoringService()

    self.monInfo = None

    # set the monitoring file by default to the stdout
    if isinstance(self.monInfo, dict):
        self.monInfo['remotefile'] = 'stdout'

    # try to print out the monitoring service information in debug mode
    try:
        logger.debug('job info of monitoring service: %s' % str(self.monInfo))
    except:
        pass

    # prepare input/output sandboxes: ship Ganga.Utility.files alongside the
    # user's sandbox so the wrapper can use it on the worker node
    import Ganga.Utility.files
    from Ganga.GPIDev.Lib.File import File
    from Ganga.Core.Sandbox.WNSandbox import PYTHON_DIR

    fileutils = File(inspect.getsourcefile(Ganga.Utility.files), subdir=PYTHON_DIR)

    packed_files = jobconfig.getSandboxFiles() + [fileutils]
    sandbox_files = job.createPackedInputSandbox(packed_files)

    # sandbox of child jobs should include master's sandbox
    sandbox_files.extend(master_job_sandbox)

    # check the input file size and pre-upload larger inputs to the iocache
    lfc_host = ''

    input_sandbox_uris = []
    input_sandbox_names = []

    ick = True

    max_prestaged_fsize = 0
    for f in sandbox_files:
        idx = self.__check_and_prestage_inputfile__(f)

        if not idx:
            logger.error('input sandbox preparation failed: %s' % f)
            ick = False
            break
        else:
            if idx['lfc_host']:
                lfc_host = idx['lfc_host']

            if idx['remote']:
                # pre-staged file: track the largest size for the timeout
                # prediction below
                abspath = os.path.abspath(f)
                fsize = os.path.getsize(abspath)

                if fsize > max_prestaged_fsize:
                    max_prestaged_fsize = fsize

                input_sandbox_uris.append(idx['remote'][os.path.basename(f)])
                input_sandbox_names.append(os.path.basename(urlparse(f)[2]))

            if idx['local']:
                input_sandbox_uris += idx['local']
                input_sandbox_names.append(os.path.basename(f))

    if not ick:
        logger.error('stop job submission')
        return None

    # determine the lcg-cp timeout according to the max_prestaged_fsize
    # - using the assumption of 1 MB/sec transfer rate
    # (BUGFIX: max_prestaged_fsize and lfc_host are no longer reset here,
    # which previously discarded the values gathered in the loop above)
    transfer_timeout = config['SandboxTransferTimeout']
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))

    if predict_timeout > transfer_timeout:
        transfer_timeout = predict_timeout

    if transfer_timeout < 60:
        transfer_timeout = 60

    script = script.replace('###TRANSFERTIMEOUT###', '%d' % transfer_timeout)

    # update the job wrapper with the inputsandbox list
    script = script.replace('###INPUTSANDBOX###', repr({'remote': {}, 'local': input_sandbox_names}))

    # write out the job wrapper and put job wrapper into job's inputsandbox
    scriptPath = inpw.writefile(FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1)
    input_sandbox = input_sandbox_uris + [scriptPath]

    for isb in input_sandbox:
        logger.debug('ISB URI: %s' % isb)

    # compose output sandbox to include by default the following files:
    # - gzipped stdout (transferred only when the JobLogHandler is WMS)
    # - gzipped stderr (transferred only when the JobLogHandler is WMS)
    # - __jobscript__.log (job wrapper's log)
    output_sandbox = [wrapperlog]

    from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
    for outputSandboxPattern in getOutputSandboxPatterns(job):
        output_sandbox.append(outputSandboxPattern)

    if config['JobLogHandler'] in ['WMS']:
        output_sandbox += ['stdout.gz', 'stderr.gz']

    if len(jobconfig.outputbox):
        output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

    # compose ARC XRSL
    xrsl = {
        #'VirtualOrganisation' : config['VirtualOrganisation'],
        'executable': os.path.basename(scriptPath),
        'environment': {'GANGA_LCG_VO': config['VirtualOrganisation'],
                        'GANGA_LOG_HANDLER': config['JobLogHandler'],
                        'LFC_HOST': lfc_host},
        #'stdout' : 'stdout',
        #'stderr' : 'stderr',
        'inputFiles': input_sandbox,
        'outputFiles': output_sandbox,
        #'OutputSandboxBaseDestURI': 'gsiftp://localhost'
    }

    xrsl['environment'].update({'GANGA_LCG_CE': self.CE})
    #xrsl['Requirements'] = self.requirements.merge(jobconfig.requirements).convert()

    # if self.jobtype.upper() in ['NORMAL','MPICH']:
    #     xrsl['JobType'] = self.jobtype.upper()
    #     if self.jobtype.upper() == 'MPICH':
    #         #xrsl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
    #         xrsl['Requirements'].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    #         xrsl['NodeNumber'] = self.requirements.nodenumber
    # else:
    #     logger.warning('JobType "%s" not supported' % self.jobtype)
    #     return

    # additional settings from the job
    if jobconfig.env:
        xrsl['environment'].update(jobconfig.env)

    xrslText = Grid.expandxrsl(xrsl)

    # append any additional requirements from the requirements object
    xrslText += '\n'.join(self.requirements.other)

    logger.debug('subjob XRSL: %s' % xrslText)
    return inpw.writefile(FileBuffer('__xrslfile__', xrslText))
def preparejob(self, jobconfig, master_job_sandbox):
    """Prepare the LCG JDL and the job wrapper script (monitoring-enabled variant).

    Fills the job-wrapper template (including the monitoring-service
    constructor text), packs the input sandbox together with the monitoring
    modules (pre-staging oversized files to the iocache), composes the JDL
    dictionary and writes both the wrapper and the JDL file into the job's
    input workspace.

    Args:
        jobconfig: the application's job configuration (sandbox files,
            executable, arguments, environment, requirements).
        master_job_sandbox: sandbox files of the master job to be shared by
            this (sub)job.

    Returns:
        The path of the written JDL file, or None if input-sandbox
        preparation failed (or, silently, if the jobtype is unsupported).

    FIX: the pre-stage bookkeeping (``max_prestaged_fsize``, ``lfc_host``)
    was previously reset to 0/'' right after the pre-stage loop, so the
    predicted transfer timeout was always 0 and LFC_HOST in the JDL
    environment was always empty. The erroneous resets are removed.
    """
    script = self.__jobWrapperTemplate__()

    job = self.getJobObject()
    inpw = job.getInputWorkspace()

    wrapperlog = "__jobscript__.log"

    import Ganga.Core.Sandbox as Sandbox

    # FIXME: check what happens if 'stdout','stderr' are specified here
    script = script.replace("###OUTPUTSANDBOX###", repr(jobconfig.outputbox))
    script = script.replace("###APPLICATION_NAME###", job.application._name)
    script = script.replace("###APPLICATIONEXEC###", repr(jobconfig.getExeString()))
    script = script.replace("###APPLICATIONARGS###", repr(jobconfig.getArguments()))

    from Ganga.GPIDev.Lib.File.OutputFileManager import (
        getWNCodeForOutputPostprocessing,
        getWNCodeForDownloadingInputFiles,
    )

    script = script.replace("###OUTPUTUPLOADSPOSTPROCESSING###", getWNCodeForOutputPostprocessing(job, " "))
    script = script.replace("###DOWNLOADINPUTFILES###", getWNCodeForDownloadingInputFiles(job, " "))

    if jobconfig.env:
        script = script.replace("###APPLICATIONENVS###", repr(jobconfig.env))
    else:
        script = script.replace("###APPLICATIONENVS###", repr({}))

    script = script.replace("###WRAPPERLOG###", repr(wrapperlog))
    import inspect
    script = script.replace("###INLINEMODULES###", inspect.getsource(Sandbox.WNSandbox))

    mon = job.getMonitoringService()

    self.monInfo = None

    # set the monitoring file by default to the stdout
    if isinstance(self.monInfo, dict):
        self.monInfo["remotefile"] = "stdout"

    # try to print out the monitoring service information in debug mode
    try:
        logger.debug("job info of monitoring service: %s" % str(self.monInfo))
    except:
        pass

    # inject the monitoring-service bootstrap code into the wrapper
    script = script.replace("###MONITORING_SERVICE###", mon.getWrapperScriptConstructorText())

    # prepare input/output sandboxes: user files plus the default Ganga
    # modules and the monitoring-service modules
    packed_files = (
        jobconfig.getSandboxFiles()
        + Sandbox.getGangaModulesAsSandboxFiles(Sandbox.getDefaultModules())
        + Sandbox.getGangaModulesAsSandboxFiles(mon.getSandboxModules())
    )
    sandbox_files = job.createPackedInputSandbox(packed_files)

    # sandbox of child jobs should include master's sandbox
    sandbox_files.extend(master_job_sandbox)

    # check the input file size and pre-upload larger inputs to the iocache
    lfc_host = ""

    input_sandbox_uris = []
    input_sandbox_names = []

    ick = True

    max_prestaged_fsize = 0
    for f in sandbox_files:
        idx = self.__check_and_prestage_inputfile__(f)

        if not idx:
            logger.error("input sandbox preparation failed: %s" % f)
            ick = False
            break
        else:
            if idx["lfc_host"]:
                lfc_host = idx["lfc_host"]

            if idx["remote"]:
                # pre-staged file: track the largest size for the timeout
                # prediction below
                abspath = os.path.abspath(f)
                fsize = os.path.getsize(abspath)

                if fsize > max_prestaged_fsize:
                    max_prestaged_fsize = fsize

                input_sandbox_uris.append(idx["remote"][os.path.basename(f)])
                input_sandbox_names.append(os.path.basename(urlparse(f)[2]))

            if idx["local"]:
                input_sandbox_uris += idx["local"]
                input_sandbox_names.append(os.path.basename(f))

    if not ick:
        logger.error("stop job submission")
        return None

    # determine the lcg-cp timeout according to the max_prestaged_fsize
    # - using the assumption of 1 MB/sec transfer rate
    # (BUGFIX: max_prestaged_fsize and lfc_host are no longer reset here,
    # which previously discarded the values gathered in the loop above)
    transfer_timeout = config["SandboxTransferTimeout"]
    predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0))

    if predict_timeout > transfer_timeout:
        transfer_timeout = predict_timeout

    if transfer_timeout < 60:
        transfer_timeout = 60

    script = script.replace("###TRANSFERTIMEOUT###", "%d" % transfer_timeout)

    # update the job wrapper with the inputsandbox list
    script = script.replace("###INPUTSANDBOX###", repr({"remote": {}, "local": input_sandbox_names}))

    # write out the job wrapper and put job wrapper into job's inputsandbox
    scriptPath = inpw.writefile(FileBuffer("__jobscript_%s__" % job.getFQID("."), script), executable=1)
    input_sandbox = input_sandbox_uris + [scriptPath]

    for isb in input_sandbox:
        logger.debug("ISB URI: %s" % isb)

    # compose output sandbox to include by default the following files:
    # - gzipped stdout (transferred only when the JobLogHandler is WMS)
    # - gzipped stderr (transferred only when the JobLogHandler is WMS)
    # - __jobscript__.log (job wrapper's log)
    output_sandbox = [wrapperlog]

    from Ganga.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns
    for outputSandboxPattern in getOutputSandboxPatterns(job):
        output_sandbox.append(outputSandboxPattern)

    if config["JobLogHandler"] in ["WMS"]:
        output_sandbox += ["stdout.gz", "stderr.gz"]

    if len(jobconfig.outputbox):
        output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME]

    # compose LCG JDL
    jdl = {
        "VirtualOrganisation": config["VirtualOrganisation"],
        "Executable": os.path.basename(scriptPath),
        "Environment": {
            "GANGA_LCG_VO": config["VirtualOrganisation"],
            "GANGA_LOG_HANDLER": config["JobLogHandler"],
            "LFC_HOST": lfc_host,
        },
        "StdOutput": "stdout",
        "StdError": "stderr",
        "InputSandbox": input_sandbox,
        "OutputSandbox": output_sandbox,
        "OutputSandboxBaseDestURI": "gsiftp://localhost",
    }

    jdl["Environment"].update({"GANGA_LCG_CE": self.CE})
    jdl["Requirements"] = self.requirements.merge(jobconfig.requirements).convert()

    if self.jobtype.upper() in ["NORMAL", "MPICH"]:
        jdl["JobType"] = self.jobtype.upper()
        if self.jobtype.upper() == "MPICH":
            # jdl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)')
            jdl["Requirements"].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)')
            jdl["NodeNumber"] = self.requirements.nodenumber
    else:
        logger.warning('JobType "%s" not supported' % self.jobtype)
        return

    # additional settings from the job
    # if jobconfig.env:
    #     jdl['Environment'].update(jobconfig.env)

    jdlText = Grid.expandjdl(jdl)
    logger.debug("subjob JDL: %s" % jdlText)
    return inpw.writefile(FileBuffer("__jdlfile__", jdlText))