Example #1
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)


        if not isinstance(app.uploadedInput, DiracFile):
            generateDiracInput(app)
            assert isinstance(app.uploadedInput, DiracFile), "Failed to upload needed file, aborting submit"
        
        rep_data = app.uploadedInput.getReplicas()
        assert rep_data != {}, "Failed to find a replica, aborting submit"


        if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
            app.jobScriptArchive = None

        generateDiracScripts(app)

        assert isinstance(app.jobScriptArchive, DiracFile), "Failed to upload needed file, aborting submit"
        rep_data = app.jobScriptArchive.getReplicas()
        assert rep_data != {}, "Failed to find a replica, aborting submit"

        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
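These handlers all funnel their sandbox lists through unique(); a minimal order-preserving sketch of such a helper, assuming equality-based membership (the real Ganga utility may differ):

def unique(items):
    # Order-preserving de-duplication. Membership is tested with '=='
    # rather than hashing, since sandbox entries (File, FileBuffer, ...)
    # are not guaranteed to be hashable.
    result = []
    for item in items:
        if item not in result:
            result.append(item)
    return result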
Example #2
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)


        if not isinstance(app.uploadedInput, DiracFile):
            generateDiracInput(app)
            assert isinstance(app.uploadedInput, DiracFile), "Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN, your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n" % DiracFile.diracLFNBase()
        
        rep_data = app.uploadedInput.getReplicas()
        assert rep_data != {}, "Failed to find a replica, aborting submit"


        if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
            app.jobScriptArchive = None

        generateDiracScripts(app)

        assert isinstance(app.jobScriptArchive, DiracFile), "Failed to upload needed file, aborting submit"
        rep_data = app.jobScriptArchive.getReplicas()
        assert rep_data != {}, "Failed to find a replica, aborting submit"

        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #3
    def configure(self, master_appconfig):
        
        ## strip leading and trailing blanks from arguments 
        self.arguments = [ a.strip() for a in self.arguments ]

        ## strip leading and trailing blanks from the command 
        self.commands  = [ a.strip() for a in self.commands  ]
        
        ## the script layout
        the_script    = layout.format (
            scripts   = [ os.path.join ( f.subdir , os.path.basename ( f.name ) ) for f in self.scripts ] , 
            arguments = self.arguments  ,
            command   = self.commands    
            )

        # add summary.xml
        outputsandbox_temp  = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox       = unique(outputsandbox_temp)
        
        input_files  = []
        input_files += [ FileBuffer('gaudipython-wrapper.py', the_script ) ]
        logger.debug("Returning StandardJobConfig")
        return (None, StandardJobConfig(inputbox=input_files,
                                        outputbox=outputsandbox))
Example #4
0
def sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig):

    logger.debug("RTUTils sandbox_prepare")
    job = stripProxy(app).getJobObject()

    # Add the job.in/outputsandbox as splitters create subjobs that are
    # separate Job objects and therefore have their own job.in/outputsandbox
    # which is NOT in general copied from the master in/outputsandbox
    # inputsandbox = job.inputsandbox[:] # copied in splitter
    #outputsandbox = job.outputsandbox[:]
    inputsandbox = []
    outputsandbox = []

    # Here add any sandbox files coming from the appsubconfig
    # currently none. masterjobconfig inputsandbox added automatically
    if appsubconfig:
        inputsandbox += appsubconfig.getSandboxFiles()

    # Strangely NEITHER the master outputsandbox NOR job.outputsandbox
    # is added automatically.
    if jobmasterconfig:
        outputsandbox += jobmasterconfig.getOutputSandboxFiles()
    if appsubconfig:
        outputsandbox += appsubconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
Example #5
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)

        if isinstance(app.jobScriptArchive, LocalFile):
            app.jobScriptArchive = None

        generateJobScripts(app, appendJobScripts=True)

        scriptArchive = os.path.join(app.jobScriptArchive.localDir,
                                     app.jobScriptArchive.namePattern)

        inputsandbox.append(File(name=scriptArchive))

        if app.getMetadata:
            logger.info("Adding options to make the summary.xml")
            inputsandbox.append(
                FileBuffer(
                    'summary.py',
                    "\nfrom Gaudi.Configuration import *\nfrom Configurables import LHCbApp\nLHCbApp().XMLSummary='summary.xml'"
                ))

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #6
    def configure(self, master_appconfig):

        # self._configure()
        modulename = split(self.module.name)[-1].split('.')[0]
        script = """
from copy import deepcopy
from Gaudi.Configuration import *
importOptions('data.py')
import %s as USERMODULE
EventSelectorInput = deepcopy(EventSelector().Input)
FileCatalogCatalogs = deepcopy(FileCatalog().Catalogs)
EventSelector().Input=[]
FileCatalog().Catalogs=[]\n""" % modulename

        script_configure = "USERMODULE.configure(EventSelectorInput,FileCatalogCatalogs%s)\n"
        if self.params:
            param_string = ",params=%s" % self.params
        else:
            param_string = ""

        script_configure = script_configure % param_string
        script += script_configure

        script += "USERMODULE.run(%d)\n" % self.events
        script += getXMLSummaryScript()
        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = []
        input_files += [FileBuffer('gaudipython-wrapper.py', script)]
        logger.debug("Returning StandardJobConfig")
        return (None, StandardJobConfig(inputbox=input_files,
                                        outputbox=outputsandbox))
Example #7
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

        # If we are getting the metadata, make sure summary.xml is added to the output sandbox if it is not there already.
        if app.getMetadata and 'summary.xml' not in outputsandbox:
            outputsandbox += ['summary.xml']


        if not isinstance(app.uploadedInput, DiracFile):
            generateDiracInput(app)
            assert isinstance(app.uploadedInput, DiracFile), "Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN, your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n" % DiracFile.diracLFNBase()
        
        rep_data = app.uploadedInput.getReplicas()
        assert rep_data != {}, "Failed to find a replica, aborting submit"


        if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
            app.jobScriptArchive = None

        generateDiracScripts(app)

        assert isinstance(app.jobScriptArchive, DiracFile), "Failed to upload needed file, aborting submit"
        rep_data = app.jobScriptArchive.getReplicas()
        assert rep_data != {}, "Failed to find a replica, aborting submit"

        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #8
def sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig):

    logger.debug("RTUTils sandbox_prepare")
    job = app.getJobObject()

    # Add the job.in/outputsandbox as splitters create subjobs that are
    # separate Job objects and therefore have their own job.in/outputsandbox
    # which is NOT in general copied from the master in/outputsandbox
    # inputsandbox = job.inputsandbox[:] # copied in splitter
    # outputsandbox = job.outputsandbox[:]
    inputsandbox = []
    outputsandbox = []

    # Here add any sandbox files coming from the appsubconfig
    # currently none. masterjobconfig inputsandbox added automatically
    if appsubconfig:
        inputsandbox += appsubconfig.getSandboxFiles()

    # Strangely NEITHER the master outputsandbox NOR job.outputsandbox
    # is added automatically.
    if jobmasterconfig:
        outputsandbox += jobmasterconfig.getOutputSandboxFiles()
    if appsubconfig:
        outputsandbox += appsubconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
Example #9
    def configure(self, master_appconfig):
        
        ## strip leading and trailing blanks from arguments 
        self.arguments = [ a.strip() for a in self.arguments ]

        ## strip leading and trailing blanks from the command 
        self.commands  = [ a.strip() for a in self.commands  ]
        
        ## the script layout
        the_script    = layout.format (
            scripts   = [ os.path.join ( f.subdir , os.path.basename ( f.name ) ) for f in self.scripts ] , 
            imports   = [ os.path.join ( f.subdir , os.path.basename ( f.name ) ) for f in self.imports ] , 
            arguments = self.arguments  ,
            command   = self.commands    
            )

        logger.debug('SCRIPT:\n%s', the_script)
        
        # add summary.xml
        outputsandbox_temp  = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox       = unique(outputsandbox_temp)
        
        input_files  = []
        input_files += [ FileBuffer('gaudipython-wrapper.py', the_script ) ]
        logger.debug("Returning StandardJobConfig")
        return (None, StandardJobConfig(inputbox=input_files,
                                        outputbox=outputsandbox))
Example #10
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ['inputsandbox'])

        # add summary.xml
        outputsandbox += ['summary.xml', '__parsedxmlsummary__']

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #11
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ['inputsandbox'])

        # add summary.xml
        outputsandbox += ['summary.xml', '__parsedxmlsummary__']

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #12
 def master_prepare(self, app, appmasterconfig):
     """
     This function prepares the application of a master job during submit. A priori we aren't doing anything with this in Im3ShapeApp, but until this is understood I'd rather not remove it.
     Args:
         app (IApplication): This is the application given in the master job
         appmasterconfig (tuple): This is the configuration which is to prepare the app in the master job # TODO check type and this interface
     """
     inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)
     return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #13
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        cred_req = app.getJobObject().backend.credential_requirements
        check_creds(cred_req)

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

        # If we are getting the metadata, make sure summary.xml is added to the output sandbox if it is not there already.
        if app.getMetadata and 'summary.xml' not in outputsandbox:
            outputsandbox += ['summary.xml']

        # Check a previously uploaded input is there in case of a job copy
        if isinstance(app.uploadedInput, DiracFile):
            if app.uploadedInput.getReplicas() == {}:
                app.uploadedInput = None
                logger.info("Previously uploaded cmake target missing from Dirac. Uploading it again.")

        if not isinstance(app.uploadedInput, DiracFile):
            generateDiracInput(app)
            try:
                assert isinstance(app.uploadedInput, DiracFile)
            except AssertionError:
                raise ApplicationPrepareError("Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n" % DiracFile.diracLFNBase(cred_req))
        
        rep_data = app.uploadedInput.getReplicas()
        try:
            assert rep_data != {}
        except AssertionError:
            raise ApplicationPrepareError("Failed to find a replica of uploaded file, aborting submit")


        if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
            app.jobScriptArchive = None

        generateDiracScripts(app)

        try:
            assert isinstance(app.jobScriptArchive, DiracFile)
        except AssertionError:
            raise ApplicationPrepareError("Failed to upload needed file, aborting submit")
        rep_data = app.jobScriptArchive.getReplicas()
        try:
            assert rep_data != {}
        except AssertionError:
            raise ApplicationPrepareError("Failed to find a replica, aborting submit")

        #Create a replica of the job and scripts files
        replicateJobFile(app.jobScriptArchive)
        replicateJobFile(app.uploadedInput)

        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #14
 def master_prepare(self, app, appmasterconfig):
     """
     This function prepares the application of a master job during submit. A priori we aren't doing anything with this in Im3ShapeApp, but until this is understood I'd rather not remove it.
     Args:
         app (IApplication): This is the application given in the master job
         appmasterconfig (tuple): This is the configuration which is to prepare the app in the master job # TODO check type and this interface
     """
     inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)
     return StandardJobConfig(inputbox=unique(inputsandbox),
             outputbox=unique(outputsandbox))
Example #15
    def master_prepare(self, app, appmasterconfig):

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ["inputsandbox"])

        # add summary.xml
        outputsandbox += ["summary.xml", "__parsedxmlsummary__"]

        logger.debug("Master Prepare LHCbGaudiDiracRunTimeHandler")

        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #16
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)
        if type(app.exe) == File:
            input_dir = app.getJobObject().getInputWorkspace().getPath()
            exefile = os.path.join(input_dir, os.path.basename(app.exe.name))
            if not os.path.exists(exefile):
                msg = 'Executable: "%s" must exist!' % str(exefile)
                raise ApplicationConfigurationError(None, msg)

            os.system('chmod +x %s' % exefile)
        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #17
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiRun): This application is only expected to handle GaudiRun Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        WarnUsers()

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)
        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #18
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

        run_script = self.__create_run_script(
            app, appsubconfig, appmasterconfig, jobmasterconfig, inputsandbox, outputsandbox
        )
        return StandardJobConfig(
            FileBuffer("gaudi-script.py", run_script, executable=1),
            inputbox=unique(inputsandbox),
            outputbox=unique(outputsandbox),
        )
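Several handlers above wrap a generated script in a FileBuffer rather than a file on disk; a toy stand-in sketching the idea (the real Ganga class has a richer interface, so this is only illustrative):

import os
import stat

class FileBuffer(object):
    """Toy stand-in: holds a filename and its contents in memory and
    writes itself into a directory at submission time."""

    def __init__(self, name, contents, executable=0):
        self.name = name
        self.contents = contents
        self.executable = executable

    def create(self, directory):
        # Materialise the buffer as a real file, marking it executable
        # if requested (as with executable=1 in the examples above).
        path = os.path.join(directory, self.name)
        with open(path, 'w') as handle:
            handle.write(self.contents)
        if self.executable:
            os.chmod(path, os.stat(path).st_mode | stat.S_IEXEC)
        return path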
Example #19
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)
        if type(app.exe) == File:
            exefile = os.path.join(get_share_path(app),
                                   os.path.basename(app.exe.name))
            if not os.path.exists(exefile):
                msg = 'Executable must exist!'
                raise ApplicationConfigurationError(None, msg)

            os.system('chmod +x %s' % exefile)
        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #20
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)
        if type(app.exe) == File:
            input_dir = app.getJobObject().getInputWorkspace().getPath()
            exefile = os.path.join(input_dir, os.path.basename(app.exe.name))
            if not os.path.exists(exefile):
                msg = 'Executable: "%s" must exist!' % str(exefile)
                raise ApplicationConfigurationError(None, msg)

            os.system('chmod +x %s' % exefile)
        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #21
    def master_prepare(self, app, appmasterconfig):

        logger.debug("Master Prepare")
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig, ['inputsandbox'])

        # add summary.xml
        outputsandbox += ['summary.xml', '__parsedxmlsummary__']

        logger.debug("Master Prepare LHCbGaudiDiracRunTimeHandler")

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #22
def master_sandbox_prepare(app, appmasterconfig, sharedir_roots=None):

    if sharedir_roots is None:
        sharedir_roots = ['']

    logger.debug("RTUTils master_sandbox_prepare")

    # catch errors from not preparing properly
    if not hasattr(stripProxy(app), 'is_prepared') or app.is_prepared is None:
        logger.warning('Application is not prepared properly')
        if hasattr(stripProxy(app), 'is_prepared'):
            logger.warning("app.is_prepared: %s" % str(app.is_prepared))
        import traceback
        traceback.print_stack()
        raise GangaException(None, 'Application not prepared properly')

    # Note EITHER the master inputsandbox OR the job.inputsandbox is added to
    # the subjob inputsandbox depending if the jobmasterconfig object is present
    # or not... Therefore combine the job.inputsandbox with appmasterconfig.
    job = stripProxy(app).getJobObject()

    # user added items from the interactive GPI
    from Ganga.Utility.Config import getConfig
    if not getConfig('Output')['ForbidLegacyInput']:
        inputsandbox = job.inputsandbox[:]
    else:
        if len(job.inputsandbox) > 0:
            from Ganga.GPIDev.Lib.Job import JobError
            raise JobError(
                "InputFiles have been requested but there are objects in the inputSandBox... Aborting Job Prepare!"
            )
        inputsandbox = []
        for filepattern in getInputFilesPatterns(job)[0]:
            inputsandbox.append(File(filepattern))

    if len(inputsandbox) > 100:
        logger.warning(
            'InputSandbox exceeds maximum size (100) supported by the Dirac backend'
        )
        raise GangaException(None, 'InputSandbox exceeds maximum size')
    outputsandbox = getOutputSandboxPatterns(job)  # job.outputsandbox[:]

    # inputsandbox files stored in share_dir from prepare method
    sharedir_handler(app, sharedir_roots, inputsandbox)
    # Here add any sandbox files/data coming from the appmasterconfig
    # from master_configure. Catch the case where None is passed (as in tests)
    if appmasterconfig:
        inputsandbox += appmasterconfig.getSandboxFiles()
        outputsandbox += appmasterconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
Example #23
    def get_output(self, job):
        '''Builds lists of output files and output data.'''

        outputdata = [
            f.namePattern for f in job.outputfiles
            if outputFilePostProcessingOnWN(job, getName(f))
        ]
        outsandbox = [
            f.namePattern for f in job.outputfiles
            if not outputFilePostProcessingOnWN(job, getName(f))
        ]

        # if user put any files in both, remove them from the sandbox
        for f in outsandbox:
            if outputdata.count(f) != 0:
                outsandbox.remove(f)
                msg = 'User placed the file %s in both the outputsandbox and '
                msg += 'outputdata. It will be removed from the sandbox.'
                logger.warning(msg, f)

        gaudi_outsandbox, gaudi_outputdata = self.get_output_files()

        # handle (as best we can) any user supplied wildcards
        datalist = []  # files in sandbox that match pattern in data
        for f in outputdata:
            datalist += fnmatch.filter(gaudi_outsandbox, f)

        sandlist = []  # files in data that match sandbox pattern
        for f in outsandbox:
            sandlist += fnmatch.filter(gaudi_outputdata, f)

        datadatalist = []  # files in data that match patterns in data
        for f in outputdata:
            datadatalist += fnmatch.filter(gaudi_outputdata, f)

        # files in sandbox which match patterns in data -> data
        for f in datalist:
            gaudi_outputdata.append(f)
            if f in gaudi_outsandbox:
                gaudi_outsandbox.remove(f)
        # files in data which match patterns in sandbox but not data -> sandbox
        for f in sandlist:
            if datalist.count(f) == 0 and datadatalist.count(f) == 0:
                gaudi_outsandbox.append(f)
                if f in gaudi_outputdata:
                    gaudi_outputdata.remove(f)

        outsandbox += gaudi_outsandbox
        outputdata += gaudi_outputdata

        return unique(outsandbox), unique(outputdata)
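The wildcard routing in get_output is easiest to see with concrete lists; a small, self-contained illustration of the fnmatch filtering it relies on (file names invented for the example):

import fnmatch

gaudi_outsandbox = ['hist.root', 'summary.xml']
outputdata = ['*.root']  # user-supplied pattern in outputdata

# Sandbox files matching an outputdata pattern migrate to outputdata
datalist = []
for pattern in outputdata:
    datalist += fnmatch.filter(gaudi_outsandbox, pattern)

for name in datalist:
    gaudi_outsandbox.remove(name)

print(datalist)          # ['hist.root'] -> moved to outputdata
print(gaudi_outsandbox)  # ['summary.xml'] stays in the sandbox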
Example #24
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        run_script = self.__create_run_script(app, appsubconfig,
                                              appmasterconfig, jobmasterconfig,
                                              inputsandbox, outputsandbox)
        return StandardJobConfig(FileBuffer('gaudi-script.py',
                                            run_script,
                                            executable=1),
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #25
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        inputsandbox, outputsandbox = sandbox_prepare(
            app, appsubconfig, appmasterconfig, jobmasterconfig)
        input_data,   parametricinput_data = dirac_inputdata(app)

        job = app.getJobObject()
        outputfiles = [this_file.namePattern for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

        gaudi_script_path = os.path.join(
            job.getInputWorkspace().getPath(), "gaudi-script.py")
        script_generator(gaudi_script_template(),
                         #remove_unreplaced = False,
                         outputfile_path=gaudi_script_path,
                         PLATFORM=app.platform,
                         COMMAND='gaudirun.py'  # ,
                         #OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '    ')
                         )

        dirac_script = script_generator(diracAPI_script_template(),
                                        DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
                                        DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
                                        DIRAC_OBJECT='Dirac()',
                                        JOB_OBJECT='Job()',
                                        NAME=mangle_job_name(app),
                                        EXE=gaudi_script_path,
                                        EXE_ARG_STR=' '.join(
                                            [str(arg) for arg in app.args]),
                                        EXE_LOG_FILE='Ganga_%s_%s.log' % (
                                            app.appname, app.version),
                                        INPUTDATA=input_data,
                                        PARAMETRIC_INPUTDATA=parametricinput_data,
                                        OUTPUT_SANDBOX=outputsandbox,
                                        OUTPUTDATA=list(outputfiles),
                                        OUTPUT_PATH="",  # job.fqid,
                                        OUTPUT_SE=getConfig(
                                            'DIRAC')['DiracOutputDataSE'],
                                        SETTINGS=diracAPI_script_settings(app),
                                        DIRAC_OPTS=job.backend.diracOpts,
                                        PLATFORM=app.platform,
                                        # leave the sandbox for altering later as needs
                                        # to be done in backend.submit to combine master.
                                        # Note only using 2 #s as auto-remove 3
                                        INPUT_SANDBOX='##INPUT_SANDBOX##'
                                        )

        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #26
def master_sandbox_prepare(app, appmasterconfig, sharedir_roots=None):

    if sharedir_roots is None:
        sharedir_roots = ['']

    logger.debug("RTUTils master_sandbox_prepare")

    # catch errors from not preparing properly
    if not hasattr(stripProxy(app), 'is_prepared') or app.is_prepared is None:
        logger.warning('Application is not prepared properly')
        if hasattr(stripProxy(app), 'is_prepared'):
            logger.warning("app.is_prepared: %s" % str(app.is_prepared))
        import traceback
        traceback.print_stack()
        raise GangaException(None, 'Application not prepared properly')

    # Note EITHER the master inputsandbox OR the job.inputsandbox is added to
    # the subjob inputsandbox depending if the jobmasterconfig object is present
    # or not... Therefore combine the job.inputsandbox with appmasterconfig.
    job = stripProxy(app).getJobObject()

    # user added items from the interactive GPI
    from Ganga.Utility.Config import getConfig
    if not getConfig('Output')['ForbidLegacyInput']:
        inputsandbox = job.inputsandbox[:]
    else:
        if len(job.inputsandbox) > 0:
            from Ganga.GPIDev.Lib.Job import JobError
            raise JobError("InputFiles have been requested but there are objects in the inputSandBox... Aborting Job Prepare!")
        inputsandbox = []
        for filepattern in getInputFilesPatterns(job)[0]:
            inputsandbox.append(File(filepattern))

    if len(inputsandbox) > 100:
        logger.warning('InputSandbox exceeds maximum size (100) supported by the Dirac backend')
        raise GangaException(None, 'InputSandbox exceeds maximum size')
    outputsandbox = getOutputSandboxPatterns(job)  # job.outputsandbox[:]

    # inputsandbox files stored in share_dir from prepare method
    sharedir_handler(app, sharedir_roots, inputsandbox)
    # Here add any sandbox files/data coming from the appmasterconfig
    # from master_configure. Catch the case where None is passed (as in tests)
    if appmasterconfig:
        inputsandbox += appmasterconfig.getSandboxFiles()
        outputsandbox += appmasterconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
Example #27
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)
        # check file is set OK
        if not app.script.name:
            msg = 'Root.script.name must be set.'
            raise ApplicationConfigurationError(msg)

        sharedir_scriptpath = os.path.join(get_share_path(app),
                                           os.path.basename(app.script.name))

        if not os.path.exists(sharedir_scriptpath):
            msg = 'Script must exist!'
            raise ApplicationConfigurationError(msg)

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #28
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)
        # check file is set OK
        if not app.script.name:
            msg = 'Root.script.name must be set.'
            raise ApplicationConfigurationError(None, msg)

        sharedir_scriptpath = os.path.join(get_share_path(app),
                                           os.path.basename(app.script.name))

        if not os.path.exists(sharedir_scriptpath):
            msg = 'Script must exist!'
            raise ApplicationConfigurationError(None, msg)

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #29
def dirac_inputdata(app, hasOtherInputData=False):
    """ Construct the JDL component which requests the inputdata for a job
    Args:
        app (IApplication): app which belongs to the job of interest
        hasOtherInputData (bool): This is used to stop BannedSites being added to the JDL structure through backend.settings
    """
    job = app.getJobObject()
    input_data = None
    parametricinput_data = None

    inputLFNs = []

    if not job.inputdata and (not job.master or not job.master.inputdata):
        return input_data, parametricinput_data

    wanted_job = job
    if not job.inputdata and job.master and job.master.inputdata:
        wanted_job = job.master

    inputLFNs = [
        'LFN:' + this_file.lfn for this_file in wanted_job.inputdata
        if isinstance(this_file, DiracFile)
    ]

    # master job with a splitter reaching prepare, hence bulk submit
    if not job.master and job.splitter:
        parametricinput_data = dirac_parametric_split(app)
        if parametricinput_data is not None and len(
                parametricinput_data) > getConfig('DIRAC')['MaxDiracBulkJobs']:
            raise BackendError(
                'Dirac',
                'Number of bulk submission jobs \'%s\' exceeds the maximum allowed \'%s\'; if more are needed, please modify your config. Note there is a hard limit in Dirac of currently 1000.'
                % (len(parametricinput_data),
                   getConfig('DIRAC')['MaxDiracBulkJobs']))
        # master job with no splitter or subjob already split proceed as normal
        else:
            input_data = inputLFNs

    if 'Destination' not in job.backend.settings and not inputLFNs and not hasOtherInputData:
        t1_sites = getConfig('DIRAC')['noInputDataBannedSites']
        logger.info(
            'Job has no inputdata (T1 sites will be banned to help avoid overloading them).'
        )
        if 'BannedSites' in job.backend.settings:
            job.backend.settings['BannedSites'].extend(t1_sites)
            job.backend.settings['BannedSites'] = unique(
                job.backend.settings['BannedSites'])
        else:
            if t1_sites:
                job.backend.settings['BannedSites'] = t1_sites[:]

    if not input_data and not parametricinput_data:
        input_data = inputLFNs

    return input_data, parametricinput_data
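The inputLFNs list comprehension above only picks up DiracFile entries and prefixes them for the JDL; a toy check of that shaping (the DiracFile stand-in below is invented for the example):

class DiracFile(object):  # minimal stand-in for the Ganga class
    def __init__(self, lfn):
        self.lfn = lfn

inputdata = [DiracFile('/lhcb/user/x/file.dst'), 'not-a-dirac-file']
# Non-DiracFile entries are silently skipped, as in dirac_inputdata
inputLFNs = ['LFN:' + f.lfn for f in inputdata if isinstance(f, DiracFile)]
print(inputLFNs)  # ['LFN:/lhcb/user/x/file.dst']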
Example #30
    def get_output(self, job):
        '''Builds lists of output files and output data.'''

        outputdata = [f.namePattern for f in job.outputfiles if outputFilePostProcessingOnWN(job, getName(f)) ]
        outsandbox = [f.namePattern for f in job.outputfiles if not outputFilePostProcessingOnWN(job, getName(f)) ]

        # if user put any files in both, remove them from the sandbox
        for f in outsandbox:
            if outputdata.count(f) != 0:
                outsandbox.remove(f)
                msg = 'User placed the file %s in both the outputsandbox and '
                msg += 'outputdata. It will be removed from the sandbox.'
                logger.warning(msg, f)

        gaudi_outsandbox, gaudi_outputdata = self.get_output_files()

        # handle (as best we can) any user supplied wildcards
        datalist = []  # files in sandbox that match pattern in data
        for f in outputdata:
            datalist += fnmatch.filter(gaudi_outsandbox, f)

        sandlist = []  # files in data that match sandbox pattern
        for f in outsandbox:
            sandlist += fnmatch.filter(gaudi_outputdata, f)

        datadatalist = []  # files in data that match patterns in data
        for f in outputdata:
            datadatalist += fnmatch.filter(gaudi_outputdata, f)

        # files in sandbox which match patterns in data -> data
        for f in datalist:
            gaudi_outputdata.append(f)
            if f in gaudi_outsandbox:
                gaudi_outsandbox.remove(f)
        # files in data which match patterns in sandbox but not data -> sandbox
        for f in sandlist:
            if datalist.count(f) == 0 and datadatalist.count(f) == 0:
                gaudi_outsandbox.append(f)
                if f in gaudi_outputdata:
                    gaudi_outputdata.remove(f)

        outsandbox += gaudi_outsandbox
        outputdata += gaudi_outputdata

        return unique(outsandbox), unique(outputdata)
Example #31
def dirac_inputdata(app):
    job = stripProxy(app).getJobObject()
    input_data = None
    parametricinput_data = None

    inputLFNs = []

    if hasattr(job.inputdata, "getLFNs"):
        inputLFNs = job.inputdata.getLFNs()

    if job.master:
        logger.debug("job.master.inputdata: %s " % str(job.master.inputdata))
    logger.debug("job.inputdata: %s" % str(job.inputdata))
    if hasattr(job.inputdata, "getLFNs"):
        logger.debug("getLFNs(): %s" % job.inputdata.getLFNs())

    has_input_DiracFile = False
    for this_file in job.inputfiles:
        if isType(this_file, DiracFile):
            has_input_DiracFile = True
            break
    if job.master and not has_input_DiracFile:
        for this_file in job.master.inputfiles:
            if isType(this_file, DiracFile):
                has_input_DiracFile = True
                break

    if len(inputLFNs) > 0:
        # master job with a splitter reaching prepare, hence bulk submit
        if not job.master and job.splitter:
            parametricinput_data = dirac_parametric_split(app)
            if parametricinput_data is not None and len(parametricinput_data) > getConfig("DIRAC")["MaxDiracBulkJobs"]:
                raise BackendError(
                    "Dirac",
                    "Number of bulk submission jobs '%s' exceeds the maximum allowed '%s' if more are needed please modify your config. Note there is a hard limit in Dirac of currently 1000."
                    % (len(parametricinput_data), getConfig("DIRAC")["MaxDiracBulkJobs"]),
                )
        # master job with no splitter or subjob already split proceed as normal
        else:
            input_data = job.inputdata.getLFNs()

    elif "Destination" not in job.backend.settings and not has_input_DiracFile:
        # This is not very DIRAC-centric; when time allows, move this to
        # LHCbDIRAC, where the T1 concept is more applicable. (rcurrie)
        # Also, editing the settings on the fly invites problems and should be avoided.
        t1_sites = getConfig("DIRAC")["noInputDataBannedSites"]
        logger.info("Job has no inputdata (T1 sites will be banned to help avoid overloading them).")
        if "BannedSites" in job.backend.settings:
            job.backend.settings["BannedSites"].extend(t1_sites)
            job.backend.settings["BannedSites"] = unique(job.backend.settings["BannedSites"])
        else:
            job.backend.settings["BannedSites"] = t1_sites[:]

    # import traceback
    # traceback.print_stack()

    return input_data, parametricinput_data
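The BannedSites handling above mutates backend.settings in place; a toy run of the same merge (the settings dict and site names are invented for the example):

def unique(items):  # same order-preserving helper as sketched earlier
    result = []
    for item in items:
        if item not in result:
            result.append(item)
    return result

settings = {'BannedSites': ['LCG.CNAF.it']}
t1_sites = ['LCG.CNAF.it', 'LCG.GRIDKA.de']

# Extend then de-duplicate, exactly as dirac_inputdata does
settings['BannedSites'].extend(t1_sites)
settings['BannedSites'] = unique(settings['BannedSites'])
print(settings['BannedSites'])  # ['LCG.CNAF.it', 'LCG.GRIDKA.de']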
Example #32
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

        if isinstance(app.jobScriptArchive, LocalFile):
            app.jobScriptArchive = None

        generateJobScripts(app, appendJobScripts=True)

        scriptArchive = os.path.join(app.jobScriptArchive.localDir, app.jobScriptArchive.namePattern)

        inputsandbox.append(File(name=scriptArchive))
        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #33
    def configure(self, master_appconfig):
        # self._configure()
        name = join('.', self.script[0].subdir, split(self.script[0].name)[-1])
        script = "from Gaudi.Configuration import *\n"
        if self.args:
            script += 'import sys\nsys.argv += %s\n' % str(self.args)
        script += "importOptions('data.py')\n"
        script += "execfile(\'%s\')\n" % name

        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = []
        input_files += [FileBuffer('gaudipython-wrapper.py', script)]
        logger.debug("Returning Job Configuration")
        return (None, StandardJobConfig(inputbox=input_files,
                                        outputbox=outputsandbox))
Example #34
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

        if isinstance(app.jobScriptArchive, LocalFile):
            app.jobScriptArchive = None

        generateJobScripts(app, appendJobScripts=True)

        scriptArchive = os.path.join(app.jobScriptArchive.localDir, app.jobScriptArchive.namePattern)

        inputsandbox.append(File(name=scriptArchive))
        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #35
    def configure(self, master_appconfig):
        # self._configure()
        name = join('.', self.script[0].subdir, split(self.script[0].name)[-1])
        script = "from Gaudi.Configuration import *\n"
        if self.args:
            script += 'import sys\nsys.argv += %s\n' % str(self.args)
        script += "importOptions('data.py')\n"
        script += "execfile(\'%s\')\n" % name

        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = []
        input_files += [FileBuffer('gaudipython-wrapper.py', script)]
        logger.debug("Returning Job Configuration")
        return (None,
                StandardJobConfig(inputbox=input_files,
                                  outputbox=outputsandbox))
Example #36
def sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig):

    logger.debug("RTUTils sandbox_prepare")

    inputsandbox = []
    outputsandbox = []

    # Here add any sandbox files coming from the appsubconfig
    # currently none. masterjobconfig inputsandbox added automatically
    if appsubconfig:
        inputsandbox += appsubconfig.getSandboxFiles()

    # Strangely NEITHER the master outputsandbox NOR job.outputsandbox
    # is added automatically.
    if jobmasterconfig:
        outputsandbox += jobmasterconfig.getOutputSandboxFiles()
    if appsubconfig:
        outputsandbox += appsubconfig.getOutputSandboxFiles()

    return unique(inputsandbox), unique(outputsandbox)
Example #37
 def configure(self, master_appconfig):
     self._configure()
     name = join('.', self.script[0].subdir, split(self.script[0].name)[-1])
     script = "from Gaudi.Configuration import *\n"
     if self.args:
         script += 'import sys\nsys.argv += %s\n' % str(self.args)
     script += "importOptions('data.py')\n"
     script += "execfile(\'%s\')\n" % name
     self.extra.input_buffers['gaudipython-wrapper.py'] = script
     outsb = self.getJobObject().outputsandbox
     self.extra.outputsandbox = unique(outsb)
     return (None, self.extra)
Example #38
 def configure(self,master_appconfig):
     self._configure()
     name = join('.',self.script[0].subdir,split(self.script[0].name)[-1])
     script =  "from Gaudi.Configuration import *\n"
     if self.args:
         script += 'import sys\nsys.argv += %s\n' % str(self.args)
     script += "importOptions('data.py')\n"
     script += "execfile(\'%s\')\n" % name
     self.extra.input_buffers['gaudipython-wrapper.py'] = script
     outsb = self.getJobObject().outputsandbox
     self.extra.outputsandbox = unique(outsb)
     return (None,self.extra)
Example #39
0
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig)

        if isinstance(app.jobScriptArchive, LocalFile):
            app.jobScriptArchive = None

        generateJobScripts(app, appendJobScripts=True)

        scriptArchive = os.path.join(app.jobScriptArchive.localDir, app.jobScriptArchive.namePattern)

        inputsandbox.append(File(name=scriptArchive))

        if app.getMetadata:
            logger.info("Adding options to make the summary.xml")
            inputsandbox.append(FileBuffer('summary.py', "\nfrom Gaudi.Configuration import *\nfrom Configurables import LHCbApp\nLHCbApp().XMLSummary='summary.xml'"))

        return StandardJobConfig(inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #40
    def configure(self, master_appconfig):

        # self._configure()
        modulename = split(self.module.name)[-1].split('.')[0]
        script = """
from copy import deepcopy
from Gaudi.Configuration import *
importOptions('data.py')
import %s as USERMODULE
EventSelectorInput = deepcopy(EventSelector().Input)
FileCatalogCatalogs = deepcopy(FileCatalog().Catalogs)
EventSelector().Input=[]
FileCatalog().Catalogs=[]\n""" % modulename

        script_configure = "USERMODULE.configure(EventSelectorInput,FileCatalogCatalogs%s)\n"
        if self.params:
            param_string = ",params=%s" % self.params
        else:
            param_string = ""

        script_configure = script_configure % param_string
        script += script_configure

        script += "USERMODULE.run(%d)\n" % self.events
        script += getXMLSummaryScript()
        # add summary.xml
        outputsandbox_temp = XMLPostProcessor._XMLJobFiles()
        outputsandbox_temp += unique(self.getJobObject().outputsandbox)
        outputsandbox = unique(outputsandbox_temp)

        input_files = []
        input_files += [FileBuffer('gaudipython-wrapper.py', script)]
        logger.debug("Returning StandardJobConfig")
        return (None,
                StandardJobConfig(inputbox=input_files,
                                  outputbox=outputsandbox))
Example #41
def dirac_inputdata(app, hasOtherInputData=False):
    """ Construct the JDL component which requests the inputdata for a job
    Args:
        app (IApplication): app which belongs to the job of interest
        hasOtherInputData (bool): This is used to stop BannedSites being added to the JDL structure through backend.settings
    """
    job = app.getJobObject()
    input_data = None
    parametricinput_data = None

    inputLFNs = []

    if not job.inputdata and (not job.master or not job.master.inputdata):
        return input_data, parametricinput_data

    wanted_job = job
    if not job.inputdata and job.master and job.master.inputdata:
        wanted_job = job.master

    inputLFNs = ['LFN:'+this_file.lfn for this_file in wanted_job.inputdata if isinstance(this_file, DiracFile)]

    # master job with a splitter reaching prepare, hence bulk submit
    if not job.master and job.splitter:
        parametricinput_data = dirac_parametric_split(app)
        if parametricinput_data is not None and len(parametricinput_data) > getConfig('DIRAC')['MaxDiracBulkJobs']:
            raise BackendError('Dirac', 'Number of bulk submission jobs \'%s\' exceeds the maximum allowed \'%s\'; if more are needed, please modify your config. Note there is a hard limit in Dirac of currently 1000.' % (
                len(parametricinput_data), getConfig('DIRAC')['MaxDiracBulkJobs']))
        # master job with no splitter or subjob already split proceed as normal
        else:
            input_data = inputLFNs

    if 'Destination' not in job.backend.settings and not inputLFNs and not hasOtherInputData:
        t1_sites = getConfig('DIRAC')['noInputDataBannedSites']
        logger.info('Job has no inputdata (T1 sites will be banned to help avoid overloading them).')
        if 'BannedSites' in job.backend.settings:
            job.backend.settings['BannedSites'].extend(t1_sites)
            job.backend.settings['BannedSites'] = unique(job.backend.settings['BannedSites'])
        else:
            if t1_sites:
                job.backend.settings['BannedSites'] = t1_sites[:]

    if not input_data and not parametricinput_data:
        input_data = inputLFNs

    return input_data, parametricinput_data
Example #42
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)
        input_data, parametricinput_data = dirac_inputdata(app)
        logger.debug("input_data: " + str(input_data))
        job = app.getJobObject()
        outputfiles = [
            this_file for this_file in job.outputfiles
            if isType(this_file, DiracFile)
        ]

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        params = {
            'DIRAC_IMPORT':
            'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
            'DIRAC_JOB_IMPORT':
            'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
            'DIRAC_OBJECT': 'DiracLHCb()',
            'JOB_OBJECT': 'LHCbJob()',
            'NAME': mangle_job_name(app),
            'INPUTDATA': input_data,
            'PARAMETRIC_INPUTDATA': parametricinput_data,
            'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
            'OUTPUTFILESSCRIPT': lhcb_dirac_outputfiles,
            'OUTPUT_PATH': "",  # job.fqid,
            'SETTINGS': diracAPI_script_settings(app),
            'DIRAC_OPTS': job.backend.diracOpts,
            'PLATFORM': getConfig('ROOT')['arch'],
            'REPLICATE':
            'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
            # leave the sandbox for altering later as needs
            # to be done in backend.submit to combine master.
            # Note only using 2 #s as auto-remove 3
            'INPUT_SANDBOX': '##INPUT_SANDBOX##'
        }

        scriptpath = os.path.join(get_share_path(app),
                                  os.path.basename(app.script.name))

        wrapper_path = os.path.join(
            job.getInputWorkspace(create=True).getPath(), 'script_wrapper.py')
        python_wrapper =\
"""#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'

del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []: script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

        python_wrapper = python_wrapper.replace(
            '###SCRIPT_ARGS###',
            str('###JOINER###'.join([str(a) for a in app.args])))

        params.update({
            'APP_NAME': 'Root',
            'APP_VERSION': app.version,
            'APP_SCRIPT': wrapper_path,
            'APP_LOG_FILE': 'Ganga_Root.log'
        })

        #params.update({'ROOTPY_SCRIPT': wrapper_path,
        #               'ROOTPY_VERSION': app.version,
        #               'ROOTPY_LOG_FILE': 'Ganga_Root.log',
        #               'ROOTPY_ARGS': [str(a) for a in app.args]})

        f = open(wrapper_path, 'w')
        if app.usepython:
            python_wrapper = script_generator(
                python_wrapper,
                remove_unreplaced=False,
                FIXARGS='',
                COMMAND='/usr/bin/env python %s %s' %
                (os.path.basename(app.script.name), '%s'),
                JOINER=' ',
                #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
            )

        else:
            python_wrapper = script_generator(
                python_wrapper,
                remove_unreplaced=False,
                FIXARGS='script_args=[formatVar(v) for v in script_args]',
                COMMAND=
                'export DISPLAY=\"localhost:0.0\" && root -l -q \"%s(%s)\"' %
                (os.path.basename(app.script.name), '%s'),
                JOINER=',',
                #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
            )

        f.write(python_wrapper)
        f.close()

        dirac_script = script_generator(lhcbdiracAPI_script_template(),
                                        **params)
        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
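The formatVar helper embedded in the wrapper above quotes any non-numeric argument so it survives the generated ROOT command line; unescaped and run standalone, it behaves like this:

def formatVar(var):
    try:
        float(var)          # numeric arguments pass through unquoted
        return str(var)
    except ValueError:
        return '"%s"' % str(var)  # strings are quoted for root -l -q

print(formatVar(3.14))    # 3.14
print(formatVar('data'))  # "data"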
Example #43
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)
        input_data, parametricinput_data = dirac_inputdata(app)
        #        outputdata,   outputdata_path      = dirac_ouputdata(app)

        job = stripProxy(app).getJobObject()
        outputfiles = [
            this_file for this_file in job.outputfiles
            if isType(this_file, DiracFile)
        ]

        commandline = []
        commandline.append(app.exe)
        if isType(app.exe, File):
            #logger.info("app: %s" % str(app.exe.name))
            #fileName = os.path.join(get_share_path(app), os.path.basename(app.exe.name))
            #logger.info("EXE: %s" % str(fileName))
            #inputsandbox.append(File(name=fileName))
            inputsandbox.append(app.exe)
            commandline[0] = os.path.join('.', os.path.basename(app.exe.name))
        commandline.extend([str(arg) for arg in app.args])
        logger.debug('Command line: %s', commandline)

        #exe_script_path = os.path.join(job.getInputWorkspace().getPath(), "exe-script.py")
        exe_script_name = 'exe-script.py'

        logger.info("Setting Command to be: '%s'" % repr(commandline))

        inputsandbox.append(
            FileBuffer(
                name=exe_script_name,
                contents=script_generator(
                    exe_script_template(),
                    #remove_unreplaced = False,
                    # ,
                    COMMAND=repr(commandline),
                    OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(
                        job, '    ')),
                executable=True))

        contents = script_generator(
            exe_script_template(),
            COMMAND=repr(commandline),
            OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(
                job, '    '))

        #logger.info("Script is: %s" % str(contents))

        from os.path import abspath, expanduser

        for this_file in job.inputfiles:
            if isinstance(this_file, LocalFile):
                for name in this_file.getFilenameList():
                    inputsandbox.append(File(abspath(expanduser(name))))
            elif isinstance(this_file, DiracFile):
                name = this_file.lfn
                if isinstance(input_data, list):
                    input_data.append(name)
                else:
                    input_data = [name]

        dirac_outputfiles = dirac_outputfile_jdl(outputfiles,
                                                 config['RequireDefaultSE'])

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            diracAPI_script_template(),
            DIRAC_IMPORT='from DIRAC.Interfaces.API.Dirac import Dirac',
            DIRAC_JOB_IMPORT='from DIRAC.Interfaces.API.Job import Job',
            DIRAC_OBJECT='Dirac()',
            JOB_OBJECT='Job()',
            NAME=mangle_job_name(app),
            # os.path.basename(exe_script_path),
            EXE=exe_script_name,
            # ' '.join([str(arg) for arg in app.args]),
            EXE_ARG_STR='',
            EXE_LOG_FILE='Ganga_Executable.log',
            ENVIRONMENT=None,  # app.env,
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=dirac_outputfiles,
            OUTPUT_PATH="",  # job.fqid,
            SETTINGS=diracAPI_script_settings(app),
            DIRAC_OPTS=job.backend.diracOpts,
            REPLICATE='True' if config['ReplicateOutputData'] else '',
            # leave the sandbox for altering later as this needs
            # to be done in backend.submit to combine with the master.
            # Note: only 2 #s are used as 3-# placeholders are auto-removed
            INPUT_SANDBOX='##INPUT_SANDBOX##')

        #logger.info("dirac_script: %s" % dirac_script)

        #logger.info("inbox: %s" % str(unique(inputsandbox)))
        #logger.info("outbox: %s" % str(unique(outputsandbox)))

        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
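The `##INPUT_SANDBOX##` placeholder above relies on a hash convention: script_generator substitutes ###KEY### markers and, per the comment, auto-removes any triple-hash markers left unreplaced, so a two-hash marker survives the first pass and is filled in later by backend.submit. A minimal sketch of that convention (a hypothetical re-implementation for illustration, not Ganga's actual script_generator):

# Toy stand-in for the ###KEY### substitution assumed above (hypothetical).
import re

def toy_script_generator(template, remove_unreplaced=True, **params):
    for key, value in params.items():
        template = template.replace('###%s###' % key, str(value))
    if remove_unreplaced:
        # unreplaced three-hash markers are dropped...
        template = re.sub(r'###\w+###', '', template)
    # ...but two-hash markers such as ##INPUT_SANDBOX## survive, leaving
    # them for backend.submit to substitute when combining with the master.
    return template

print(toy_script_generator('name = ###NAME###; box = ##INPUT_SANDBOX##', NAME='demo'))
# -> name = demo; box = ##INPUT_SANDBOX##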
Example #44
0
def get_user_dlls(appname, version, user_release_area, platform, env):

    user_ra = user_release_area
    update_project_path(user_release_area)
    from Ganga.Utility.files import fullpath
    full_user_ra = fullpath(user_ra)  # expand any symbolic links

    # Work our way through the CMTPROJECTPATH until we find a cmt directory
    if 'CMTPROJECTPATH' not in env:
        return [], [], []
    projectdirs = env['CMTPROJECTPATH'].split(os.pathsep)
    appveruser = os.path.join(appname + '_' + version, 'cmt')
    appverrelease = os.path.join(
        appname.upper(), appname.upper() + '_' + version, 'cmt')

    for projectdir in projectdirs:
        projectDir = fullpath(os.path.join(projectdir, appveruser))
        logger.debug('Looking for projectdir %s' % projectDir)
        if os.path.exists(projectDir):
            break
        projectDir = fullpath(os.path.join(projectdir, appverrelease))
        logger.debug('Looking for projectdir %s' % projectDir)
        if os.path.exists(projectDir):
            break
    logger.debug('Using the CMT directory %s for identifying projects' % projectDir)
    # rc, showProj, m = shell.cmd1('cd ' + projectDir +';cmt show projects',
    # capture_stderr=True)
    from GangaGaudi.Lib.Applications.GaudiUtils import shellEnv_cmd
    rc, showProj, m = shellEnv_cmd('cmt show projects', env, projectDir)

    logger.debug(showProj)

    libs = []
    merged_pys = []
    subdir_pys = {}
    project_areas = []
    py_project_areas = []

    for line in showProj.split('\n'):
        for entry in line.split():
            if entry.startswith(user_ra) or entry.startswith(full_user_ra):
                tmp = entry.rstrip('\)')
                libpath = fullpath(os.path.join(tmp, 'InstallArea', platform, 'lib'))
                logger.debug(libpath)
                project_areas.append(libpath)
                pypath = fullpath(os.path.join(tmp, 'InstallArea', 'python'))
                logger.debug(pypath)
                py_project_areas.append(pypath)
                pypath = fullpath(os.path.join(tmp, 'InstallArea', platform, 'python'))
                logger.debug(pypath)
                py_project_areas.append(pypath)

    # savannah 47793 (remove multiple copies of the same areas)
    from Ganga.Utility.util import unique
    project_areas = unique(project_areas)
    py_project_areas = unique(py_project_areas)

    ld_lib_path = []
    if 'LD_LIBRARY_PATH' in env:
        ld_lib_path = env['LD_LIBRARY_PATH'].split(':')
    # Rank each area by its position in LD_LIBRARY_PATH; 666 is a large
    # sentinel index so areas absent from the path sort last.
    project_areas_dict = {}
    for area in project_areas:
        if area in ld_lib_path:
            project_areas_dict[area] = ld_lib_path.index(area)
        else:
            project_areas_dict[area] = 666
    from operator import itemgetter
    sorted_project_areas = []
    for item in sorted(project_areas_dict.items(), key=itemgetter(1)):
        sorted_project_areas.append(item[0])

    lib_names = []
    for libpath in sorted_project_areas:
        if os.path.exists(libpath):
            for f in os.listdir(libpath):
                if lib_names.count(f) > 0:
                    continue
                fpath = os.path.join(libpath, f)
                if os.path.exists(fpath):
                    lib_names.append(f)
                    libs.append(fpath)
                else:
                    logger.warning("File %s in %s does not exist. Skipping...", str(f), str(libpath))

    for pypath in py_project_areas:
        if os.path.exists(pypath):
            from GangaGaudi.Lib.Applications.GaudiUtils import pyFileCollector
            from Ganga.Utility.Config import getConfig
            configGaudi = getConfig('GAUDI')
            pyFileCollector(
                pypath, merged_pys, subdir_pys, configGaudi['pyFileCollectionDepth'])

    import pprint
    logger.debug("%s", pprint.pformat(libs))
    logger.debug("%s", pprint.pformat(merged_pys))
    logger.debug("%s", pprint.pformat(subdir_pys))

    return libs, merged_pys, subdir_pys
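The ordering step above ranks each library area by its position in LD_LIBRARY_PATH so the collected DLLs respect the runtime search order, with the large sentinel index (666) pushing areas absent from the path to the back. A standalone sketch of the same idea with hypothetical paths:

# Standalone sketch of the LD_LIBRARY_PATH-priority ordering used above.
from operator import itemgetter

def order_by_ld_path(areas, ld_lib_path):
    # Areas found in LD_LIBRARY_PATH keep their search-path rank; the rest
    # get a large sentinel index (666) so they sort last.
    ranks = {}
    for area in areas:
        ranks[area] = ld_lib_path.index(area) if area in ld_lib_path else 666
    return [area for area, _ in sorted(ranks.items(), key=itemgetter(1))]

print(order_by_ld_path(['/opt/b/lib', '/opt/a/lib', '/opt/c/lib'],
                       ['/opt/a/lib', '/opt/b/lib']))
# -> ['/opt/a/lib', '/opt/b/lib', '/opt/c/lib']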
Example #45
0
    def getOutputSandboxFiles(self):
        """Get all output sandbox files. The duplicates are removed. """
        from Ganga.Utility.util import unique
        return unique(self.outputbox)
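Nearly every handler in these examples funnels its sandboxes through unique, as in the method above. A minimal sketch of the order-preserving de-duplication these call sites assume (Ganga's actual Ganga.Utility.util.unique may differ in detail); list membership is used rather than a set because sandbox entries such as File objects may not be hashable:

# Minimal order-preserving de-duplicator, as the sandbox handling assumes.
def toy_unique(seq):
    seen = []
    for item in seq:
        if item not in seen:
            seen.append(item)
    return seen

print(toy_unique(['a.log', 'b.root', 'a.log']))
# -> ['a.log', 'b.root']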
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
        input_data,   parametricinput_data = dirac_inputdata(app)
        logger.debug("input_data: " + str(input_data))
        job = app.getJobObject()
        outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        params = {'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
                  'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
                  'DIRAC_OBJECT': 'DiracLHCb()',
                  'JOB_OBJECT': 'LHCbJob()',
                  'NAME': mangle_job_name(app),
                  'INPUTDATA': input_data,
                  'PARAMETRIC_INPUTDATA': parametricinput_data,
                  'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
                  'OUTPUTFILESSCRIPT' : lhcb_dirac_outputfiles,
                  'OUTPUT_PATH': "",  # job.fqid,
                  'OUTPUT_SE': getConfig('DIRAC')['DiracOutputDataSE'],
                  'SETTINGS': diracAPI_script_settings(app),
                  'DIRAC_OPTS': job.backend.diracOpts,
                  'PLATFORM': getConfig('ROOT')['arch'],
                  'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                  # leave the sandbox for altering later as this needs
                  # to be done in backend.submit to combine with the master.
                  # Note: only 2 #s are used as 3-# placeholders are auto-removed
                  'INPUT_SANDBOX': '##INPUT_SANDBOX##'
                  }

        scriptpath = os.path.join(get_share_path(app),
                                  os.path.basename(app.script.name))

        wrapper_path = os.path.join(job.getInputWorkspace(create=True).getPath(),
                                    'script_wrapper.py')
        python_wrapper =\
"""#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'

del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []: script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

        python_wrapper = python_wrapper.replace('###SCRIPT_ARGS###', str('###JOINER###'.join([str(a) for a in app.args])))

        params.update({ 'APP_NAME' : 'Root',
                        'APP_VERSION' : app.version,
                        'APP_SCRIPT' : wrapper_path,
                        'APP_LOG_FILE' : 'Ganga_Root.log' })

        #params.update({'ROOTPY_SCRIPT': wrapper_path,
        #               'ROOTPY_VERSION': app.version,
        #               'ROOTPY_LOG_FILE': 'Ganga_Root.log',
        #               'ROOTPY_ARGS': [str(a) for a in app.args]})

        f = open(wrapper_path, 'w')
        if app.usepython:
            python_wrapper = script_generator(python_wrapper,
                                              remove_unreplaced=False,
                                              FIXARGS='',
                                              COMMAND='/usr/bin/env python %s %s' % (os.path.basename(app.script.name), '%s'),
                                              JOINER=' ',
                                              #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
                                              )


        else:
            python_wrapper = script_generator(python_wrapper,
                                              remove_unreplaced=False,
                                              FIXARGS='script_args=[formatVar(v) for v in script_args]',
                                              COMMAND='export DISPLAY=\"localhost:0.0\" && root -l -q \"%s(%s)\"' % (os.path.basename(app.script.name), '%s'),
                                              JOINER=',',
                                              #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
                                              )

        f.write(python_wrapper)
        f.close()

        dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)
        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
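In the non-python branch above, formatVar leaves numeric arguments bare and wraps everything else in escaped quotes before the ',' JOINER assembles them into a ROOT macro call. A worked sketch with a hypothetical macro name and arguments, showing the command the generated wrapper ends up running:

# Worked sketch of the ROOT-macro branch above (hypothetical values).
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError:
        return '\\"%s\\"' % str(var)

script_args = [formatVar(v) for v in ['3', 'ntuple.root']]
print('export DISPLAY="localhost:0.0" && root -l -q "%s(%s)"'
      % ('myMacro.C', ','.join(script_args)))
# -> export DISPLAY="localhost:0.0" && root -l -q "myMacro.C(3,\"ntuple.root\")"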
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """
        This function prepares the application of the actual job being submitted, master or not
        Args:
            app (IApplication): This is the application actually being submitted belonging to the master or sub job being configured
            appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
            appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
            jobmasterconfig (StandardJobConfig): This is the configuration of the master job, which may or may not be the same job as the one that owns the app
        """

        # Construct some common objects used in job submission here
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
        input_data, parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)

        job = app.getJobObject()

        # Construct the im3shape-script which is used by this job. i.e. the script and full command line to be used in this job
        exe_script_name = "im3shape-script.py"
        output_filename = os.path.basename(job.inputdata[0].lfn) + "." + str(app.rank) + "." + str(app.size)
        im3shape_args = " ".join(
            [
                os.path.basename(job.inputdata[0].lfn),
                os.path.basename(app.ini_location.namePattern),  # input.fz, config.ini
                app.catalog,
                output_filename,  # catalog, output
                str(app.rank),
                str(app.size),
            ]
        )

        full_cmd = app.exe_name + " " + im3shape_args

        outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

        inputsandbox.append(
            FileBuffer(
                name=exe_script_name,
                contents=script_generator(
                    Im3Shape_script_template(),
                    ## ARGS for app from job.app
                    RUN_DIR=app.run_dir,
                    BLACKLIST=os.path.basename(app.blacklist.namePattern),
                    COMMAND=full_cmd,
                    ## Stuff for Ganga
                    OUTPUTFILES=repr([this_file.namePattern for this_file in job.outputfiles]),
                    OUTPUTFILESINJECTEDCODE=getWNCodeForOutputPostprocessing(job, "    "),
                ),
                executable=True,
            )
        )

        # TODO once there is a common IApplication.getMeFilesForThisApp function, replace this list with a getter as it shouldn't really be hard-coded
        app_file_list = [app.im3_location, app.ini_location, app.blacklist]

        app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]
        job.inputfiles.extend(app_file_list)

        # Slightly mis-using this here but it would be nice to have these files
        # job.inputfiles.extend(job.inputdata)

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            diracAPI_script_template(),
            DIRAC_IMPORT="from DIRAC.Interfaces.API.Dirac import Dirac",
            DIRAC_JOB_IMPORT="from DIRAC.Interfaces.API.Job import Job",
            DIRAC_OBJECT="Dirac()",
            JOB_OBJECT="Job()",
            NAME=mangle_job_name(app),
            EXE=exe_script_name,
            EXE_ARG_STR="",
            EXE_LOG_FILE="Ganga_Executable.log",
            ENVIRONMENT=None,
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=dirac_outputfile_jdl(outputfiles, False),
            OUTPUT_PATH="",  # job.fqid,
            SETTINGS=diracAPI_script_settings(app),
            DIRAC_OPTS=job.backend.diracOpts,
            REPLICATE="True" if getConfig("DIRAC")["ReplicateOutputData"] else "",
            # leave the sandbox for altering later as this needs
            # to be done in backend.submit to combine with the master.
            # Note: only 2 #s are used as 3-# placeholders are auto-removed
            INPUT_SANDBOX="##INPUT_SANDBOX##",
        )

        return StandardJobConfig(dirac_script, inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
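The command line assembled above is plain string concatenation of basenames plus the rank and size of this worker. A sketch with hypothetical values showing the argument string handed to the im3shape executable:

# Sketch: the im3shape argument string assembled above (hypothetical values).
import os

lfn = '/some/grid/path/input.fz'    # job.inputdata[0].lfn (hypothetical)
ini = 'config.ini'                  # app.ini_location.namePattern (hypothetical)
catalog, rank, size = 'catalog.txt', 3, 128
output_filename = os.path.basename(lfn) + '.' + str(rank) + '.' + str(size)
print(' '.join([os.path.basename(lfn), ini, catalog, output_filename,
                str(rank), str(size)]))
# -> input.fz config.ini catalog.txt input.fz.3.128 3 128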
Example #48
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        logger.debug("Prepare")

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        job = stripProxy(app).getJobObject()

        if job.inputdata:
            if not job.splitter:
                if len(job.inputdata) > 100:
                    raise BackendError(
                        "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!"
                    )

        outputfiles = [
            this_file for this_file in job.outputfiles
            if isType(this_file, DiracFile)
        ]

        data_str = 'import os\n'
        data_str += 'execfile(\'data.py\')\n'

        if hasattr(job, '_splitter_data'):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer('data-wrapper.py', data_str))

        input_data = []

        # Can't wait to get rid of this when people no longer specify
        # inputdata in the options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), 'inputdata',
                                      'options_data.pkl')

            if not job.inputdata:
                if os.path.exists(share_path):
                    f = open(share_path, 'r+b')
                    job.inputdata = pickle.load(f)
                    f.close()

        #######################################################################

        # Can't wait to get rid of this when people no longer specify
        # outputsandbox or outputdata in the options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), 'output',
                                  'options_parser.pkl')

        if os.path.exists(share_path):
            #        if not os.path.exists(share_path):
            # raise GangaException('could not find the parser')
            f = open(share_path, 'r+b')
            parser = pickle.load(f)
            f.close()

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters['gangafiles']
            outdata_files = [
                fileTransform(this_file, None) for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend([
                output_file for output_file in outdata_files
                if not isType(output_file, DiracFile)
            ])
            outbox_files = [
                fileTransform(this_file, None) for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend([
                outbox_file for outbox_file in outbox_files
                if not isType(outbox_file, DiracFile)
            ])

            outputsandbox = [
                f.namePattern for f in job.non_copyable_outputfiles
            ]

            outputsandbox.extend([
                f.namePattern for f in job.outputfiles
                if not isType(f, DiracFile)
            ])
            outputsandbox = unique(outputsandbox)  # + outbox[:])
        #######################################################################

        input_data_dirac, parametricinput_data = dirac_inputdata(
            job.application)

        if input_data_dirac is not None:
            for f in input_data_dirac:
                if isType(f, DiracFile):
                    input_data.append(f.lfn)
                elif isType(f, str):
                    input_data.append(f)
                else:
                    raise ApplicationConfigurationError(
                        "Don't know How to handle anythig other than DiracFiles or strings to LFNs!"
                    )

        commandline = "python ./gaudipython-wrapper.py"
        if is_gaudi_child(app):
            commandline = 'gaudirun.py '
            commandline += ' '.join([str(arg) for arg in app.args])
            commandline += ' options.pkl data-wrapper.py'
        logger.debug('Command line: %s', commandline)

        gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(),
                                         "gaudi-script.py")

        script_generator(
            gaudi_script_template(),
            #remove_unreplaced = False,
            outputfile_path=gaudi_script_path,
            PLATFORM=app.platform,
            COMMAND=commandline,
            XMLSUMMARYPARSING=getXMLSummaryScript()  # ,
            #OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '    ')
        )

        #logger.debug( "input_data %s" % str( input_data ) )

        # We want to propagate the ancestor depth to DIRAC when we have
        # inputdata set
        if job.inputdata is not None and isType(job.inputdata, LHCbDataset):

            # As the RT Handler we already know we have a Dirac backend
            if type(job.backend.settings) is not dict:
                raise ApplicationConfigurationError(
                    None, 'backend.settings should be a dict')

            if 'AncestorDepth' in job.backend.settings:
                ancestor_depth = job.backend.settings['AncestorDepth']
            else:
                ancestor_depth = job.inputdata.depth
        else:
            ancestor_depth = 0

        lhcbdirac_script_template = lhcbdiracAPI_script_template()

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
        # remove after Ganga6 release
        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            lhcbdirac_script_template,
            DIRAC_IMPORT=
            'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
            DIRAC_JOB_IMPORT=
            'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
            DIRAC_OBJECT='DiracLHCb()',
            JOB_OBJECT='LHCbJob()',
            NAME=mangle_job_name(app),
            APP_NAME=stripProxy(app).appname,
            APP_VERSION=app.version,
            APP_SCRIPT=gaudi_script_path,
            APP_LOG_FILE='Ganga_%s_%s.log' %
            (stripProxy(app).appname, app.version),
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
            # job.fqid,#outputdata_path,
            OUTPUT_PATH="",
            OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
            SETTINGS=diracAPI_script_settings(job.application),
            DIRAC_OPTS=job.backend.diracOpts,
            PLATFORM=app.platform,
            REPLICATE='True'
            if getConfig('DIRAC')['ReplicateOutputData'] else '',
            ANCESTOR_DEPTH=ancestor_depth,
            ## This is to be modified in the final 'submit' function in the backend
            ## The backend also handles the inputfiles DiracFiles as appropriate
            INPUT_SANDBOX='##INPUT_SANDBOX##')
        logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
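The data-wrapper.py built near the top of this handler simply chains to data.py and appends any per-subjob payload a splitter may have attached. A sketch of the assembled contents with a hypothetical payload:

# Sketch: how data-wrapper.py is assembled above (hypothetical payload).
data_str = 'import os\n'
data_str += "execfile('data.py')\n"
data_str += '# per-subjob overrides from job._splitter_data go here\n'  # hypothetical
print(data_str)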
Example #49
0
def dirac_inputdata(app):
    job = stripProxy(app).getJobObject()
    input_data = None
    parametricinput_data = None

    inputLFNs = []

    if hasattr(job.inputdata, 'getLFNs'):
        inputLFNs = job.inputdata.getLFNs()

    if job.master:
        logger.debug("job.master.inputdata: %s " % str(job.master.inputdata))
    logger.debug("job.inputdata: %s" % str(job.inputdata))
    if hasattr(job.inputdata, 'getLFNs'):
        logger.debug("getLFNs(): %s" % job.inputdata.getLFNs())

    has_input_DiracFile = False
    for this_file in job.inputfiles:
        if isType(this_file, DiracFile):
            has_input_DiracFile = True
            break
    if job.master and not has_input_DiracFile:
        for this_file in job.master.inputfiles:
            if isType(this_file, DiracFile):
                has_input_DiracFile = True
                break

    if len(inputLFNs) > 0:
        # master job with a splitter reaching prepare, hence bulk submit
        if not job.master and job.splitter:
            parametricinput_data = dirac_parametric_split(app)
            if parametricinput_data is not None and len(
                    parametricinput_data) > getConfig(
                        'DIRAC')['MaxDiracBulkJobs']:
                raise BackendError(
                    'Dirac',
                    'Number of bulk submission jobs \'%s\' exceeds the maximum allowed \'%s\'; if more are needed please modify your config. Note there is a hard limit in Dirac of currently 1000.'
                    % (len(parametricinput_data),
                       getConfig('DIRAC')['MaxDiracBulkJobs']))
        # master job with no splitter or subjob already split proceed as normal
        else:
            input_data = job.inputdata.getLFNs()

    elif 'Destination' not in job.backend.settings and not has_input_DiracFile:
        ## THIS IS NOT VERY DIRAC-CENTRIC
        ## PLEASE, WHEN TIME ALLOWS, MOVE TO LHCbDIRAC where T1 is more applicable. rcurrie
        ## Also, editing the settings on the fly is asking for potential problems; should avoid
        t1_sites = getConfig('DIRAC')['noInputDataBannedSites']
        logger.info(
            'Job has no inputdata (T1 sites will be banned to help avoid overloading them).'
        )
        if 'BannedSites' in job.backend.settings:
            job.backend.settings['BannedSites'].extend(t1_sites)
            job.backend.settings['BannedSites'] = unique(
                job.backend.settings['BannedSites'])
        else:
            job.backend.settings['BannedSites'] = t1_sites[:]

    #import traceback
    # traceback.print_stack()

    return input_data, parametricinput_data
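Note the t1_sites[:] slice in the fallback branch above: assigning the config list directly would let later per-job edits mutate the shared configuration value. A minimal demonstration with hypothetical site names:

# Why the t1_sites[:] copy above matters (hypothetical site names).
config_banned = ['LCG.GRIDKA.de']                 # shared config list
job_settings = {'BannedSites': config_banned[:]}  # slice copy, as above
job_settings['BannedSites'].append('LCG.IN2P3.fr')
print(config_banned)
# -> ['LCG.GRIDKA.de']  (unchanged thanks to the copy)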
Example #50
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """
        This function prepares the application of the actual job being submitted, master or not
        Args:
            app (IApplication): This is the application actually being submitted belonging to the master or sub job being configured
            appsubconfig (tuple): This is used to prepare the inputsandbox according to the configuration for each subjob if it varies
            appmasterconfig (tuple): This is also used to prepare the inputsandbox but contains the config of the app for the master job
            jobmasterconfig (StandardJobConfig): This is the configuration of the master job, which may or may not be the same job as the one that owns the app
        """

        # Construct some common objects used in job submission here
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
        input_data,   parametricinput_data = dirac_inputdata(app, hasOtherInputData=True)


        job = app.getJobObject()

        # Construct the im3shape-script which is used by this job. i.e. the script and full command line to be used in this job
        exe_script_name = 'im3shape-script.py'
        output_filename = os.path.basename(job.inputdata[0].lfn) + '.' + str(app.rank) + '.' + str(app.size)
        im3shape_args = ' '.join([ os.path.basename(job.inputdata[0].lfn), os.path.basename(app.ini_location.namePattern), # input.fz, config.ini
                                   app.catalog, output_filename, # catalog, output
                                   str(app.rank), str(app.size) ])

        full_cmd = app.exe_name + ' ' + im3shape_args

        outputfiles = [this_file for this_file in job.outputfiles if isinstance(this_file, DiracFile)]

        inputsandbox.append(FileBuffer( name=exe_script_name,
                                        contents=script_generator(Im3Shape_script_template(),
                                                                  ## ARGS for app from job.app
                                                                  RUN_DIR = app.run_dir,
                                                                  BLACKLIST = os.path.basename(app.blacklist.namePattern),
                                                                  COMMAND = full_cmd,
                                                                  ## Stuff for Ganga
                                                                  OUTPUTFILES = repr([this_file.namePattern for this_file in job.outputfiles]),
                                                                  OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '    '),
                                                                 ),
                                        executable=True)
                            )

        # TODO once there is a common IApplication.getMeFilesForThisApp function, replace this list with a getter as it shouldn't really be hard-coded
        app_file_list = [app.im3_location, app.ini_location, app.blacklist]

        app_file_list = [this_file for this_file in app_file_list if isinstance(this_file, DiracFile)]
        job.inputfiles.extend(app_file_list)

        # Slightly mis-using this here but it would be nice to have these files
        #job.inputfiles.extend(job.inputdata)

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(diracAPI_script_template(),
                DIRAC_IMPORT = 'from DIRAC.Interfaces.API.Dirac import Dirac',
                DIRAC_JOB_IMPORT = 'from DIRAC.Interfaces.API.Job import Job',
                DIRAC_OBJECT = 'Dirac()',
                JOB_OBJECT = 'Job()',
                NAME = mangle_job_name(app),
                EXE = exe_script_name,
                EXE_ARG_STR = '',
                EXE_LOG_FILE = 'Ganga_Executable.log',
                ENVIRONMENT = None,
                INPUTDATA = input_data,
                PARAMETRIC_INPUTDATA = parametricinput_data,
                OUTPUT_SANDBOX = API_nullifier(outputsandbox),
                OUTPUTFILESSCRIPT = dirac_outputfile_jdl(outputfiles, False),
                OUTPUT_PATH = "",  # job.fqid,
                SETTINGS = diracAPI_script_settings(app),
                DIRAC_OPTS = job.backend.diracOpts,
                REPLICATE = 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                # leave the sandbox for altering later as this needs
                # to be done in backend.submit to combine with the master.
                # Note: only 2 #s are used as 3-# placeholders are auto-removed
                INPUT_SANDBOX = '##INPUT_SANDBOX##'
                )


        return StandardJobConfig(dirac_script,
                inputbox=unique(inputsandbox),
                outputbox=unique(outputsandbox))
Example #51
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        logger.debug("Prepare")

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        job = app.getJobObject()

        logger.debug("Loading pickle files")

        #outputfiles=set([file.namePattern for file in job.outputfiles]).difference(set(getOutputSandboxPatterns(job)))
        # Can't wait to get rid of this when people no longer specify
        # inputdata in the options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), 'inputdata',
                                      'options_data.pkl')

            if not job.inputdata:
                if os.path.exists(share_path):
                    f = open(share_path, 'r+b')
                    job.inputdata = pickle.load(f)
                    f.close()

        #######################################################################
        # Can't wait to get rid of this when people no longer specify
        # outputsandbox or outputdata in the options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), 'output',
                                  'options_parser.pkl')

        logger.debug("Adding info from pickle files")

        if os.path.exists(share_path):
            f = open(share_path, 'r+b')
            parser = pickle.load(f)
            f.close()

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters['gangafiles']
            job.non_copyable_outputfiles.extend([
                fileTransform(this_file, None) for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ])
            job.non_copyable_outputfiles.extend([
                fileTransform(this_file, None) for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ])

            outputsandbox.extend(
                [f.namePattern for f in job.non_copyable_outputfiles])

            outputsandbox.extend([f.namePattern for f in job.outputfiles])
            outputsandbox = unique(outputsandbox)
        #######################################################################

        logger.debug("Doing XML Catalog stuff")

        data = job.inputdata
        data_str = ''
        if data:
            logger.debug("Returning options String")
            data_str = data.optionsString()
            if data.hasLFNs():
                logger.debug("Returning Catalogue")
                inputsandbox.append(
                    FileBuffer('catalog.xml', data.getCatalog()))
                cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
                data_str += cat_opts

        logger.debug("Doing splitter_data stuff")
        if hasattr(job, '_splitter_data'):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer('data.py', data_str))

        logger.debug("Doing GaudiPython stuff")

        cmd = 'python ./gaudipython-wrapper.py'
        opts = ''
        if is_gaudi_child(job.application):
            opts = 'options.pkl'
            cmd = 'gaudirun.py ' + \
                ' '.join(job.application.args) + ' %s data.py' % opts

        logger.debug("Setting up script")

        script = script_generator(
            create_runscript(job.application.newStyleApp),
            remove_unreplaced=False,
            OPTS=opts,
            PROJECT_OPTS=job.application.setupProjectOptions,
            APP_NAME=job.application.appname,
            APP_VERSION=job.application.version,
            APP_PACKAGE=job.application.package,
            PLATFORM=job.application.platform,
            CMDLINE=cmd,
            XMLSUMMARYPARSING=getXMLSummaryScript())  # ,
        # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, ''))

        logger.debug("Returning StandardJobConfig")

        return StandardJobConfig(FileBuffer('gaudi-script.py',
                                            script,
                                            executable=1),
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
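When the dataset holds LFNs, the handler above appends cat_opts so Gaudi resolves files through the shipped catalog.xml. A sketch of the resulting data.py contents (the options string itself is hypothetical; the catalog lines mirror cat_opts verbatim):

# Sketch of the data.py assembled above when the dataset has LFNs.
data_str = '# ...output of data.optionsString() (hypothetical)...\n'
data_str += '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
print(data_str)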
Example #52
0
    def master_prepare(self, app, appmasterconfig):
        """
        Prepare the RTHandler for the master job so that applications can be submitted
        Args:
            app (GaudiExec): This application is only expected to handle GaudiExec Applications here
            appmasterconfig (unknown): Output passed from the application master configuration call
        """

        cred_req = app.getJobObject().backend.credential_requirements
        check_creds(cred_req)

        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)

        # If we are getting the metadata we need to make sure the summary.xml is added to the output sandbox if not there already.
        if app.getMetadata and 'summary.xml' not in outputsandbox:
            outputsandbox += ['summary.xml']

        # Check a previously uploaded input is there in case of a job copy
        if isinstance(app.uploadedInput, DiracFile):
            if app.uploadedInput.getReplicas() == {}:
                app.uploadedInput = None
                logger.info(
                    "Previously uploaded cmake target missing from Dirac. Uploading it again."
                )

        if not isinstance(app.uploadedInput, DiracFile):
            generateDiracInput(app)
            try:
                assert isinstance(app.uploadedInput, DiracFile)
            except AssertionError:
                raise ApplicationPrepareError(
                    "Failed to upload needed file, aborting submit. Tried to upload to: %s\nIf your Ganga installation is not at CERN your username may be trying to create a non-existent LFN. Try setting the 'DIRAC' configuration 'DiracLFNBase' to your grid user path.\n"
                    % DiracFile.diracLFNBase(cred_req))

        rep_data = app.uploadedInput.getReplicas()
        try:
            assert rep_data != {}
        except AssertionError:
            raise ApplicationPrepareError(
                "Failed to find a replica of uploaded file, aborting submit")

        if isinstance(app.jobScriptArchive, (DiracFile, LocalFile)):
            app.jobScriptArchive = None

        generateDiracScripts(app)

        try:
            assert isinstance(app.jobScriptArchive, DiracFile)
        except AssertionError:
            raise ApplicationPrepareError(
                "Failed to upload needed file, aborting submit")
        rep_data = app.jobScriptArchive.getReplicas()
        try:
            assert rep_data != {}
        except AssertionError:
            raise ApplicationPrepareError(
                "Failed to find a replica, aborting submit")

        #Create a replica of the job and scripts files
        replicateJobFile(app.jobScriptArchive)
        replicateJobFile(app.uploadedInput)

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
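Each try/assert/except block above exists only to turn an AssertionError into an ApplicationPrepareError carrying a user-facing message. A sketch of a more direct guard with the same effect (the exception class below is a local stand-in for the one the real handler imports); raising directly also keeps the checks alive under python -O, which strips assert statements:

# Sketch: a direct guard equivalent to the try/assert/except blocks above.
class ApplicationPrepareError(Exception):
    # Local stand-in for the exception class imported by the real handler.
    pass

def require(condition, message):
    # Raise directly rather than routing through assert.
    if not condition:
        raise ApplicationPrepareError(message)

require(1 + 1 == 2, 'never raised')
# require(False, 'Failed to upload needed file, aborting submit')  # would raise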
Example #53
0
def get_user_dlls(appname,version,user_release_area,platform,shell):

  user_ra = user_release_area
  update_cmtproject_path(user_release_area)
  full_user_ra = fullpath(user_ra) # expand any symbolic links

  # Work our way through the CMTPATH until we find a cmt directory
  if 'CMTPATH' not in shell.env: return [], [], []
  projectdirs = shell.env['CMTPATH'].split(os.pathsep)
  #appveruser = os.path.join(appname + '_' + version,'cmt')
  appveruser = os.path.join(version,'cmt')
  #appverrelease = os.path.join(appname.upper(),appname.upper() + '_' + version,
  appverrelease = os.path.join('cmt')

  for projectdir in projectdirs:
    projectDir = fullpath(os.path.join(projectdir,appveruser))
    logger.debug('Looking for projectdir %s' % projectDir)
    if os.path.exists(projectDir):
      break
    projectDir = fullpath(os.path.join(projectdir,appverrelease))
    logger.debug('Looking for projectdir %s' % projectDir)
    if os.path.exists(projectDir):
      break

  logger.debug('Using the CMT directory %s for identifying projects' % projectDir)
  rc, showProj, m = shell.cmd1('cd ' + projectDir + ';cmt show projects',
                               capture_stderr=True)

  logger.debug(showProj)

  libs=[]
  merged_pys = []
  subdir_pys = {}
  project_areas = []
  py_project_areas = []

  for line in showProj.split('\n'):
    for entry in line.split():
      if entry.startswith(user_ra) or entry.startswith(full_user_ra):
        tmp = entry.rstrip('\)')
        libpath = fullpath(os.path.join(tmp,'InstallArea',platform,'lib'))
        logger.debug(libpath)
        project_areas.append(libpath)
        pypath = fullpath(os.path.join(tmp,'InstallArea','python'))
        logger.debug(pypath)
        py_project_areas.append(pypath)
        pypath = fullpath(os.path.join(tmp,'InstallArea',platform,'python'))
        logger.debug(pypath)
        py_project_areas.append(pypath)

  # savannah 47793 (remove multiple copies of the same areas)
  project_areas = unique(project_areas)
  py_project_areas = unique(py_project_areas)

  ld_lib_path = []
  if 'LD_LIBRARY_PATH' in shell.env:
    ld_lib_path = shell.env['LD_LIBRARY_PATH'].split(':')
  project_areas_dict = {}
  for area in project_areas:
    if area in ld_lib_path:
      project_areas_dict[area] = ld_lib_path.index(area)
    else:
      project_areas_dict[area] = 666
  from operator import itemgetter
  sorted_project_areas = []
  for item in sorted(project_areas_dict.items(),key=itemgetter(1)):
    sorted_project_areas.append(item[0])
  
  lib_names = []  
  for libpath in sorted_project_areas:
    if os.path.exists(libpath):
      for f in os.listdir(libpath):
        if lib_names.count(f) > 0: continue
        fpath = os.path.join(libpath,f)
        if os.path.exists(fpath):
          lib_names.append(f)
          libs.append(fpath)
        else:
          logger.warning("File %s in %s does not exist. Skipping...",
                         str(f),str(libpath))

  for pypath in py_project_areas:
    if os.path.exists( pypath):
      for f in os.listdir( pypath):
        confDB_path = os.path.join( pypath, f)
        if confDB_path.endswith( '.py'):
          if os.path.exists( confDB_path):
            merged_pys.append( confDB_path)
          else:
            logger.warning("File %s in %s does not exist. Skipping...",
                           str(f),str(confDB_path))
        elif os.path.isdir(confDB_path):
          pyfiles = []
          for g in os.listdir(confDB_path):
            file_path = os.path.join(confDB_path, g)
            if (file_path.endswith('.py')):
              if os.path.exists(file_path):
                pyfiles.append(file_path)
              else:
                logger.warning("File %s in %s does not exist. Skipping...",
                               str(g),str(f))
          subdir_pys[ f] = pyfiles
                    
  logger.debug("%s",pprint.pformat( libs))
  logger.debug("%s",pprint.pformat( merged_pys))
  logger.debug("%s",pprint.pformat( subdir_pys))

  return libs, merged_pys, subdir_pys
Example #54
0
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig, ['inputsandbox'])
        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #55
0
    def master_prepare(self, app, appmasterconfig):
        inputsandbox, outputsandbox = master_sandbox_prepare(app, appmasterconfig, ['inputsandbox'])
        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Example #56
0
    def getOutputSandboxFiles(self):
        """Get all output sandbox files. The duplicates are removed. """
        from Ganga.Utility.util import unique
        return unique(self.outputbox)
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

        job = app.getJobObject()

        if job.inputdata:
            if not job.splitter:
                if len(job.inputdata) > 100:
                    raise BackendError(
                        "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!"
                    )

        outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

        data_str = "import os\n"
        data_str += "execfile('data.py')\n"

        if hasattr(job, "_splitter_data"):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer("data-wrapper.py", data_str))

        input_data = []

        # Can't wait to get rid of this when people no longer specify
        # inputdata in the options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), "inputdata", "options_data.pkl")

            if not job.inputdata:
                if os.path.exists(share_path):
                    f = open(share_path, "r+b")
                    job.inputdata = pickle.load(f)
                    f.close()

        #######################################################################

        # Can't wait to get rid of this when people no longer specify
        # outputsandbox or outputdata in the options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), "output", "options_parser.pkl")

        if os.path.exists(share_path):
            #        if not os.path.exists(share_path):
            # raise GangaException('could not find the parser')
            f = open(share_path, "r+b")
            parser = pickle.load(f)
            f.close()

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters["gangafiles"]
            outdata_files = [
                fileTransform(this_file, None)
                for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend(
                [output_file for output_file in outdata_files if not isType(output_file, DiracFile)]
            )
            outbox_files = [
                fileTransform(this_file, None)
                for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend(
                [outbox_file for outbox_file in outbox_files if not isType(outbox_file, DiracFile)]
            )

            outputsandbox = [f.namePattern for f in job.non_copyable_outputfiles]

            outputsandbox.extend([f.namePattern for f in job.outputfiles if not isType(f, DiracFile)])
            outputsandbox = unique(outputsandbox)  # + outbox[:])
        #######################################################################

        input_data_dirac, parametricinput_data = dirac_inputdata(job.application)

        if input_data_dirac is not None:
            for f in input_data_dirac:
                if isType(f, DiracFile):
                    input_data.append(f.lfn)
                elif isType(f, str):
                    input_data.append(f)
                else:
                    raise ApplicationConfigurationError(
                        "Don't know How to handle anythig other than DiracFiles or strings to LFNs!"
                    )

        commandline = "python ./gaudipython-wrapper.py"
        if is_gaudi_child(app):
            commandline = "gaudirun.py "
            commandline += " ".join([str(arg) for arg in app.args])
            commandline += " options.pkl data-wrapper.py"
        logger.debug("Command line: %s: ", commandline)

        gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")

        script_generator(
            gaudi_script_template(),
            # remove_unreplaced = False,
            outputfile_path=gaudi_script_path,
            PLATFORM=app.platform,
            COMMAND=commandline,
            XMLSUMMARYPARSING=getXMLSummaryScript()  # ,
            # OUTPUTFILESINJECTEDCODE = getWNCodeForOutputPostprocessing(job, '    ')
        )

        # logger.debug( "input_data %s" % str( input_data ) )

        # We want to propagate the ancestor depth to DIRAC when we have
        # inputdata set
        if job.inputdata is not None and isType(job.inputdata, LHCbDataset):

            # As the RT Handler we already know we have a Dirac backend
            if type(job.backend.settings) is not dict:
                raise ApplicationConfigurationError(None, "backend.settings should be a dict")

            if "AncestorDepth" in job.backend.settings:
                ancestor_depth = job.backend.settings["AncestorDepth"]
            else:
                ancestor_depth = job.inputdata.depth
        else:
            ancestor_depth = 0

        lhcbdirac_script_template = lhcbdiracAPI_script_template()

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
        # remove after Ganga6 release
        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            lhcbdirac_script_template,
            DIRAC_IMPORT="from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb",
            DIRAC_JOB_IMPORT="from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob",
            DIRAC_OBJECT="DiracLHCb()",
            JOB_OBJECT="LHCbJob()",
            NAME=mangle_job_name(app),
            APP_NAME=app.appname,
            APP_VERSION=app.version,
            APP_SCRIPT=gaudi_script_path,
            APP_LOG_FILE="Ganga_%s_%s.log" % (app.appname, app.version),
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
            # job.fqid,#outputdata_path,
            OUTPUT_PATH="",
            OUTPUT_SE=getConfig("DIRAC")["DiracOutputDataSE"],
            SETTINGS=diracAPI_script_settings(job.application),
            DIRAC_OPTS=job.backend.diracOpts,
            PLATFORM=app.platform,
            REPLICATE="True" if getConfig("DIRAC")["ReplicateOutputData"] else "",
            ANCESTOR_DEPTH=ancestor_depth,
            ## This is to be modified in the final 'submit' function in the backend
            ## The backend also handles the inputfiles DiracFiles as appropriate
            INPUT_SANDBOX="##INPUT_SANDBOX##",
        )
        logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

        return StandardJobConfig(dirac_script, inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
Example #58
0
class Gaudi(Francesc):

    _name = 'Gaudi'
    __doc__ = GaudiDocString(_name)
    _category = 'applications'
    _exportmethods = ['getenv','getpack', 'make', 'cmt', 'register', 'add_output_dir', 'get_output_dir', 'add_dataset_name', 'get_dataset_name']

    schema = get_common_gaudi_schema()
    docstr = 'The name of the optionsfile. Import statements in the file ' \
             'will be expanded at submission time and a full copy made'
    schema['optsfile'] =  FileItem(sequence=1,strict_sequence=0,defvalue=[],
                                   doc=docstr)
    docstr = 'The name of the rec optionsfile. '
    schema['recoptsfile'] =  FileItem(sequence=1,strict_sequence=0,defvalue=[],
                                   doc=docstr)
    docstr = 'The name of the ana optionsfile. '
    schema['anaoptsfile'] =  FileItem(sequence=1,strict_sequence=0,defvalue=[],
                                   doc=docstr)
    docstr = 'The name of the Gaudi application (e.g. "DaVinci", "Gauss"...)'
    schema['appname'] = SimpleItem(defvalue=None,typelist=['str','type(None)'],
                                   hidden=1,doc=docstr)
    schema['configured'] = SimpleItem(defvalue=None,hidden=0,copyable=0,
                                      typelist=['str','type(None)'])
    docstr = 'A python configurable string that will be appended to the '  \
             'end of the options file. Can be multiline by using a '  \
             'notation like \nHistogramPersistencySvc().OutputFile = '  \
             '\"myPlots.root"\\nEventSelector().PrintFreq = 100\n or by '  \
             'using triple quotes around a multiline string.'
    schema['extraopts'] = SimpleItem(defvalue=None,
                                     typelist=['str','type(None)'],doc=docstr)
    docstr = 'User metadata'
    schema['metadata'] = SimpleItem(defvalue={},doc=docstr)
    docstr = 'Task name'
    schema['taskname'] = SimpleItem(defvalue='',doc=docstr)
    docstr = 'Long idle job'
    schema['long_idle'] = SimpleItem(defvalue=False,doc=docstr)
    docstr = 'Create dataset'
    schema['create_dataset'] = SimpleItem(defvalue=True,doc=docstr)
    docstr = 'Auto add stream number'
    schema['auto_stream'] = SimpleItem(defvalue=False,doc=docstr)
    docstr = 'Use local random trigger files'
    schema['local_rantrg'] = SimpleItem(defvalue=True,doc=docstr)
    docstr = 'Patch files'
    schema['patch'] = SimpleItem(defvalue=[],doc=docstr)
    docstr = 'Use patch for BOSS'
    schema['use_boss_patch'] = SimpleItem(defvalue=True,doc=docstr)
    docstr = 'Auto upload files'
    schema['auto_upload'] = SimpleItem(defvalue=[],doc=docstr)
    docstr = 'User workarea'
    schema['user_workarea'] = SimpleItem(defvalue='',doc=docstr)
    docstr = 'Use custom packages'
    schema['use_custom_package'] = SimpleItem(defvalue=False,doc=docstr)
    docstr = 'Output directory'
    schema['output_dir'] = SimpleItem(defvalue='GangaBoss',doc=docstr)
    docstr = 'Output root directory'
    schema['output_rootdir'] = SimpleItem(defvalue='',doc=docstr)
    docstr = 'Output data type'
    schema['output_step'] = SimpleItem(defvalue=[],doc=docstr)

    _schema = Schema(Version(2, 1), schema)

    def _auto__init__(self):
        """bootstrap Gaudi applications. If called via a subclass
        set up some basic structure like version platform..."""
        if not self.appname: return
        self._init(self.appname,True)


    def master_configure(self):
        self._validate_version()

        job = self.getJobObject()
        self._master_configure()
        inputs = self._check_inputs()
        optsfiles = [fileitem.name for fileitem in self.optsfile]
        recoptsfiles = [fileitem.name for fileitem in self.recoptsfile]
        anaoptsfiles = [fileitem.name for fileitem in self.anaoptsfile]
        try:
            parser = PythonOptionsParser(optsfiles,self.extraopts,self.shell)
            if recoptsfiles:
                recparser = PythonOptionsParser(recoptsfiles,self.extraopts,self.shell)
                if anaoptsfiles:
                    anaparser = PythonOptionsParser(anaoptsfiles,self.extraopts,self.shell)
        except ApplicationConfigurationError, e:
            debug_dir = job.getDebugWorkspace().getPath()
            f = open(debug_dir + '/gaudirun.stdout','w')
            f.write(e.message)
            f.close()
            msg = 'Unable to parse job options! Please check options ' \
                  'files and extraopts. The output from gaudirun.py can be ' \
                  'found in %s. You can also view this from within ganga '\
                  'by doing job.peek(\'../debug/gaudirun.stdout\').' % f.name
            #logger.error(msg)
            raise ApplicationConfigurationError(None,msg)

        self.extra.master_input_buffers['options.opts'] = parser.opts_str
        if recoptsfiles:
            self.extra.master_input_buffers['recoptions.opts'] = recparser.opts_str
            if anaoptsfiles:
                self.extra.master_input_buffers['anaoptions.opts'] = anaparser.opts_str
        inputdata = parser.get_input_data()

        # If the user specified a dataset, ignore the optsfile data but warn the user.
        if len(inputdata.files) > 0:
            if job.inputdata:
                msg = 'A dataset was specified for this job but one was ' \
                      'also defined in the options file. Data in the options '\
                      'file will be ignored...hopefully this is OK.'
                logger.warning(msg)
            else:
                logger.info('Using the inputdata defined in the options file.')
                self.extra.inputdata = inputdata

        # only output the last step
        if not self.output_step:
            if anaoptsfiles:
                self.output_step.append('ana')
            elif recoptsfiles:
                self.output_step.append('rec')
            else:
                self.output_step.append('sim')

        # data type for each step
        simoutputsandbox,simoutputdata = parser.get_output(job)
        for temp_output in simoutputdata:
            temp_data_type = os.path.splitext(temp_output)[-1][1:]
            temp_data_type = temp_data_type.strip()
            if temp_data_type in ['rtraw']:
                self.extra.data_type['sim'] = temp_data_type

        if recoptsfiles:
            recoutputsandbox,recoutputdata = recparser.get_output(job)
            for temp_output in recoutputdata:
                temp_data_type = os.path.splitext(temp_output)[-1][1:]
                temp_data_type = temp_data_type.strip()
                if temp_data_type in ['dst', 'rec']:
                    self.extra.data_type['rec'] = temp_data_type

            if anaoptsfiles:
                anaoutputsandbox,anaoutputdata = anaparser.get_output(job)
                self.extra.ana_file_nos = anaparser.get_ana_file_nos()
                for temp_output in anaoutputdata:
                    temp_data_type = os.path.splitext(temp_output)[-1][1:]
                    temp_data_type = temp_data_type.strip()
                    if temp_data_type in ['root']:
                        self.extra.data_type['ana'] = temp_data_type

        logger.debug('The output step : %s' % self.output_step)
        logger.debug('The data_type : %s' % self.extra.data_type)

        # get output file name
        if anaoptsfiles:
            self.extra.outputsandbox, outputdata = anaoutputsandbox, anaoutputdata
        elif recoptsfiles:
            self.extra.outputsandbox, outputdata = recoutputsandbox, recoutputdata
        else:
            self.extra.outputsandbox, outputdata = simoutputsandbox, simoutputdata
        self.extra.outputdata.files += outputdata
        self.extra.outputdata.files = unique(self.extra.outputdata.files)

        self._validate_input()
        self._custom_package()
        self._auto_upload_workarea()

        if self.output_rootdir:
            bdr = BDRegister(self.extra.metadata)
            bdr.setRootDir(self.output_rootdir)

        self._prepare_metadata(parser)

        self._task_info()

        # write env into the input dir
        input_dir = job.getInputWorkspace().getPath()
        env_file = gzip.GzipFile(input_dir + '/gaudi-env.py.gz', 'wb')
        env_file.write('gaudi_env = %s' % str(self.shell.env))
        env_file.close()

        return (inputs, self.extra) # return (changed, extra)
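The gzip dump at the end stores the shell environment as a single python assignment in gaudi-env.py.gz. A sketch of reading it back elsewhere (the path is hypothetical):

# Sketch: reading back the gaudi-env.py.gz written above (hypothetical path).
import gzip

f = gzip.GzipFile('/path/to/inputdir/gaudi-env.py.gz', 'rb')
source = f.read()
f.close()
namespace = {}
exec(source, namespace)        # the file contains: gaudi_env = {...}
print(namespace['gaudi_env'])  # the captured shell environment dict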