Exemple #1
0
    def master_prepare(self, app, appmasterconfig):
        """Build the master job configuration for an executable application.

        Collects the master sandboxes via ``master_sandbox_prepare`` and, when
        ``app.exe`` is a ``File``, checks that a file with the same basename
        exists under ``get_share_path(app)`` and marks it executable.

        Args:
            app: application being prepared; ``app.exe`` is inspected.
            appmasterconfig: forwarded unchanged to ``master_sandbox_prepare``.

        Returns:
            A ``StandardJobConfig`` built from the input and output sandboxes,
            each passed through ``unique()``.

        Raises:
            ApplicationConfigurationError: if the executable is not found
                under the share path.
        """
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)
        if type(app.exe) == File:
            exefile = os.path.join(get_share_path(app),
                                   os.path.basename(app.exe.name))
            if not os.path.exists(exefile):
                msg = 'Executable must exist!'
                raise ApplicationConfigurationError(None, msg)

            # Run chmod with an argument list instead of interpolating the
            # path into a shell command line: a path containing spaces or
            # shell metacharacters is then passed through verbatim rather
            # than being split or interpreted by the shell.
            import subprocess
            subprocess.call(['chmod', '+x', exefile])
        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
    def master_prepare(self, app, appmasterconfig):
        """Validate the application's script and build the master job config.

        Args:
            app: application being prepared; ``app.script.name`` is read.
            appmasterconfig: forwarded unchanged to ``master_sandbox_prepare``.

        Returns:
            A ``StandardJobConfig`` built from the input and output sandboxes,
            each passed through ``unique()``.

        Raises:
            ApplicationConfigurationError: if no script name is set, or if no
                file with the script's basename exists under
                ``get_share_path(app)``.
        """
        sandboxes = master_sandbox_prepare(app, appmasterconfig)
        inputsandbox, outputsandbox = sandboxes

        # A script name is mandatory before anything else is checked.
        script_name = app.script.name
        if not script_name:
            raise ApplicationConfigurationError(
                None, 'Root.script.name must be set.')

        # The script is looked up by basename under the share path.
        shared_script = os.path.join(get_share_path(app),
                                     os.path.basename(script_name))
        if os.path.exists(shared_script):
            return StandardJobConfig(inputbox=unique(inputsandbox),
                                     outputbox=unique(outputsandbox))

        raise ApplicationConfigurationError(None, 'Script must exist!')
    def master_prepare(self, app, appmasterconfig):
        """Check that the application's script is usable, then build the config.

        Args:
            app: application being prepared; ``app.script.name`` is read.
            appmasterconfig: forwarded unchanged to ``master_sandbox_prepare``.

        Returns:
            A ``StandardJobConfig`` built from the input and output sandboxes,
            each passed through ``unique()``.

        Raises:
            ApplicationConfigurationError: if the script name is empty, or if
                no file with the script's basename exists under
                ``get_share_path(app)``.
        """
        inputsandbox, outputsandbox = master_sandbox_prepare(
            app, appmasterconfig)

        problem = None
        if not app.script.name:
            problem = 'Root.script.name must be set.'
        else:
            # The script is looked up by basename under the share path.
            candidate = os.path.join(get_share_path(app),
                                     os.path.basename(app.script.name))
            if not os.path.exists(candidate):
                problem = 'Script must exist!'

        if problem is not None:
            raise ApplicationConfigurationError(problem)

        return StandardJobConfig(inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Build the LHCbDirac submission script and sandboxes for a Gaudi job.

        Steps, in order:
          * collect the sandboxes via ``sandbox_prepare``;
          * reject a job with input data, no splitter and more than 100 files;
          * append a ``data-wrapper.py`` buffer to the input sandbox;
          * for a job without a master and without input data, load the input
            data pickled at ``inputdata/options_data.pkl`` under the share
            path, if that file exists;
          * if ``output/options_parser.pkl`` exists under the share path, use
            the pickled parser to extend ``job.non_copyable_outputfiles`` and
            rebuild the output sandbox from the non-``DiracFile`` outputs;
          * render the gaudi script template with ``outputfile_path`` set to
            ``gaudi-script.py`` in the job's input workspace;
          * render the LHCbDirac API script template.

        Args:
            app: application being prepared.
            appsubconfig: forwarded to ``sandbox_prepare``.
            appmasterconfig: forwarded to ``sandbox_prepare``.
            jobmasterconfig: forwarded to ``sandbox_prepare``.

        Returns:
            A ``StandardJobConfig`` wrapping the rendered DIRAC script and the
            sandboxes, each passed through ``unique()``.

        Raises:
            BackendError: for an unsplit job with more than 100 input files.
            ApplicationConfigurationError: if an input data entry is neither a
                ``DiracFile`` nor a string, or if ``job.backend.settings`` is
                not a dict when the input data is an ``LHCbDataset``.
        """
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

        job = app.getJobObject()

        if job.inputdata and not job.splitter and len(job.inputdata) > 100:
            raise BackendError(
                "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!"
            )

        outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

        data_str = "import os\n"
        data_str += "execfile('data.py')\n"

        if hasattr(job, "_splitter_data"):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer("data-wrapper.py", data_str))

        input_data = []

        # Cant wait to get rid of this when people no-longer specify
        # inputdata in options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), "inputdata", "options_data.pkl")

            if not job.inputdata and os.path.exists(share_path):
                # Read-only binary mode is enough for unpickling, and the
                # context manager closes the handle even if loading fails.
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # this assumes the share path is only written by trusted code.
                with open(share_path, "rb") as data_file:
                    job.inputdata = pickle.load(data_file)

        #######################################################################

        # Cant wait to get rid of this when people no-longer specify
        # outputsandbox or outputdata in options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), "output", "options_parser.pkl")

        if os.path.exists(share_path):
            # Same handling as above: read-only, closed on every exit path.
            with open(share_path, "rb") as parser_file:
                parser = pickle.load(parser_file)

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters["gangafiles"]
            outdata_files = [
                fileTransform(this_file, None)
                for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend(
                [output_file for output_file in outdata_files if not isType(output_file, DiracFile)]
            )
            outbox_files = [
                fileTransform(this_file, None)
                for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend(
                [outbox_file for outbox_file in outbox_files if not isType(outbox_file, DiracFile)]
            )

            # The output sandbox is rebuilt from scratch here, replacing the
            # list returned by sandbox_prepare.
            outputsandbox = [out_file.namePattern for out_file in job.non_copyable_outputfiles]

            outputsandbox.extend(
                [out_file.namePattern for out_file in job.outputfiles if not isType(out_file, DiracFile)]
            )
            outputsandbox = unique(outputsandbox)
        #######################################################################

        input_data_dirac, parametricinput_data = dirac_inputdata(job.application)

        if input_data_dirac is not None:
            for entry in input_data_dirac:
                if isType(entry, DiracFile):
                    input_data.append(entry.lfn)
                elif isType(entry, str):
                    input_data.append(entry)
                else:
                    raise ApplicationConfigurationError(
                        "Don't know How to handle anythig other than DiracFiles or strings to LFNs!"
                    )

        commandline = "python ./gaudipython-wrapper.py"
        if is_gaudi_child(app):
            commandline = "gaudirun.py "
            commandline += " ".join([str(arg) for arg in app.args])
            commandline += " options.pkl data-wrapper.py"
        logger.debug("Command line: %s: ", commandline)

        gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(), "gaudi-script.py")

        script_generator(
            gaudi_script_template(),
            outputfile_path=gaudi_script_path,
            PLATFORM=app.platform,
            COMMAND=commandline,
            XMLSUMMARYPARSING=getXMLSummaryScript()
        )

        # We want to propagate the ancestor depth to DIRAC when we have
        # inputdata set
        if job.inputdata is not None and isType(job.inputdata, LHCbDataset):

            # As the RT Handler we already know we have a Dirac backend
            if type(job.backend.settings) is not dict:
                raise ApplicationConfigurationError(None, "backend.settings should be a dict")

            # An explicit backend setting wins over the dataset's own depth.
            if "AncestorDepth" in job.backend.settings:
                ancestor_depth = job.backend.settings["AncestorDepth"]
            else:
                ancestor_depth = job.inputdata.depth
        else:
            ancestor_depth = 0

        lhcbdirac_script_template = lhcbdiracAPI_script_template()

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
        # remove after Ganga6 release
        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            lhcbdirac_script_template,
            DIRAC_IMPORT="from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb",
            DIRAC_JOB_IMPORT="from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob",
            DIRAC_OBJECT="DiracLHCb()",
            JOB_OBJECT="LHCbJob()",
            NAME=mangle_job_name(app),
            APP_NAME=app.appname,
            APP_VERSION=app.version,
            APP_SCRIPT=gaudi_script_path,
            APP_LOG_FILE="Ganga_%s_%s.log" % (app.appname, app.version),
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
            OUTPUT_PATH="",
            OUTPUT_SE=getConfig("DIRAC")["DiracOutputDataSE"],
            SETTINGS=diracAPI_script_settings(job.application),
            DIRAC_OPTS=job.backend.diracOpts,
            PLATFORM=app.platform,
            REPLICATE="True" if getConfig("DIRAC")["ReplicateOutputData"] else "",
            ANCESTOR_DEPTH=ancestor_depth,
            ## This is to be modified in the final 'submit' function in the backend
            ## The backend also handles the inputfiles DiracFiles as appropriate
            INPUT_SANDBOX="##INPUT_SANDBOX##",
        )
        logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

        return StandardJobConfig(dirac_script, inputbox=unique(inputsandbox), outputbox=unique(outputsandbox))
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)
        input_data,   parametricinput_data = dirac_inputdata(app)
        logger.debug("input_data: " + str(input_data))
        job = app.getJobObject()
        outputfiles = [this_file for this_file in job.outputfiles if isType(this_file, DiracFile)]

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        params = {'DIRAC_IMPORT': 'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
                  'DIRAC_JOB_IMPORT': 'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
                  'DIRAC_OBJECT': 'DiracLHCb()',
                  'JOB_OBJECT': 'LHCbJob()',
                  'NAME': mangle_job_name(app),
                  'INPUTDATA': input_data,
                  'PARAMETRIC_INPUTDATA': parametricinput_data,
                  'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
                  'OUTPUTFILESSCRIPT' : lhcb_dirac_outputfiles,
                  'OUTPUT_PATH': "",  # job.fqid,
                  'OUTPUT_SE': getConfig('DIRAC')['DiracOutputDataSE'],
                  'SETTINGS': diracAPI_script_settings(app),
                  'DIRAC_OPTS': job.backend.diracOpts,
                  'PLATFORM': getConfig('ROOT')['arch'],
                  'REPLICATE': 'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
                  # leave the sandbox for altering later as needs
                  # to be done in backend.submit to combine master.
                  # Note only using 2 #s as auto-remove 3
                  'INPUT_SANDBOX': '##INPUT_SANDBOX##'
                  }

        scriptpath = os.path.join(get_share_path(app),
                                  os.path.basename(app.script.name))

        wrapper_path = os.path.join(job.getInputWorkspace(create=True).getPath(),
                                    'script_wrapper.py')
        python_wrapper =\
"""#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'

del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []: script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

        python_wrapper = python_wrapper.replace('###SCRIPT_ARGS###', str('###JOINER###'.join([str(a) for a in app.args])))

        params.update({ 'APP_NAME' : 'Root',
                        'APP_VERSION' : app.version,
                        'APP_SCRIPT' : wrapper_path,
                        'APP_LOG_FILE' : 'Ganga_Root.log' })

        #params.update({'ROOTPY_SCRIPT': wrapper_path,
        #               'ROOTPY_VERSION': app.version,
        #               'ROOTPY_LOG_FILE': 'Ganga_Root.log',
        #               'ROOTPY_ARGS': [str(a) for a in app.args]})

        f = open(wrapper_path, 'w')
        if app.usepython:
            python_wrapper = script_generator(python_wrapper,
                                              remove_unreplaced=False,
                                              FIXARGS='',
                                              COMMAND='/usr/bin/env python %s %s' % (os.path.basename(app.script.name), '%s'),
                                              JOINER=' ',
                                              #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
                                              )


        else:
            python_wrapper = script_generator(python_wrapper,
                                              remove_unreplaced=False,
                                              FIXARGS='script_args=[formatVar(v) for v in script_args]',
                                              COMMAND='export DISPLAY=\"localhoast:0.0\" && root -l -q \"%s(%s)\"' % (os.path.basename(app.script.name), '%s'),
                                              JOINER=',',
                                              #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
                                              )

        f.write(python_wrapper)
        f.close()

        dirac_script = script_generator(lhcbdiracAPI_script_template(), **params)
        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)
        input_data, parametricinput_data = dirac_inputdata(app)
        logger.debug("input_data: " + str(input_data))
        job = app.getJobObject()
        outputfiles = [
            this_file for this_file in job.outputfiles
            if isType(this_file, DiracFile)
        ]

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        params = {
            'DIRAC_IMPORT':
            'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
            'DIRAC_JOB_IMPORT':
            'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
            'DIRAC_OBJECT': 'DiracLHCb()',
            'JOB_OBJECT': 'LHCbJob()',
            'NAME': mangle_job_name(app),
            'INPUTDATA': input_data,
            'PARAMETRIC_INPUTDATA': parametricinput_data,
            'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
            'OUTPUTFILESSCRIPT': lhcb_dirac_outputfiles,
            'OUTPUT_PATH': "",  # job.fqid,
            'SETTINGS': diracAPI_script_settings(app),
            'DIRAC_OPTS': job.backend.diracOpts,
            'PLATFORM': getConfig('ROOT')['arch'],
            'REPLICATE':
            'True' if getConfig('DIRAC')['ReplicateOutputData'] else '',
            # leave the sandbox for altering later as needs
            # to be done in backend.submit to combine master.
            # Note only using 2 #s as auto-remove 3
            'INPUT_SANDBOX': '##INPUT_SANDBOX##'
        }

        scriptpath = os.path.join(get_share_path(app),
                                  os.path.basename(app.script.name))

        wrapper_path = os.path.join(
            job.getInputWorkspace(create=True).getPath(), 'script_wrapper.py')
        python_wrapper =\
"""#!/usr/bin/env python
import os, sys
def formatVar(var):
    try:
        float(var)
        return str(var)
    except ValueError as v:
        return '\\\"%s\\\"' % str(var)

script_args = '###SCRIPT_ARGS###'

del sys.argv[sys.argv.index('script_wrapper.py')]
###FIXARGS###
if script_args == []: script_args = ''
os.system('###COMMAND###' % script_args)
###INJECTEDCODE###
"""

        python_wrapper = python_wrapper.replace(
            '###SCRIPT_ARGS###',
            str('###JOINER###'.join([str(a) for a in app.args])))

        params.update({
            'APP_NAME': 'Root',
            'APP_VERSION': app.version,
            'APP_SCRIPT': wrapper_path,
            'APP_LOG_FILE': 'Ganga_Root.log'
        })

        #params.update({'ROOTPY_SCRIPT': wrapper_path,
        #               'ROOTPY_VERSION': app.version,
        #               'ROOTPY_LOG_FILE': 'Ganga_Root.log',
        #               'ROOTPY_ARGS': [str(a) for a in app.args]})

        f = open(wrapper_path, 'w')
        if app.usepython:
            python_wrapper = script_generator(
                python_wrapper,
                remove_unreplaced=False,
                FIXARGS='',
                COMMAND='/usr/bin/env python %s %s' %
                (os.path.basename(app.script.name), '%s'),
                JOINER=' ',
                #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
            )

        else:
            python_wrapper = script_generator(
                python_wrapper,
                remove_unreplaced=False,
                FIXARGS='script_args=[formatVar(v) for v in script_args]',
                COMMAND=
                'export DISPLAY=\"localhoast:0.0\" && root -l -q \"%s(%s)\"' %
                (os.path.basename(app.script.name), '%s'),
                JOINER=',',
                #INJECTEDCODE = getWNCodeForOutputPostprocessing(job,'')
            )

        f.write(python_wrapper)
        f.close()

        dirac_script = script_generator(lhcbdiracAPI_script_template(),
                                        **params)
        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Build the run script and sandboxes for a Gaudi job.

        Steps, in order:
          * collect the sandboxes via ``sandbox_prepare``;
          * for a job without a master and without input data, load the input
            data pickled at ``inputdata/options_data.pkl`` under the share
            path, if that file exists;
          * if ``output/options_parser.pkl`` exists under the share path, use
            the pickled parser to extend ``job.non_copyable_outputfiles`` and
            rebuild the output sandbox from those files plus
            ``job.outputfiles``;
          * add ``data.py`` (and ``catalog.xml`` when the data has LFNs) to
            the input sandbox;
          * render the run script returned as ``gaudi-script.py``.

        Args:
            app: application being prepared.
            appsubconfig: forwarded to ``sandbox_prepare``.
            appmasterconfig: forwarded to ``sandbox_prepare``.
            jobmasterconfig: forwarded to ``sandbox_prepare``.

        Returns:
            A ``StandardJobConfig`` wrapping the ``gaudi-script.py`` buffer and
            the sandboxes, each passed through ``unique()``.
        """

        logger.debug("Prepare")

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig, appmasterconfig, jobmasterconfig)

        job = app.getJobObject()

        logger.debug("Loading pickle files")

        # Cant wait to get rid of this when people no-longer specify
        # inputdata in options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app),
                                      'inputdata',
                                      'options_data.pkl')

            if not job.inputdata and os.path.exists(share_path):
                # Read-only binary mode is enough for unpickling, and the
                # context manager closes the handle even if loading fails.
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # this assumes the share path is only written by trusted code.
                with open(share_path, 'rb') as data_file:
                    job.inputdata = pickle.load(data_file)

        #######################################################################
        # Cant wait to get rid of this when people no-longer specify
        # outputsandbox or outputdata in options file
        #######################################################################
        share_path = os.path.join(get_share_path(app),
                                  'output',
                                  'options_parser.pkl')

        logger.debug("Adding info from pickle files")

        if os.path.exists(share_path):
            # Same handling as above: read-only, closed on every exit path.
            with open(share_path, 'rb') as parser_file:
                parser = pickle.load(parser_file)

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters['gangafiles']
            job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outdata if not FileUtils.doesFileExist(this_file, job.outputfiles)])
            job.non_copyable_outputfiles.extend([fileTransform(this_file, None) for this_file in outbox if not FileUtils.doesFileExist(this_file, job.outputfiles)])

            # The output sandbox is rebuilt from scratch here, replacing the
            # list returned by sandbox_prepare.
            outputsandbox = [out_file.namePattern for out_file in job.non_copyable_outputfiles]

            outputsandbox.extend([out_file.namePattern for out_file in job.outputfiles])
            outputsandbox = unique(outputsandbox)
        #######################################################################

        logger.debug("Doing XML Catalog stuff")

        data = job.inputdata
        data_str = ''
        if data:
            logger.debug("Returning options String")
            data_str = data.optionsString()
            if data.hasLFNs():
                logger.debug("Returning Catalogue")
                inputsandbox.append(
                    FileBuffer('catalog.xml', data.getCatalog()))
                cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
                data_str += cat_opts

        logger.debug("Doing splitter_data stuff")
        if hasattr(job, '_splitter_data'):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer('data.py', data_str))

        logger.debug("Doing GaudiPython stuff")

        cmd = 'python ./gaudipython-wrapper.py'
        opts = ''
        if is_gaudi_child(job.application):
            opts = 'options.pkl'
            # str() each argument so that non-string args (e.g. numbers) do
            # not make str.join raise TypeError.
            cmd = 'gaudirun.py ' + \
                ' '.join([str(arg) for arg in job.application.args]) + \
                ' %s data.py' % opts

        logger.debug("Setting up script")

        script = script_generator(create_runscript(job.application.newStyleApp),
                                  remove_unreplaced=False,
                                  OPTS=opts,
                                  PROJECT_OPTS=job.application.setupProjectOptions,
                                  APP_NAME=job.application.appname,
                                  APP_VERSION=job.application.version,
                                  APP_PACKAGE=job.application.package,
                                  PLATFORM=job.application.platform,
                                  CMDLINE=cmd,
                                  XMLSUMMARYPARSING=getXMLSummaryScript())

        logger.debug("Returning StandardJobConfig")

        return StandardJobConfig(FileBuffer('gaudi-script.py', script, executable=1),
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Exemple #8
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Build the run script and sandboxes for a Gaudi job.

        Steps, in order:
          * collect the sandboxes via ``sandbox_prepare``;
          * for a job without a master and without input data, load the input
            data pickled at ``inputdata/options_data.pkl`` under the share
            path, if that file exists;
          * if ``output/options_parser.pkl`` exists under the share path, use
            the pickled parser to extend ``job.non_copyable_outputfiles`` and
            add those files plus ``job.outputfiles`` to the output sandbox;
          * add ``data.py`` (and ``catalog.xml`` when the data has LFNs) to
            the input sandbox;
          * render the run script returned as ``gaudi-script.py``.

        Args:
            app: application being prepared.
            appsubconfig: forwarded to ``sandbox_prepare``.
            appmasterconfig: forwarded to ``sandbox_prepare``.
            jobmasterconfig: forwarded to ``sandbox_prepare``.

        Returns:
            A ``StandardJobConfig`` wrapping the ``gaudi-script.py`` buffer and
            the sandboxes, each passed through ``unique()``.
        """

        logger.debug("Prepare")

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        job = app.getJobObject()

        logger.debug("Loading pickle files")

        # Cant wait to get rid of this when people no-longer specify
        # inputdata in options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), 'inputdata',
                                      'options_data.pkl')

            if not job.inputdata and os.path.exists(share_path):
                # Read-only binary mode is enough for unpickling, and the
                # context manager closes the handle even if loading fails.
                # NOTE(review): pickle.load runs arbitrary code from the file;
                # this assumes the share path is only written by trusted code.
                with open(share_path, 'rb') as data_file:
                    job.inputdata = pickle.load(data_file)

        #######################################################################
        # Cant wait to get rid of this when people no-longer specify
        # outputsandbox or outputdata in options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), 'output',
                                  'options_parser.pkl')

        logger.debug("Adding info from pickle files")

        if os.path.exists(share_path):
            # Same handling as above: read-only, closed on every exit path.
            with open(share_path, 'rb') as parser_file:
                parser = pickle.load(parser_file)

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters['gangafiles']
            job.non_copyable_outputfiles.extend([
                fileTransform(this_file, None) for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ])
            job.non_copyable_outputfiles.extend([
                fileTransform(this_file, None) for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ])

            # Here the patterns are appended to the sandbox returned by
            # sandbox_prepare rather than replacing it.
            outputsandbox.extend(
                [out_file.namePattern
                 for out_file in job.non_copyable_outputfiles])

            outputsandbox.extend(
                [out_file.namePattern for out_file in job.outputfiles])
            outputsandbox = unique(outputsandbox)
        #######################################################################

        logger.debug("Doing XML Catalog stuff")

        data = job.inputdata
        data_str = ''
        if data:
            logger.debug("Returning options String")
            data_str = data.optionsString()
            if data.hasLFNs():
                logger.debug("Returning Catalogue")
                inputsandbox.append(
                    FileBuffer('catalog.xml', data.getCatalog()))
                cat_opts = '\nfrom Gaudi.Configuration import FileCatalog\nFileCatalog().Catalogs = ["xmlcatalog_file:catalog.xml"]\n'
                data_str += cat_opts

        logger.debug("Doing splitter_data stuff")
        if hasattr(job, '_splitter_data'):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer('data.py', data_str))

        logger.debug("Doing GaudiPython stuff")

        cmd = 'python ./gaudipython-wrapper.py'
        opts = ''
        if is_gaudi_child(job.application):
            opts = 'options.pkl'
            # str() each argument so that non-string args (e.g. numbers) do
            # not make str.join raise TypeError.
            cmd = 'gaudirun.py ' + \
                ' '.join([str(arg) for arg in job.application.args]) + \
                ' %s data.py' % opts

        logger.debug("Setting up script")

        script = script_generator(
            create_runscript(job.application.newStyleApp),
            remove_unreplaced=False,
            OPTS=opts,
            PROJECT_OPTS=job.application.setupProjectOptions,
            APP_NAME=job.application.appname,
            APP_VERSION=job.application.version,
            APP_PACKAGE=job.application.package,
            PLATFORM=job.application.platform,
            CMDLINE=cmd,
            XMLSUMMARYPARSING=getXMLSummaryScript())

        logger.debug("Returning StandardJobConfig")

        return StandardJobConfig(FileBuffer('gaudi-script.py',
                                            script,
                                            executable=1),
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Exemple #9
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Build the DIRAC submission script and sandboxes for an executable.

        The command line (executable plus space-joined arguments) is rendered
        into an ``exe-script.py`` buffer that is added to the input sandbox;
        the DIRAC API script is then rendered with that buffer's name as its
        ``EXE``. When ``app.exe`` is a ``File``, a ``File`` pointing at the
        same basename under ``get_share_path(app)`` is added to the input
        sandbox and the basename is used as the command.

        Args:
            app: application being prepared; ``app.exe`` and ``app.args`` are
                read.
            appsubconfig: forwarded to ``sandbox_prepare``.
            appmasterconfig: forwarded to ``sandbox_prepare``.
            jobmasterconfig: forwarded to ``sandbox_prepare``.

        Returns:
            A ``StandardJobConfig`` wrapping the rendered DIRAC script and the
            sandboxes, each passed through ``unique()``.
        """
        inputsandbox, outputsandbox = sandbox_prepare(
            app, appsubconfig, appmasterconfig, jobmasterconfig)
        input_data, parametricinput_data = dirac_inputdata(app)

        job = app.getJobObject()

        # Only DiracFile outputs are passed on to the output-file JDL helper.
        dirac_files = []
        for candidate in job.outputfiles:
            if isType(candidate, DiracFile):
                dirac_files.append(candidate)

        commandline = app.exe
        if type(app.exe) == File:
            shared_exe = os.path.join(get_share_path(app),
                                      os.path.basename(app.exe.name))
            inputsandbox.append(File(name=shared_exe))
            commandline = os.path.basename(app.exe.name)
        commandline += ' '
        commandline += ' '.join([str(arg) for arg in app.args])
        logger.debug('Command line: %s: ', commandline)

        exe_script_name = 'exe-script.py'
        exe_script_text = script_generator(exe_script_template(),
                                           COMMAND=commandline)
        exe_script_buffer = FileBuffer(name=exe_script_name,
                                       contents=exe_script_text,
                                       executable=True)
        inputsandbox.append(exe_script_buffer)

        dirac_outputfiles = dirac_outputfile_jdl(dirac_files)

        api_template = diracAPI_script_template()

        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        if getConfig('DIRAC')['ReplicateOutputData']:
            replicate_flag = 'True'
        else:
            replicate_flag = ''

        script_params = {
            'DIRAC_IMPORT': 'from DIRAC.Interfaces.API.Dirac import Dirac',
            'DIRAC_JOB_IMPORT': 'from DIRAC.Interfaces.API.Job import Job',
            'DIRAC_OBJECT': 'Dirac()',
            'JOB_OBJECT': 'Job()',
            'NAME': mangle_job_name(app),
            'EXE': exe_script_name,
            # The arguments are already baked into the generated exe script.
            'EXE_ARG_STR': '',
            'EXE_LOG_FILE': 'Ganga_Executable.log',
            'ENVIRONMENT': None,
            'INPUTDATA': input_data,
            'PARAMETRIC_INPUTDATA': parametricinput_data,
            'OUTPUT_SANDBOX': API_nullifier(outputsandbox),
            'OUTPUTFILESSCRIPT': dirac_outputfiles,
            'OUTPUT_PATH': "",
            'OUTPUT_SE': getConfig('DIRAC')['DiracOutputDataSE'],
            'SETTINGS': diracAPI_script_settings(app),
            'DIRAC_OPTS': job.backend.diracOpts,
            'REPLICATE': replicate_flag,
            # leave the sandbox for altering later as needs
            # to be done in backend.submit to combine master.
            # Note only using 2 #s as auto-remove 3
            'INPUT_SANDBOX': '##INPUT_SANDBOX##',
        }
        dirac_script = script_generator(api_template, **script_params)

        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
Exemple #10
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Build the job configuration used to submit a Gaudi app to DIRAC.

        The method gathers the sandboxes, folds in any input/output data
        that was pickled into the application's share directory, writes the
        ``gaudi-script.py`` wrapper into the job's input workspace and
        renders the LHCbDIRAC API submission script.

        Args:
            app: the application being prepared.
            appsubconfig: per-subjob application configuration.
            appmasterconfig: master application configuration.
            jobmasterconfig: master job configuration.

        Returns:
            A StandardJobConfig wrapping the DIRAC API script, with
            de-duplicated input and output boxes.

        Raises:
            BackendError: the job has more than 100 input files and no
                splitter.
            ApplicationConfigurationError: an input-data entry is neither a
                DiracFile nor a string, or ``backend.settings`` is not a
                dict.
        """
        logger.debug("Prepare")

        inputsandbox, outputsandbox = sandbox_prepare(app, appsubconfig,
                                                      appmasterconfig,
                                                      jobmasterconfig)

        job = stripProxy(app).getJobObject()

        # Refuse large unsplit jobs up front.
        if job.inputdata and not job.splitter and len(job.inputdata) > 100:
            raise BackendError(
                "You're submitting a job to Dirac with no splitter and more than 100 files, please add a splitter and try again!"
            )

        # Only DiracFiles are handed to the DIRAC output-file JDL below.
        outputfiles = [
            this_file for this_file in job.outputfiles
            if isType(this_file, DiracFile)
        ]

        # Wrapper shipped in the input sandbox that pulls in data.py and
        # then any splitter-provided data.
        data_str = 'import os\n'
        data_str += 'execfile(\'data.py\')\n'

        if hasattr(job, '_splitter_data'):
            data_str += job._splitter_data
        inputsandbox.append(FileBuffer('data-wrapper.py', data_str))

        input_data = []

        # Cant wait to get rid of this when people no-longer specify
        # inputdata in options file
        #######################################################################
        # splitters ensure that subjobs pick up inputdata from job over that in
        # optsfiles but need to take care of unsplit jobs
        if not job.master:
            share_path = os.path.join(get_share_path(app), 'inputdata',
                                      'options_data.pkl')

            if not job.inputdata and os.path.exists(share_path):
                # NOTE(review): pickle.load can execute arbitrary code; this
                # assumes the share directory is only written by Ganga
                # itself - confirm.
                # Read-only mode is enough, and the context manager closes
                # the handle even if unpickling fails.
                with open(share_path, 'rb') as pkl_file:
                    job.inputdata = pickle.load(pkl_file)

        #######################################################################

        # Cant wait to get rid of this when people no-longer specify
        # outputsandbox or outputdata in options file
        #######################################################################
        share_path = os.path.join(get_share_path(app), 'output',
                                  'options_parser.pkl')

        # A missing parser pickle is tolerated: the sandbox from
        # sandbox_prepare is then used unchanged.
        if os.path.exists(share_path):
            with open(share_path, 'rb') as pkl_file:
                parser = pickle.load(pkl_file)

            outbox, outdata = parser.get_output(job)

            from Ganga.GPIDev.Lib.File import FileUtils
            from Ganga.GPIDev.Base.Filters import allComponentFilters

            fileTransform = allComponentFilters['gangafiles']

            # Outputs declared in the options file that the job does not
            # already list; the non-DiracFile ones are recorded on the job.
            outdata_files = [
                fileTransform(this_file, None) for this_file in outdata
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend([
                output_file for output_file in outdata_files
                if not isType(output_file, DiracFile)
            ])
            outbox_files = [
                fileTransform(this_file, None) for this_file in outbox
                if not FileUtils.doesFileExist(this_file, job.outputfiles)
            ]
            job.non_copyable_outputfiles.extend([
                outbox_file for outbox_file in outbox_files
                if not isType(outbox_file, DiracFile)
            ])

            # Rebuild the output sandbox from the name patterns of every
            # non-DiracFile output, replacing what sandbox_prepare returned.
            outputsandbox = [
                out_file.namePattern
                for out_file in job.non_copyable_outputfiles
            ]

            outputsandbox.extend([
                out_file.namePattern for out_file in job.outputfiles
                if not isType(out_file, DiracFile)
            ])
            outputsandbox = unique(outputsandbox)
        #######################################################################

        input_data_dirac, parametricinput_data = dirac_inputdata(
            job.application)

        # Flatten the input data into a list of LFN strings.
        if input_data_dirac is not None:
            for entry in input_data_dirac:
                if isType(entry, DiracFile):
                    input_data.append(entry.lfn)
                elif isType(entry, str):
                    input_data.append(entry)
                else:
                    raise ApplicationConfigurationError(
                        "Don't know How to handle anythig other than DiracFiles or strings to LFNs!"
                    )

        commandline = "python ./gaudipython-wrapper.py"
        if is_gaudi_child(app):
            commandline = 'gaudirun.py '
            commandline += ' '.join([str(arg) for arg in app.args])
            commandline += ' options.pkl data-wrapper.py'
        logger.debug('Command line: %s: ', commandline)

        gaudi_script_path = os.path.join(job.getInputWorkspace().getPath(),
                                         "gaudi-script.py")

        # Called for its side effect: the rendered script is written to
        # gaudi_script_path; the return value is not used.
        script_generator(
            gaudi_script_template(),
            outputfile_path=gaudi_script_path,
            PLATFORM=app.platform,
            COMMAND=commandline,
            XMLSUMMARYPARSING=getXMLSummaryScript()
        )

        # We want to propagate the ancestor depth to DIRAC when we have
        # inputdata set
        if job.inputdata is not None and isType(job.inputdata, LHCbDataset):

            # As the RT Handler we already know we have a Dirac backend
            if not isinstance(job.backend.settings, dict):
                # NOTE(review): this call passes (None, msg) while the one
                # above passes only msg - confirm which signature
                # ApplicationConfigurationError expects.
                raise ApplicationConfigurationError(
                    None, 'backend.settings should be a dict')

            # An explicit backend setting wins over the dataset's depth.
            if 'AncestorDepth' in job.backend.settings:
                ancestor_depth = job.backend.settings['AncestorDepth']
            else:
                ancestor_depth = job.inputdata.depth
        else:
            ancestor_depth = 0

        lhcbdirac_script_template = lhcbdiracAPI_script_template()

        lhcb_dirac_outputfiles = lhcbdirac_outputfile_jdl(outputfiles)

        # not necessary to use lhcbdiracAPI_script_template any more as doing our own uploads to Dirac
        # remove after Ganga6 release
        # NOTE special case for replicas: replicate string must be empty for no
        # replication
        dirac_script = script_generator(
            lhcbdirac_script_template,
            DIRAC_IMPORT=
            'from LHCbDIRAC.Interfaces.API.DiracLHCb import DiracLHCb',
            DIRAC_JOB_IMPORT=
            'from LHCbDIRAC.Interfaces.API.LHCbJob import LHCbJob',
            DIRAC_OBJECT='DiracLHCb()',
            JOB_OBJECT='LHCbJob()',
            NAME=mangle_job_name(app),
            APP_NAME=stripProxy(app).appname,
            APP_VERSION=app.version,
            APP_SCRIPT=gaudi_script_path,
            APP_LOG_FILE='Ganga_%s_%s.log' %
            (stripProxy(app).appname, app.version),
            INPUTDATA=input_data,
            PARAMETRIC_INPUTDATA=parametricinput_data,
            OUTPUT_SANDBOX=API_nullifier(outputsandbox),
            OUTPUTFILESSCRIPT=lhcb_dirac_outputfiles,
            OUTPUT_PATH="",
            OUTPUT_SE=getConfig('DIRAC')['DiracOutputDataSE'],
            SETTINGS=diracAPI_script_settings(job.application),
            DIRAC_OPTS=job.backend.diracOpts,
            PLATFORM=app.platform,
            REPLICATE='True'
            if getConfig('DIRAC')['ReplicateOutputData'] else '',
            ANCESTOR_DEPTH=ancestor_depth,
            # This placeholder is to be modified in the final 'submit'
            # function in the backend, which also handles the inputfiles
            # DiracFiles as appropriate.
            INPUT_SANDBOX='##INPUT_SANDBOX##')
        logger.debug("prepare: LHCbGaudiDiracRunTimeHandler")

        return StandardJobConfig(dirac_script,
                                 inputbox=unique(inputsandbox),
                                 outputbox=unique(outputsandbox))
    def test_master_prepare(self):
        """Check master_prepare's sandboxes and its failure modes.

        For every job in ``self.job_list`` the StandardJobConfig returned by
        the runtime handler must hold unique inputbox/outputbox entries made
        up of exactly the job sandboxes, the appconfig boxes and the files
        found in the application's share directory.  Two extra jobs then
        exercise the error paths: an exe file that no longer exists in the
        share directory, and an application that was never prepared.
        """
        # setup failing app jobs
        g = tempfile.NamedTemporaryFile(mode='w')
        j_fail = Job(application=Executable(exe=File(g.name)))
        j_fail.prepare()
        g.close()  # file now gone
        # Also delete the copy that prepare() placed in the share directory.
        os.remove(os.path.join(
            get_share_path(j_fail.application._impl), os.path.split(g.name)[1]))
        ##
        j_prep_fail = Job(application=Executable(exe='ls'))

        # Start the testing for each app type
        for app in (j.application._impl for j in self.job_list):
            # check its a known app type
            self.assertTrue(isinstance(app.exe, str) or isinstance(
                app.exe, File._impl), 'Unknown app.exe type! %s' % type(app.exe))

            # run the method we are testing
            jobconfig = self._rthandler.master_prepare(
                app, self.appmasterconfig)

            # check the return value is of the right type
            self.assertTrue(isinstance(jobconfig, StandardJobConfig),
                            'Expected a StandardJobConfig object returned. Instead got %s' % repr(jobconfig))

            # create sets from the text string file names from the inputbox and
            # outputbox
            ipb = set(f.name for f in jobconfig.inputbox)
            opb = set(jobconfig.outputbox)

            # check that inputbox and outputbox contain only unique elements
            self.assertEqual(len(ipb), len(
                jobconfig.inputbox),  'Returned inputsandbox did not contain only unique elements')
            self.assertEqual(len(opb), len(
                jobconfig.outputbox), 'Returned outputsandbox did not contain only unique elements')

            # find the difference between the in/outputbox and those from the
            # defined job in/outputsandbox and appconfig_in/outputbox
            idiff = ipb.symmetric_difference(
                set([f.name for f in self.inputsandbox] + [f.name for f in self.appconfig_inputbox]))
            # added __postprocesslocations__
            odiff = opb.symmetric_difference(
                set(self.outputsandbox + self.appconfig_outputbox + ['__postprocesslocations__']))

            # expect that things placed in the sharedir on preparation will
            # feature in idiff so check and remove them
            for root, dirs, files in os.walk(get_share_path(app)):
                if files:
                    qualified_files = set(
                        [os.path.join(root, f) for f in files])
                    self.assertTrue(qualified_files.issubset(
                        idiff), 'Could not find the following prepared file(s) in jobconfig.inputbox: %s' % repr(qualified_files.difference(idiff)))
                    # once checked that they exist in the idiff then remove
                    # them for ultimate check next
                    idiff.difference_update(qualified_files)

            # check that no extra files, i.e. those not from the
            # job.in/outputsandbox or appconfig_in/outputbox or sharedir are
            # present
            self.assertEqual(idiff, set(
            ), 'jobconfig.inputbox != job.inputsandbox + appconfig.inputbox + prepared_sharedir_files: sym_diff = %s' % idiff)
            self.assertEqual(odiff, set(
            ), 'jobconfig.outputbox != job.outputsandbox + appconfig.outputbox: sym_diff = %s' % odiff)

        # In the callable form of assertRaises every keyword argument is
        # forwarded to the callable, so a msg= keyword would reach
        # master_prepare and raise TypeError instead of exercising the
        # check.  The intent of each check is therefore kept as a comment.

        # Checking that Exception raised if file doesn't exist: the proper
        # exception must be raised when the exe file is missing.
        self.assertRaises(ApplicationConfigurationError,
                          self._rthandler.master_prepare,
                          j_fail.application._impl,
                          self.appmasterconfig)

        # Checking exception raised if app not prepared.
        self.assertRaises(GangaException,
                          self._rthandler.master_prepare,
                          j_prep_fail.application._impl,
                          self.appmasterconfig)
    def test_prepare(self):
        appsubconfig = StandardJobConfig(inputbox=[File('file1.txt')._impl, File('file2.txt')._impl],
                                         outputbox=['file3.txt', 'file4.txt'])
        jobmasterconfig = StandardJobConfig(inputbox=[File('file5.txt')._impl, File('file6.txt')._impl],
                                            outputbox=['file7.txt', 'file8.txt'])
        # Start the testing for each app type
        for app in (j.application._impl for j in self.job_list):
            jobsubconfig = self._rthandler.prepare(
                app, appsubconfig, self.appmasterconfig, jobmasterconfig)

            # create sets from the text string file names from the inputbox and
            # outputbox
            ipb = set(f.name for f in jobsubconfig.inputbox)
            opb = set(jobsubconfig.outputbox)

            # check that inputbox and outputbox contain only unique elements
            self.assertEqual(len(ipb), len(
                jobsubconfig.inputbox),  'Returned inputsandbox did not contain only unique elements')
            self.assertEqual(len(opb), len(
                jobsubconfig.outputbox), 'Returned outputsandbox did not contain only unique elements')

            # find the difference between the in/outputbox and those from the
            # defined job in/outputsandbox and appconfig_in/outputbox
            idiff = ipb.symmetric_difference(
                set([f.name for f in appsubconfig.inputbox] + ['exe-script.py']))
            odiff = opb.symmetric_difference(
                set(appsubconfig.outputbox + jobmasterconfig.outputbox))

            if isinstance(app.exe, File._impl):
                fname = os.path.join(get_share_path(app), self.tmp_filename)
                self.assertTrue(
                    fname in idiff, "Couldn't find the exe file in inputsandbox")
                # once checked that they exist in the idiff then remove them
                # for ultimate check next
                idiff.remove(fname)

            # check that no extra files, i.e. those not from the
            # job.in/outputsandbox or appconfig_in/outputbox or sharedir are
            # present
            self.assertEqual(idiff, set(
            ), 'jobsubconfig.inputbox != appsubconfig.inputbox + exe-script.py + exe file: sym_diff = %s' % idiff)
            self.assertEqual(odiff, set(
            ), 'jobsubconfig.outputbox != appsubconfig.outputbox + jobmasterconfig.outputbox: sym_diff = %s' % odiff)

            script = \
                """# dirac job created by ganga
from DIRAC.Core.Base.Script import parseCommandLine
parseCommandLine()
from DIRAC.Interfaces.API.Dirac import Dirac
from DIRAC.Interfaces.API.Job import Job
dirac = Dirac()
j = Job()

# default commands added by ganga
j.setName('{Ganga_Executable_(###JOB_ID###)}')
j.setExecutable('exe-script.py','','Ganga_Executable.log')
j.setInputSandbox(##INPUT_SANDBOX##)
j.setOutputSandbox(['file4.txt', 'file3.txt', 'file8.txt', 'file7.txt'])

# <-- user settings
j.setCPUTime(172800)

# user settings -->

# diracOpts added by user


# submit the job to dirac
result = dirac.submit(j)
output(result)"""
            self.assertEqual(jobsubconfig.exe,
                             script.replace(
                                 '###JOB_ID###', app._getParent().fqid),
                             'Dirac API script does not match, see diff below:\n' +
                             '\n'.join(difflib.unified_diff(jobsubconfig.exe.splitlines(),
                                                            script.replace(
                                                                '###JOB_ID###', app._getParent().fqid).splitlines(),
                                                            fromfile='Coming from prepare method',
                                                            tofile='What the test expected')))