Example 1: AthenaLocalRTHandler.prepare
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLocalRTHandler prepare called, %s", job.id)

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []

        # If job has inputdata
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG',
                            'TNT_LOCAL', 'TNT_DOWNLOAD'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'

            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s' %
                                    job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [
                            line.strip() for line in pfnListFile
                        ]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG',
                            'TNT_LOCAL', 'TNT_DOWNLOAD'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'

                    contents = job.inputdata.get_contents()
                    input_files = [lfn for guid, lfn in contents]
                    input_guids = [guid for guid, lfn in contents]

                    if job.inputdata.tagdataset:
                        tag_contents = job.inputdata.get_tag_contents()
                        input_tag_files = [lfn for guid, lfn in tag_contents]
                        input_tag_guids = [guid for guid, lfn in tag_contents]
                    if job.inputdata.use_aodesd_backnav:
                        esd_contents = job.inputdata.get_contents(backnav=True)
                        input_esd_files = [lfn for guid, lfn in esd_contents]
                        input_esd_guids = [guid for guid, lfn in esd_contents]

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

        # Outputdataset
        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                if job.outputdata.location:
                    if isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)
                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job._getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

            elif job.outputdata.location == '' and job.outputdata._name == 'DQ2OutputDataset':
                output_location = ''
            elif job.outputdata.location:
                output_location = expandfilename(job.outputdata.location)
            else:
                try:
                    output_location = config['LocalOutputLocation']
                    if job.outputdata:
                        job.outputdata.location = expandfilename(
                            output_location)
                except ConfigError:
                    logger.warning(
                        'No default output location specified in the configuration.'
                    )
        else:
            try:
                output_location = config['LocalOutputLocation']
            except ConfigError:
                logger.warning(
                    'No default output location specified in the configuration.'
                )

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':

            if job._getRoot().subjobs:
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location,
                                                   "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(
                        output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)

            if job.outputdata:
                # Remove trailing number if job is copied
                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat, output_location):
                    output_location = re.sub(pat, '', output_location)

                    if config['NoSubDirsAtAllForLocalOutput']:
                        output_location = output_location
                    elif config['SingleDirForLocalOutput']:
                        output_location = os.path.join(
                            output_location, "%d" % (job._getRoot().id))
                    elif config['IndividualSubjobDirsForLocalOutput']:
                        output_location = os.path.join(
                            output_location,
                            "%d/%d" % (job._getRoot().id, job.id))
                    else:
                        output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id == 0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                if (job._getRoot().subjobs
                        and job.id == 0) or not job._getRoot().subjobs:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                            job.outputdata.location = output_location
                        else:
                            job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
        ]

        if input_guids:
            inputbox += [
                FileBuffer('input_guids', '\n'.join(input_guids) + '\n')
            ]

        if input_files:
            inputbox += [
                FileBuffer('input_files', '\n'.join(input_files) + '\n')
            ]

        if input_tag_guids:
            inputbox += [
                FileBuffer('input_tag_guids',
                           '\n'.join(input_tag_guids) + '\n')
            ]

        if input_tag_files:
            inputbox += [
                FileBuffer('input_tag_files',
                           '\n'.join(input_tag_files) + '\n')
            ]

        if input_esd_guids:
            inputbox += [
                FileBuffer('input_esd_guids',
                           '\n'.join(input_esd_guids) + '\n')
            ]

        if input_esd_files:
            inputbox += [
                FileBuffer('input_esd_files',
                           '\n'.join(input_esd_files) + '\n')
            ]

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata and job.outputdata.outputdata:
            inputbox += [
                FileBuffer('output_files',
                           '\n'.join(job.outputdata.outputdata) + '\n')
            ]
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        ## create and add sample files for FileStager
        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if not job.inputdata.dataset:
                raise ApplicationConfigurationError(
                    None, 'dataset name not specified in job.inputdata')

            ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
            inputbox += [
                File(os.path.join(os.path.dirname(__file__), 'fs-copy.py'))
            ]

            (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(
                job=job, max_events=app.max_events)
            inputbox += [File(jo_path), File(ic_path)]

            ## re-make the environment['ATHENA_OPTIONS']
            athena_options = os.path.basename(File(jo_path).name)
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options = app.options + ' ' + athena_options

            environment['ATHENA_OPTIONS'] = athena_options
            environment['DATASETTYPE'] = 'FILE_STAGER'

            ## ask to send back the FileStager.out/err generated by fs-copy.py
            outputbox += ['FileStager.out', 'FileStager.err']

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + ' '.join(job.application.args)
                else:
                    job.application.options = ' '.join(job.application.args)
        except AttributeError:
            pass

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset' and output_location == []:
            raise ApplicationConfigurationError(
                None,
                'No valid output location found for DQ2OutputDataset - Please check j.outputdata.location.'
            )

        # set EOS env setting
        environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

        # flag for single output dir
        if (config['SingleDirForLocalOutput'] or
                config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
            environment['SINGLE_OUTPUT_DIR'] = jid

            # change the filename
            newoutput = []
            for outf in job.outputdata.outputdata:
                newfile, newfileExt = os.path.splitext(outf)
                jid = "%d.%d" % (job._getParent().id, job.id)
                newoutput.append("%s.%s%s" % (newfile, jid, newfileExt))

            job.outputdata.outputdata = newoutput[:]

        environment['OUTPUT_LOCATION'] = output_location
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(
                configDQ2['OUTPUTFILE_NAMELENGTH'])
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
            try:
                environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
            except:
                pass
            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            #environment['DATASETTYPE']=job.inputdata.type
            # At present, DQ2 download is the only thing that works
            environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset'
        ] and job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER']:
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed! Dataset %s could not be found in DQ2! Maybe retry?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if job.inputdata.use_poolfilecatalog_failover:
                environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

        # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
        environment['CREATE_POOLFILECATALOG'] = '1'
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if not job.inputdata.create_poolfilecatalog:
                environment['CREATE_POOLFILECATALOG'] = '0'

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])
            if not 'db_dq2localid.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'db_dq2localid.py')

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # Athena run dir
        if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
            environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir

        # Set DQ2_LOCAL_SITE_ID
        if hasattr(job.backend, 'extraopts'):
            if job.backend.extraopts.find('site=hh') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119'  # hack for FILE_STAGER at NAF
            elif job.backend.extraopts.find('site=zn') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119'  # hack for FILE_STAGER at NAF
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2[
                    'DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)
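
For orientation, below is a minimal, hedged sketch of a Ganga session that would exercise this handler: a job using the Athena application with an ATLASLocalDataset input, a DQ2OutputDataset output and the Local backend. The attribute names follow the code above (atlas_exetype, option_file, inputdata.names, outputdata.outputdata); the concrete file names are purely illustrative, and the exact preparation steps (e.g. an explicit application prepare step) can differ between Ganga versions.

# Illustrative sketch only (GangaAtlas session assumed; the input/option file
# names are hypothetical). Submitting such a job is what eventually triggers
# the prepare() method shown above for each (sub)job.
j = Job()
j.application = Athena()
j.application.atlas_exetype = 'ATHENA'
j.application.option_file = ['AnalysisSkeleton_topOptions.py']  # hypothetical job options file
j.inputdata = ATLASLocalDataset()
j.inputdata.names = ['/data/user/AOD.pool.root']                # hypothetical local input file
j.outputdata = DQ2OutputDataset()
j.outputdata.outputdata = ['AnalysisSkeleton.aan.root']         # expected output file(s)
j.backend = Local()
j.submit()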
Example 2: TagPrepareLocalRTHandler.prepare
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("TagPrepareLocalRTHandler prepare called, %s", job.id)

        # prepare inputdata
        input_files = []
        input_guids = []

        if job.inputdata:

            # check for subjobs
            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'DQ2Dataset':
                    raise ApplicationConfigurationError(
                        None, 'Cannot use DQ2Dataset with a local job')
            else:
                if job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'DQ2Dataset':
                    raise ApplicationConfigurationError(
                        None, 'Cannot use DQ2Dataset with a local job')

        if job.outputdata:
            raise ApplicationConfigurationError(
                None, 'No outputdata required for TagPrepare job.')

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        # prepare inputsandbox
        inputbox = [File(os.path.join(__athdirectory__, 'athena-utility.sh'))]
        if input_files:
            _append_file_buffer(inputbox, 'input_files', input_files)

        exe = os.path.join(__directory__, 'run-tagprepare-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + ' '.join(job.application.args)
                else:
                    job.application.options = ' '.join(job.application.args)
        except AttributeError:
            pass

        output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed! Dataset %s could not be found in DQ2! Maybe retry?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        # Workaround for the gLite WMS problem with environment variables containing spaces
        inputbox.append(
            FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

        # build the final job configuration for the subjob
        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)
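
Example 2 (and the TRF branch of Example 1) relies on an _append_file_buffer helper that is not shown in these excerpts. A minimal sketch of what such a helper presumably does, based on how FileBuffer entries are built inline in Example 1:

# Presumed helper (not part of the excerpts above): wrap a list of lines in a
# FileBuffer called 'name' and add it to the input sandbox, mirroring the
# inline FileBuffer('input_files', '\n'.join(input_files) + '\n') pattern.
def _append_file_buffer(inputbox, name, array):
    inputbox.append(FileBuffer(name, '\n'.join(array) + '\n'))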