Python isDQ2SRMSite Examples

Programming Language: Python

Namespace/Package Name: GangaAtlas.Lib.ATLASDataset

Method/Function: isDQ2SRMSite

Examples at hotexamples.com: 4

Python isDQ2SRMSite - 4 examples found. These are the top rated real world Python examples of GangaAtlas.Lib.ATLASDataset.isDQ2SRMSite extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: AthenaLocalRTHandler.py Project: pseyfert/ganga

    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLocalRTHandler prepare called, %s", job.id)

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []

        # If job has inputdata
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG',
                            'TNT_LOCAL', 'TNT_DOWNLOAD'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'

            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s' %
                                    job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [
                            line.strip() for line in pfnListFile
                        ]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG',
                            'TNT_LOCAL', 'TNT_DOWNLOAD'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'

                    contents = job.inputdata.get_contents()
                    input_files = [lfn for guid, lfn in contents]
                    input_guids = [guid for guid, lfn in contents]

                    if job.inputdata.tagdataset:
                        tag_contents = job.inputdata.get_tag_contents()
                        input_tag_files = [lfn for guid, lfn in tag_contents]
                        input_tag_guids = [guid for guid, lfn in tag_contents]
                    if job.inputdata.use_aodesd_backnav:
                        esd_contents = job.inputdata.get_contents(backnav=True)
                        input_esd_files = [lfn for guid, lfn in esd_contents]
                        input_esd_guids = [guid for guid, lfn in esd_contents]

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

        # Outputdataset
        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                if job.outputdata.location:
                    if isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)
                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

            elif job.outputdata.location == '' and job.outputdata._name == 'DQ2OutputDataset':
                output_location = ''
            elif job.outputdata.location:
                output_location = expandfilename(job.outputdata.location)
            else:
                try:
                    output_location = config['LocalOutputLocation']
                    if job.outputdata:
                        job.outputdata.location = expandfilename(
                            output_location)
                except ConfigError:
                    logger.warning(
                        'No default output location specified in the configuration.'
                    )
        else:
            try:
                output_location = config['LocalOutputLocation']
            except ConfigError:
                logger.warning(
                    'No default output location specified in the configuration.'
                )

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':

            if job._getRoot().subjobs:
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location,
                                                   "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(
                        output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)

            if job.outputdata:
                # Remove trailing number if job is copied
                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat, output_location):
                    output_location = re.sub(pat, '', output_location)

                    if config['NoSubDirsAtAllForLocalOutput']:
                        output_location = output_location
                    elif config['SingleDirForLocalOutput']:
                        output_location = os.path.join(
                            output_location, "%d" % (job._getRoot().id))
                    elif config['IndividualSubjobDirsForLocalOutput']:
                        output_location = os.path.join(
                            output_location,
                            "%d/%d" % (job._getRoot().id, job.id))
                    else:
                        output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id == 0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                if (job._getRoot().subjobs
                        and job.id == 0) or not job._getRoot().subjobs:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                            job.outputdata.location = output_location
                        else:
                            job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
        ]

        if input_guids:
            inputbox += [
                FileBuffer('input_guids', '\n'.join(input_guids) + '\n')
            ]

        if input_files:
            inputbox += [
                FileBuffer('input_files', '\n'.join(input_files) + '\n')
            ]

        if input_tag_guids:
            inputbox += [
                FileBuffer('input_tag_guids',
                           '\n'.join(input_tag_guids) + '\n')
            ]

        if input_tag_files:
            inputbox += [
                FileBuffer('input_tag_files',
                           '\n'.join(input_tag_files) + '\n')
            ]

        if input_esd_guids:
            inputbox += [
                FileBuffer('input_esd_guids',
                           '\n'.join(input_esd_guids) + '\n')
            ]

        if input_esd_files:
            inputbox += [
                FileBuffer('input_esd_files',
                           '\n'.join(input_esd_files) + '\n')
            ]

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata and job.outputdata.outputdata:
            inputbox += [
                FileBuffer('output_files',
                           '\n'.join(job.outputdata.outputdata) + '\n')
            ]
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        ## create and add sample files for FileStager
        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if not job.inputdata.dataset:
                raise ApplicationConfigurationError(
                    None, 'dataset name not specified in job.inputdata')

            ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
            inputbox += [
                File(os.path.join(os.path.dirname(__file__), 'fs-copy.py'))
            ]

            (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(
                job=job, max_events=app.max_events)
            inputbox += [File(jo_path), File(ic_path)]

            ## re-make the environment['ATHENA_OPTIONS']
            athena_options = os.path.basename(File(jo_path).name)
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options = app.options + ' ' + athena_options

            environment['ATHENA_OPTIONS'] = athena_options
            environment['DATASETTYPE'] = 'FILE_STAGER'

            ## ask to send back the FileStager.out/err generated by fs-copy.py
            outputbox += ['FileStager.out', 'FileStager.err']

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset' and output_location == [ ]:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        # set EOS env setting
        environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

        # flag for single output dir
        if (config['SingleDirForLocalOutput'] or
                config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
            environment['SINGLE_OUTPUT_DIR'] = jid

            # change the filename
            newoutput = []
            for outf in job.outputdata.outputdata:
                newfile, newfileExt = os.path.splitext(outf)
                jid = "%d.%d" % (job._getParent().id, job.id)
                newoutput.append("%s.%s%s" % (newfile, jid, newfileExt))

            job.outputdata.outputdata = newoutput[:]

        environment['OUTPUT_LOCATION'] = output_location
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(
                configDQ2['OUTPUTFILE_NAMELENGTH'])
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
            try:
                environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
            except:
                pass
            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            #environment['DATASETTYPE']=job.inputdata.type
            # At present, DQ2 download is the only thing that works
            environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name in [
                'DQ2Dataset'
        ] and job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER']:
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if job.inputdata.use_poolfilecatalog_failover:
                environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

        # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
        environment['CREATE_POOLFILECATALOG'] = '1'
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if not job.inputdata.create_poolfilecatalog:
                environment['CREATE_POOLFILECATALOG'] = '0'

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])
            if not 'db_dq2localid.py' in [
                    os.path.basename(file.name) for file in inputbox
            ]:
                _append_files(inputbox, 'db_dq2localid.py')

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # Athena run dir
        if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
            environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir

        # Set DQ2_LOCAL_SITE_ID
        if hasattr(job.backend, 'extraopts'):
            if job.backend.extraopts.find('site=hh') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119'  # hack for FILE_STAGER at NAF
            elif job.backend.extraopts.find('site=zn') > 0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119'  # hack for FILE_STAGER at NAF
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2[
                    'DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)

Example #2

Show file

File: AthenaLocalRTHandler.py Project: datty/ganga

    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent() # Returns job or subjob object
        logger.debug("AthenaLocalRTHandler prepare called, %s", job.id )

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []

        # If job has inputdata
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names: raise ApplicationConfigurationError(None,'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn: raise ApplicationConfigurationError(None,'No inputdata has been specified.') 
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(None,'No inputdata has been specified.') 
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.names: raise ApplicationConfigurationError(None,'No inputdata has been specified.')
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names
                    if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD' ]:
                        job.inputdata.type ='DQ2_LOCAL'
       
            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s'%job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [ line.strip() for line in pfnListFile]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(None,'No inputdata has been specified.') 

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD' ]:
                        job.inputdata.type ='DQ2_LOCAL'

                    contents = job.inputdata.get_contents()
                    input_files = [ lfn  for guid, lfn in contents ]
                    input_guids = [ guid for guid, lfn in contents ]

                    if job.inputdata.tagdataset:
                        tag_contents = job.inputdata.get_tag_contents()
                        input_tag_files = [ lfn  for guid, lfn in tag_contents ]
                        input_tag_guids = [ guid for guid, lfn in tag_contents ] 
                    if job.inputdata.use_aodesd_backnav:
                        esd_contents = job.inputdata.get_contents(backnav=True)
                        input_esd_files = [ lfn  for guid, lfn in esd_contents ]
                        input_esd_guids = [ guid for guid, lfn in esd_contents ]                        

                    job.inputdata.names = input_files          
                    job.inputdata.guids = input_guids          
 
        # Outputdataset
        output_location=''
        if job.outputdata:
            
            if job.outputdata._name=='DQ2OutputDataset':

                if job.outputdata.location:
                    if isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.outputdata.location)
                elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.getRoot().outputdata.location)
                        
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)

            elif job.outputdata.location=='' and job.outputdata._name=='DQ2OutputDataset':
                output_location = ''
            elif job.outputdata.location:
                output_location = expandfilename(job.outputdata.location)
            else:
                try:
                    output_location=config['LocalOutputLocation']
                    if job.outputdata:
                        job.outputdata.location = expandfilename(output_location)
                except ConfigError:
                    logger.warning('No default output location specified in the configuration.')
        else:
            try:
                output_location=config['LocalOutputLocation']
            except ConfigError:
                logger.warning('No default output location specified in the configuration.')

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name!='DQ2OutputDataset':

            if job._getRoot().subjobs:
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location, "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)
                
            if job.outputdata:
                # Remove trailing number if job is copied
                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat,output_location):
                    output_location = re.sub(pat, '', output_location)

                    if config['NoSubDirsAtAllForLocalOutput']:
                        output_location = output_location
                    elif config['SingleDirForLocalOutput']:
                        output_location = os.path.join(output_location, "%d" % (job._getRoot().id))
                    elif config['IndividualSubjobDirsForLocalOutput']:
                        output_location = os.path.join(output_location, "%d/%d" % (job._getRoot().id, job.id))
                    else:
                        output_location = os.path.join(output_location, jid)
                    
                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname=output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname=output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id==0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                if (job._getRoot().subjobs and job.id==0) or not job._getRoot().subjobs:
                    logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                    output_location = job.outputdata.get_locations(datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s',output_location, job.outputdata.location)
                    if output_location:
                        output_location = output_location[0] 
                        if job._getRoot().subjobs:
                            job._getRoot().outputdata.location=output_location
                            job.outputdata.location=output_location
                        else:
                            job.outputdata.location=output_location
                            
                    logger.debug('Output4: %s,%s',output_location, job.outputdata.location)

        inputbox = [File(os.path.join(os.path.dirname(__file__),'athena-utility.sh'))]
                
        if input_guids:
            inputbox += [ FileBuffer('input_guids','\n'.join(input_guids)+'\n') ]

        if input_files: 
            inputbox += [ FileBuffer('input_files','\n'.join(input_files)+'\n') ]

        if input_tag_guids:
            inputbox += [ FileBuffer('input_tag_guids','\n'.join(input_tag_guids)+'\n') ]

        if input_tag_files: 
            inputbox += [ FileBuffer('input_tag_files','\n'.join(input_tag_files)+'\n') ]

        if input_esd_guids:
            inputbox += [ FileBuffer('input_esd_guids','\n'.join(input_esd_guids)+'\n') ]

        if input_esd_files: 
            inputbox += [ FileBuffer('input_esd_files','\n'.join(input_esd_files)+'\n') ]

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)
            
        if job.outputdata and job.outputdata.outputdata:
            inputbox += [ FileBuffer('output_files','\n'.join(job.outputdata.outputdata)+'\n') ]
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(None,'j.outputdata.outputdata is empty - Please specify output filename(s).')
   
        exe = os.path.join(os.path.dirname(__file__),'run-athena-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        ## create and add sample files for FileStager
        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if not job.inputdata.dataset:
                raise ApplicationConfigurationError(None,'dataset name not specified in job.inputdata')

            ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
            inputbox += [ File( os.path.join( os.path.dirname(__file__), 'fs-copy.py') ) ]

            (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(job=job, max_events=app.max_events)
            inputbox += [ File(jo_path), File(ic_path) ]

            ## re-make the environment['ATHENA_OPTIONS']
            athena_options = os.path.basename( File(jo_path).name )
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options =  app.options + ' ' + athena_options

            environment['ATHENA_OPTIONS'] = athena_options
            environment['DATASETTYPE']    = 'FILE_STAGER'

            ## ask to send back the FileStager.out/err generated by fs-copy.py
            outputbox += ['FileStager.out', 'FileStager.err']

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options=job.application.args
        except AttributeError:
            pass

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset' and output_location == [ ]:
            raise ApplicationConfigurationError(None,'j.outputdata.outputdata is empty - Please specify output filename(s).')

        # set EOS env setting
        environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

        # flag for single output dir
        if (config['SingleDirForLocalOutput'] or config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
            environment['SINGLE_OUTPUT_DIR'] = jid

            # change the filename
            newoutput = []
            for outf in job.outputdata.outputdata:
                newfile, newfileExt = os.path.splitext(outf)
                jid = "%d.%d" % (job._getParent().id, job.id)
                newoutput.append("%s.%s%s" % (newfile, jid, newfileExt) )               

            job.outputdata.outputdata = newoutput[:]
            
        environment['OUTPUT_LOCATION'] = output_location
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(configDQ2['OUTPUTFILE_NAMELENGTH'])
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
            try:
                environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP']
            except:
                pass
            environment['DQ2_OUTPUT_SPACE_TOKENS']= ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS']= ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])
            
        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME']= ':'.join(datasetname) 
            environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
            environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL']
            #environment['DATASETTYPE']=job.inputdata.type
            # At present, DQ2 download is the only thing that works
            environment['DATASETTYPE']="DQ2_DOWNLOAD"
            if job.inputdata.accessprotocol:
                 environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox   

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset' ] and job.inputdata.type in [ 'DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER' ]:
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset) 
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations(overlap=False)[ datasets[0] ])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' %(datasets[0])
                    raise ApplicationConfigurationError(None,printout )


        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'


            
        # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if job.inputdata.use_poolfilecatalog_failover:
                environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

        # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
        environment['CREATE_POOLFILECATALOG'] = '1'
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if not job.inputdata.create_poolfilecatalog:
                environment['CREATE_POOLFILECATALOG'] = '0'
                
        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params!=' ' and job.application.atlas_exetype=='TRF':
           _append_file_buffer(inputbox,'trf_params', [ trf_params ]) 
           if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]:
               _append_files(inputbox, 'db_dq2localid.py')

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # Athena run dir
        if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
            environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir
            
        # Set DQ2_LOCAL_SITE_ID
        if hasattr(job.backend, 'extraopts'):
            if job.backend.extraopts.find('site=hh')>0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
                environment['GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119' # hack for FILE_STAGER at NAF
            elif job.backend.extraopts.find('site=zn')>0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
                environment['GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119' # hack for FILE_STAGER at NAF
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)

Example #3

Show file

File: AthenaLCGRTHandler.py Project: slangrock/ganga

    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

        #       prepare inputdata

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []
        add_files = []

        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name in [
                        'DQ2Dataset', 'AMIDataset', 'EventPicking'
                ]:
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None,
                            'No inputdata has been specified. Failure in job %s.%s. Dataset %s'
                            %
                            (job._getRoot().id, job.id, job.inputdata.dataset))
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names

                    if job.inputdata.tag_info:

                        # check for conflicts with TAG_LOCAL or TAG_COPY
                        if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                            raise ApplicationConfigurationError(
                                None,
                                "Cannot provide both tag_info and run as '%s'. Please use one or the other!"
                                % job.inputdata.type)

                        # check if FILE_STAGER is used
                        if job.inputdata.type == 'FILE_STAGER':
                            logger.warning(
                                "TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead."
                            )
                            job.inputdata.type = 'DQ2_COPY'

                        # add additional file info for tags
                        for tag_file in job.inputdata.tag_info:
                            for ref in job.inputdata.tag_info[tag_file][
                                    'refs']:
                                add_files.append(ref[1] + ':' + ref[0] + ':' +
                                                 ref[2])

                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL',
                            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER',
                            'TAG_LOCAL', 'TAG_COPY'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if not job.inputdata.datatype in [
                            'DATA', 'MC', 'MuonCalibStream'
                    ]:
                        job.inputdata.datatype = 'MC'

            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s' %
                                    job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [
                            line.strip() for line in pfnListFile
                        ]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name in [
                        'DQ2Dataset', 'AMIDataset', 'EventPicking'
                ]:
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL',
                            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER',
                            'TAG_LOCAL', 'TAG_COPY'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if not job.inputdata.datatype in [
                            'DATA', 'MC', 'MuonCalibStream'
                    ]:
                        job.inputdata.datatype = 'MC'

                    input_guids, input_files = _splitlist(
                        job.inputdata.get_contents())

                    if job.inputdata.tagdataset:
                        input_tag_guids, input_tag_files = _splitlist(
                            job.inputdata.get_tag_contents())
                    if job.inputdata.use_aodesd_backnav:
                        input_esd_guids, input_esd_files = _splitlist(
                            job.inputdata.get_contents(backnav=True))

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

#       prepare outputdata

        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                if job.outputdata.location:
                    if type(job.outputdata.location) == str and isDQ2SRMSite(
                            job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)

                    #if job.backend.requirements._name == 'AtlasLCGRequirements':
                    #    if job.backend.requirements.cloud:
                    #        if whichCloud(output_location) != job.backend.requirements.cloud:
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' %(job.outputdata.location, job.backend.requirements.cloud )
                    #            raise ApplicationConfigurationError(None, printout)
                    #    if job.backend.requirements.sites:
                    #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s'%(job.outputdata.location, job.backend.requirements.sites)
                    #            raise ApplicationConfigurationError(None,printout )

                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)
            else:
                if job.outputdata.location:
                    output_location = job.outputdata.location
                else:
                    try:
                        output_location = config['LCGOutputLocation']
                    except ConfigError:
                        logger.warning(
                            'No default output location specified in the configuration.'
                        )
            if job.outputdata.location:
                job.outputdata.location = output_location
                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
            output_location = os.path.join(output_location, jid)
            if job.outputdata:
                # Remove trailing number if job is copied

                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat, output_location):
                    output_location = re.sub(pat, '', output_location)
                    output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id == 0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
                if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                    job.outputdata.create_subscription(output_datasetname,
                                                       output_location)

            else:
                if (job._getRoot().subjobs
                        and job.id == 0) or not job._getRoot().subjobs:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                            job.outputdata.location = output_location
                        else:
                            job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (
                not job.outputdata or
            (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
            raise ApplicationConfigurationError(
                None,
                'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.'
            )

#       prepare inputsandbox

        inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]
        if input_guids:
            _append_file_buffer(inputbox, 'input_guids', input_guids)
        if input_files:
            _append_file_buffer(inputbox, 'input_files', input_files)
        if add_files: _append_file_buffer(inputbox, 'add_files', add_files)
        if input_tag_guids:
            _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
        if input_tag_files:
            _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
        if input_esd_guids:
            _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
        if input_esd_files:
            _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            for tag_file in job.inputdata.tag_info:
                if job.inputdata.tag_info[tag_file]['path'] != '':
                    inputbox.append(
                        File(
                            os.path.join(
                                job.inputdata.tag_info[tag_file]['path'],
                                tag_file)))

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata and job.outputdata.outputdata:
            _append_file_buffer(inputbox, 'output_files',
                                job.outputdata.outputdata)
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        exe = os.path.join(__directory__, 'run-athena-lcg.sh')
        outputbox = jobmasterconfig.outputbox
        requirements = jobmasterconfig.requirements.__copy__()
        environment = jobmasterconfig.env.copy()
        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass

        if output_location and output_location.find(
                '/castor/cern.ch/grid/atlas/t0') >= 0:
            raise ApplicationConfigurationError(
                None,
                'You are try to save the output to TIER0DISK - please use another area !'
            )
        if not output_location:
            output_location = ''
        if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
            output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2[
                'OUTPUTFILE_NAMELENGTH']
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''

            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
            if job.inputdata.tag_info[job.inputdata.tag_info.keys(
            )[0]]['dataset'] != '' and job.inputdata.tag_info[tag_file][
                    'path'] == '':
                environment['TAG_TYPE'] = 'DQ2'
            else:
                environment['TAG_TYPE'] = 'LOCAL'

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) and (job.inputdata.type in [
                'DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY'
        ]):
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # Work around for glite WMS spaced environement variable problem
        inputbox.append(
            FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # event based splitting:  set max_events and skip_events
        if job._getRoot().splitter and hasattr(
                job._getRoot().splitter,
                'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0:
            environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
            environment['ATHENA_SKIP_EVENTS'] = str(
                job.application.skip_events)

        # pick event
        if job._getRoot().splitter and job._getRoot(
        ).inputdata and job._getRoot().inputdata._name == 'EventPicking':
            #Replace blank space
            environment['ATHENA_RUN_EVENTS'] = str(
                job.application.run_event).replace(' ', '')
            environment['ATHENA_FILTER_POLICY'] = str(
                job.inputdata.pick_filter_policy)

# append a property for monitoring to the jobconfig of subjobs
        lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox,
                                  environment, [], requirements)
        lcg_config.monitoring_svc = mc['Athena']
        return lcg_config

Example #4

Show file

File: AthenaLCGRTHandler.py Project: Erni1619/ganga

    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent() # Returns job or subjob object
        logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

#       prepare inputdata

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []
        add_files = []
       
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset':
                    if not job.inputdata.names: raise ApplicationConfigurationError('No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError('No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']:
                    if not job.inputdata.names: raise ApplicationConfigurationError('No inputdata has been specified. Failure in job %s.%s. Dataset %s' %(job._getRoot().id, job.id, job.inputdata.dataset)  )
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names

                    if job.inputdata.tag_info:

                        # check for conflicts with TAG_LOCAL or TAG_COPY
                        if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                            raise ApplicationConfigurationError("Cannot provide both tag_info and run as '%s'. Please use one or the other!" % job.inputdata.type)
                        
                        # check if FILE_STAGER is used
                        if job.inputdata.type == 'FILE_STAGER':
                            logger.warning("TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead.")
                            job.inputdata.type = 'DQ2_COPY'
                        
                        # add additional file info for tags
                        for tag_file in job.inputdata.tag_info:
                            for ref in job.inputdata.tag_info[tag_file]['refs']:
                                add_files.append( ref[1] + ':' + ref[0] + ':' + ref[2] )
                    
                    if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]:
                        job.inputdata.type ='DQ2_LOCAL'
                    if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                        job.inputdata.datatype ='MC'

            else:
                if job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s'%job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [ line.strip() for line in pfnListFile]
                        pfnListFile.close()
                        input_files = job.inputdata.names 
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError('No inputdata has been specified.')

                elif job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']:
                    if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]:
                        job.inputdata.type ='DQ2_LOCAL'
                    if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                        job.inputdata.datatype ='MC'

                    input_guids, input_files = _splitlist(job.inputdata.get_contents())

                    if job.inputdata.use_aodesd_backnav:
                        input_esd_guids, input_esd_files = _splitlist(job.inputdata.get_contents(backnav=True))

                    job.inputdata.names = input_files          
                    job.inputdata.guids = input_guids          

#       prepare outputdata
       
        output_location = ''
        if job.outputdata:

            if job.outputdata._name=='DQ2OutputDataset':

                if job.outputdata.location:
                    if type(job.outputdata.location) == str and isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.outputdata.location)

                    #if job.backend.requirements._name == 'AtlasLCGRequirements':
                    #    if job.backend.requirements.cloud:
                    #        if whichCloud(output_location) != job.backend.requirements.cloud:
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' %(job.outputdata.location, job.backend.requirements.cloud )
                    #            raise ApplicationConfigurationError(None, printout)
                    #    if job.backend.requirements.sites:
                    #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s'%(job.outputdata.location, job.backend.requirements.sites)
                    #            raise ApplicationConfigurationError(None,printout )     
                    
                elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.getRoot().outputdata.location)
                        
                
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)
            else:
                if job.outputdata.location:
                    output_location = job.outputdata.location
                else:
                    try:
                        output_location = config['LCGOutputLocation']
                    except ConfigError:
                        logger.warning('No default output location specified in the configuration.')
            if job.outputdata.location:
                job.outputdata.location = output_location 
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name!='DQ2OutputDataset':
            output_location = os.path.join(output_location, jid)
            if job.outputdata:
                # Remove trailing number if job is copied

                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat,output_location):
                    output_location = re.sub(pat, '', output_location)
                    output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname=output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname=output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id==0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
                if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                    job.outputdata.create_subscription(output_datasetname, output_location)    
                
            else:
                if (job._getRoot().subjobs and job.id==0) or not job._getRoot().subjobs:
                    logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                    output_location = job.outputdata.get_locations(datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s',output_location, job.outputdata.location)
                    if output_location:
                        output_location = output_location[0] 
                        if job._getRoot().subjobs:
                            job._getRoot().outputdata.location=output_location
                            job.outputdata.location=output_location
                        else:
                            job.outputdata.location=output_location
                            
                    logger.debug('Output4: %s,%s',output_location, job.outputdata.location)

        if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (not job.outputdata or (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
            raise ApplicationConfigurationError('Staging of log files in DQ2 requested, but DQ2 output dataset not specified.')
        
#       prepare inputsandbox

        inputbox = [File(os.path.join(__directory__,'athena-utility.sh')) ]
        if input_guids:     _append_file_buffer(inputbox,'input_guids',input_guids)
        if input_files:     _append_file_buffer(inputbox,'input_files',input_files)
        if add_files:     _append_file_buffer(inputbox,'add_files',add_files)
        if input_tag_guids: _append_file_buffer(inputbox,'input_tag_guids',input_tag_guids)
        if input_tag_files: _append_file_buffer(inputbox,'input_tag_files',input_tag_files)
        if input_esd_guids: _append_file_buffer(inputbox,'input_esd_guids',input_esd_guids)
        if input_esd_files: _append_file_buffer(inputbox,'input_esd_files',input_esd_files)

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            for tag_file in job.inputdata.tag_info:
                if job.inputdata.tag_info[tag_file]['path'] != '':
                    inputbox.append( File( os.path.join( job.inputdata.tag_info[tag_file]['path'], tag_file) ) )

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

                                        
        if job.outputdata and job.outputdata.outputdata:
            _append_file_buffer(inputbox,'output_files',job.outputdata.outputdata)
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError('j.outputdata.outputdata is empty - Please specify output filename(s).')

        exe = os.path.join(__directory__,'run-athena-lcg.sh')
        outputbox = jobmasterconfig.outputbox
        requirements = jobmasterconfig.requirements.__copy__()
        environment  = jobmasterconfig.env.copy()
        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass
        
        if output_location and output_location.find('/castor/cern.ch/grid/atlas/t0')>=0:
            raise ApplicationConfigurationError('You are try to save the output to TIER0DISK - please use another area !')
        if not output_location:
            output_location = ''
        if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
            output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER']= configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2['OUTPUTFILE_NAMELENGTH']
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
                
            environment['DQ2_OUTPUT_SPACE_TOKENS']= ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS']= ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])
            
        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME']= ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                 environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox   

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
            if job.inputdata.tag_info[job.inputdata.tag_info.keys()[0] ]['dataset'] != '' and job.inputdata.tag_info[tag_file]['path'] == '':
                environment['TAG_TYPE'] = 'DQ2'
            else:
                environment['TAG_TYPE'] = 'LOCAL'                

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and ( job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and (job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]):
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations(overlap=False)[ datasets[0] ])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' %(datasets[0])
                    raise ApplicationConfigurationError(printout )

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # Work around for glite WMS spaced environement variable problem
        inputbox.append(FileBuffer('athena_options',environment['ATHENA_OPTIONS']+'\n'))

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params!=' ' and job.application.atlas_exetype=='TRF':
           _append_file_buffer(inputbox,'trf_params', [ trf_params ] ) 

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # event based splitting:  set max_events and skip_events
        if job._getRoot().splitter and hasattr(job._getRoot().splitter, 'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0 :
                environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
                environment['ATHENA_SKIP_EVENTS'] = str(job.application.skip_events)

        # pick event 
        if job._getRoot().splitter and job._getRoot().inputdata and job._getRoot().inputdata._name == 'EventPicking' :
                #Replace blank space 
                environment['ATHENA_RUN_EVENTS'] = str(job.application.run_event).replace(' ', '')
                environment['ATHENA_FILTER_POLICY'] = str(job.inputdata.pick_filter_policy)
        
# append a property for monitoring to the jobconfig of subjobs
        lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [], requirements)
        lcg_config.monitoring_svc = mc['Athena']
        return lcg_config