Example #1
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the (sub)job specific configuration for a local Athena run.

        Collects the input file names/GUIDs for the job's input dataset,
        resolves the output location, assembles the input sandbox and the
        environment for ``run-athena-local.sh`` and wraps everything in a
        ``StandardJobConfig``.

        Side effects: attributes of ``job.inputdata`` (``type``, ``names``,
        ``guids``), ``job.outputdata`` (``location``, ``datasetname``,
        ``outputdata``), the root job's ``outputdata`` and
        ``job.application.options`` may be updated while the configuration
        is built.

        Args:
            app: application object; ``app._getParent()`` yields the job or
                subjob being prepared.
            appsubconfig: not used by this method.
            appmasterconfig: not used by this method.
            jobmasterconfig: master job configuration; its ``outputbox`` and
                a copy of its ``env`` are the starting point for this job.

        Returns:
            A ``StandardJobConfig`` built from the executable, the input
            sandbox, an empty list (third positional argument), the output
            sandbox and the environment.

        Raises:
            ApplicationConfigurationError: if required input data, the
                StagerDataset dataset name or the output file names are
                missing, or if the locations of a DQ2 input dataset cannot
                be resolved.
        """

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLocalRTHandler prepare called, %s", job.id)

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []

        # DQ2 access types that are kept as they are; any other value is
        # replaced by 'DQ2_LOCAL' below.
        dq2_types = [
            'DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL',
            'TNT_DOWNLOAD'
        ]

        # If job has inputdata
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                # Split job: the file lists are expected to be filled in
                # already, so an empty list is a configuration error.
                if job.inputdata._name in ('ATLASLocalDataset',
                                           'ATLASCastorDataset'):
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    # The file names are shipped as the GUID list as well.
                    input_files = job.inputdata.names
                    input_guids = input_files

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names
                    if job.inputdata.type not in dq2_types:
                        job.inputdata.type = 'DQ2_LOCAL'

            else:
                # Unsplit job: resolve the file lists here.
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s' %
                                    job.inputdata.pfnListFile.name)
                        # Context manager closes the file even if reading
                        # or the assignment below fails.
                        with open(job.inputdata.pfnListFile.name) as pfn_list:
                            job.inputdata.names = [
                                line.strip() for line in pfn_list
                            ]
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name == 'DQ2Dataset':
                    if job.inputdata.type not in dq2_types:
                        job.inputdata.type = 'DQ2_LOCAL'

                    # get_contents() is iterated as (guid, lfn) pairs.
                    contents = job.inputdata.get_contents()
                    input_files = [lfn for guid, lfn in contents]
                    input_guids = [guid for guid, lfn in contents]

                    if job.inputdata.tagdataset:
                        tag_contents = job.inputdata.get_tag_contents()
                        input_tag_files = [lfn for guid, lfn in tag_contents]
                        input_tag_guids = [guid for guid, lfn in tag_contents]
                    if job.inputdata.use_aodesd_backnav:
                        esd_contents = job.inputdata.get_contents(backnav=True)
                        input_esd_files = [lfn for guid, lfn in esd_contents]
                        input_esd_guids = [guid for guid, lfn in esd_contents]

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

        # Outputdataset
        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                # Prefer the (sub)job's own location, then the root job's;
                # either is only accepted if isDQ2SRMSite() approves it.
                if job.outputdata.location:
                    if isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)
                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        # Fixed: this used to call the non-existent
                        # job.getRoot() and failed instead of warning.
                        logger.warning('Unknown output location %s.',
                                       job._getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

            elif job.outputdata.location:
                output_location = expandfilename(job.outputdata.location)
            else:
                try:
                    output_location = config['LocalOutputLocation']
                    job.outputdata.location = expandfilename(output_location)
                except ConfigError:
                    logger.warning(
                        'No default output location specified in the configuration.'
                    )
        else:
            try:
                output_location = config['LocalOutputLocation']
            except ConfigError:
                logger.warning(
                    'No default output location specified in the configuration.'
                )

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        def _with_job_subdir(base):
            # Append the per-job sub-directory selected by the configuration.
            if config['NoSubDirsAtAllForLocalOutput']:
                return base
            if config['SingleDirForLocalOutput']:
                return os.path.join(base, "%d" % (job._getRoot().id))
            if config['IndividualSubjobDirsForLocalOutput']:
                return os.path.join(base,
                                    "%d/%d" % (job._getRoot().id, job.id))
            return os.path.join(base, jid)

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':

            if job._getRoot().subjobs:
                output_location = _with_job_subdir(output_location)

            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if pat.search(output_location):
                output_location = _with_job_subdir(
                    pat.sub('', output_location))

            job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname
            # Create output dataset -> moved to the worker node code !
            # Only an already existing dataset needs handling here, and only
            # for an unsplit job or the subjob with id 0.
            if job.outputdata.dataset_exists(output_datasetname):
                if (job._getRoot().subjobs
                        and job.id == 0) or not job._getRoot().subjobs:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        # Use the first reported location.
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                        job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        inputbox = [
            File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
        ]

        # Ship every non-empty name/GUID list as a newline separated buffer.
        for box_name, entries in (
                ('input_guids', input_guids),
                ('input_files', input_files),
                ('input_tag_guids', input_tag_guids),
                ('input_tag_files', input_tag_files),
                ('input_esd_guids', input_esd_guids),
                ('input_esd_files', input_esd_files),
        ):
            if entries:
                inputbox += [FileBuffer(box_name, '\n'.join(entries) + '\n')]

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if of not in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata:
            if not job.outputdata.outputdata:
                raise ApplicationConfigurationError(
                    None,
                    'j.outputdata.outputdata is empty - Please specify output filename(s).'
                )
            inputbox += [
                FileBuffer('output_files',
                           '\n'.join(job.outputdata.outputdata) + '\n')
            ]

        exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        ## create and add sample files for FileStager
        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if not job.inputdata.dataset:
                raise ApplicationConfigurationError(
                    None, 'dataset name not specified in job.inputdata')

            ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
            inputbox += [
                File(os.path.join(os.path.dirname(__file__), 'fs-copy.py'))
            ]

            (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(
                job=job, max_events=app.max_events)
            inputbox += [File(jo_path), File(ic_path)]

            ## re-make the environment['ATHENA_OPTIONS']
            athena_options = os.path.basename(File(jo_path).name)
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                # NOTE(review): app.options is prepended once per option
                # file (and never when there are none) - confirm whether
                # this was meant to happen once, after the loop.
                if app.options:
                    athena_options = app.options + ' ' + athena_options

            environment['ATHENA_OPTIONS'] = athena_options
            environment['DATASETTYPE'] = 'FILE_STAGER'

            ## ask to send back the FileStager.out/err generated by fs-copy.py
            # Build a new list instead of '+=' so that the master config's
            # outputbox is not extended in place on every call.
            outputbox = outputbox + ['FileStager.out', 'FileStager.err']

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                # NOTE(review): args is joined like a sequence above but
                # concatenated like a string below - confirm its type.
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            # Applications without an 'args' attribute are left untouched.
            pass

        # output_location can only be a list here if get_locations() above
        # returned an empty result for an existing dataset.
        # NOTE(review): the message talks about output file names rather
        # than locations - kept unchanged, confirm the intended wording.
        if job.outputdata and job.outputdata._name=='DQ2OutputDataset' and output_location == [ ]:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        # set EOS env setting
        environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

        # flag for single output dir
        if (config['SingleDirForLocalOutput'] or
                config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
            environment['SINGLE_OUTPUT_DIR'] = jid

            # change the filename: insert "<parent id>.<job id>" in front of
            # the extension. Guarded because outputdata is optional.
            if job.outputdata:
                subjob_tag = "%d.%d" % (job._getParent().id, job.id)
                renamed = []
                for outf in job.outputdata.outputdata:
                    stem, ext = os.path.splitext(outf)
                    renamed.append("%s.%s%s" % (stem, subjob_tag, ext))

                job.outputdata.outputdata = renamed

        environment['OUTPUT_LOCATION'] = output_location
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(
                configDQ2['OUTPUTFILE_NAMELENGTH'])
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
            try:
                environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
            except Exception:
                # Best effort: the variable is simply not set when the
                # lookup fails.
                pass
            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            #environment['DATASETTYPE']=job.inputdata.type
            # At present, DQ2 download is the only thing that works
            environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox:
                inputbox += job.inputsandbox

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in [
                'DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER'
        ]:
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                # Only the first resolved dataset is exported.
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except Exception:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if job.inputdata.use_poolfilecatalog_failover:
                environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

        # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
        environment['CREATE_POOLFILECATALOG'] = '1'
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if not job.inputdata.create_poolfilecatalog:
                environment['CREATE_POOLFILECATALOG'] = '0'

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                # The value is split on ':' into dataset name and file name.
                db_parts = value.split(':')
                environment['DBDATASETNAME'] = db_parts[0]
                environment['DBFILENAME'] = db_parts[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])
            if 'db_dq2localid.py' not in [
                    os.path.basename(box_item.name) for box_item in inputbox
            ]:
                _append_files(inputbox, 'db_dq2localid.py')

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # Athena run dir
        if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
            environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir

        # Set DQ2_LOCAL_SITE_ID
        if hasattr(job.backend, 'extraopts'):
            # Substring test instead of 'find(...) > 0', which missed an
            # option placed at the very start of extraopts.
            if 'site=hh' in job.backend.extraopts:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119'  # hack for FILE_STAGER at NAF
            elif 'site=zn' in job.backend.extraopts:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
                environment[
                    'GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119'  # hack for FILE_STAGER at NAF
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2[
                    'DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        return StandardJobConfig(File(exe), inputbox, [], outputbox,
                                 environment)
Example #2
0
    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent() # Returns job or subjob object
        logger.debug("AthenaLocalRTHandler prepare called, %s", job.id )

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []

        # If job has inputdata
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names: raise ApplicationConfigurationError(None,'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn: raise ApplicationConfigurationError(None,'No inputdata has been specified.') 
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(None,'No inputdata has been specified.') 
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.names: raise ApplicationConfigurationError(None,'No inputdata has been specified.')
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names
                    if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD' ]:
                        job.inputdata.type ='DQ2_LOCAL'
       
            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s'%job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [ line.strip() for line in pfnListFile]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(None,'No inputdata has been specified.') 

                elif job.inputdata._name == 'DQ2Dataset':
                    if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD' ]:
                        job.inputdata.type ='DQ2_LOCAL'

                    contents = job.inputdata.get_contents()
                    input_files = [ lfn  for guid, lfn in contents ]
                    input_guids = [ guid for guid, lfn in contents ]

                    if job.inputdata.tagdataset:
                        tag_contents = job.inputdata.get_tag_contents()
                        input_tag_files = [ lfn  for guid, lfn in tag_contents ]
                        input_tag_guids = [ guid for guid, lfn in tag_contents ] 
                    if job.inputdata.use_aodesd_backnav:
                        esd_contents = job.inputdata.get_contents(backnav=True)
                        input_esd_files = [ lfn  for guid, lfn in esd_contents ]
                        input_esd_guids = [ guid for guid, lfn in esd_contents ]                        

                    job.inputdata.names = input_files          
                    job.inputdata.guids = input_guids          
 
        # Outputdataset
        output_location=''
        if job.outputdata:
            
            if job.outputdata._name=='DQ2OutputDataset':

                if job.outputdata.location:
                    if isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.outputdata.location)
                elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.getRoot().outputdata.location)
                        
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)

            elif job.outputdata.location=='' and job.outputdata._name=='DQ2OutputDataset':
                output_location = ''
            elif job.outputdata.location:
                output_location = expandfilename(job.outputdata.location)
            else:
                try:
                    output_location=config['LocalOutputLocation']
                    if job.outputdata:
                        job.outputdata.location = expandfilename(output_location)
                except ConfigError:
                    logger.warning('No default output location specified in the configuration.')
        else:
            try:
                output_location=config['LocalOutputLocation']
            except ConfigError:
                logger.warning('No default output location specified in the configuration.')

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name!='DQ2OutputDataset':

            if job._getRoot().subjobs:
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location, "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)
                
            if job.outputdata:
                # Remove trailing number if job is copied
                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat,output_location):
                    output_location = re.sub(pat, '', output_location)

                    if config['NoSubDirsAtAllForLocalOutput']:
                        output_location = output_location
                    elif config['SingleDirForLocalOutput']:
                        output_location = os.path.join(output_location, "%d" % (job._getRoot().id))
                    elif config['IndividualSubjobDirsForLocalOutput']:
                        output_location = os.path.join(output_location, "%d/%d" % (job._getRoot().id, job.id))
                    else:
                        output_location = os.path.join(output_location, jid)
                    
                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname=output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname=output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id==0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                if (job._getRoot().subjobs and job.id==0) or not job._getRoot().subjobs:
                    logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                    output_location = job.outputdata.get_locations(datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s',output_location, job.outputdata.location)
                    if output_location:
                        output_location = output_location[0] 
                        if job._getRoot().subjobs:
                            job._getRoot().outputdata.location=output_location
                            job.outputdata.location=output_location
                        else:
                            job.outputdata.location=output_location
                            
                    logger.debug('Output4: %s,%s',output_location, job.outputdata.location)

        inputbox = [File(os.path.join(os.path.dirname(__file__),'athena-utility.sh'))]
                
        if input_guids:
            inputbox += [ FileBuffer('input_guids','\n'.join(input_guids)+'\n') ]

        if input_files: 
            inputbox += [ FileBuffer('input_files','\n'.join(input_files)+'\n') ]

        if input_tag_guids:
            inputbox += [ FileBuffer('input_tag_guids','\n'.join(input_tag_guids)+'\n') ]

        if input_tag_files: 
            inputbox += [ FileBuffer('input_tag_files','\n'.join(input_tag_files)+'\n') ]

        if input_esd_guids:
            inputbox += [ FileBuffer('input_esd_guids','\n'.join(input_esd_guids)+'\n') ]

        if input_esd_files: 
            inputbox += [ FileBuffer('input_esd_files','\n'.join(input_esd_files)+'\n') ]

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)
            
        if job.outputdata and job.outputdata.outputdata:
            inputbox += [ FileBuffer('output_files','\n'.join(job.outputdata.outputdata)+'\n') ]
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(None,'j.outputdata.outputdata is empty - Please specify output filename(s).')
   
        exe = os.path.join(os.path.dirname(__file__),'run-athena-local.sh')
        outputbox = jobmasterconfig.outputbox
        environment = jobmasterconfig.env.copy()

        ## create and add sample files for FileStager
        if job.inputdata and job.inputdata._name == 'StagerDataset':

            if not job.inputdata.dataset:
                raise ApplicationConfigurationError(None,'dataset name not specified in job.inputdata')

            ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
            inputbox += [ File( os.path.join( os.path.dirname(__file__), 'fs-copy.py') ) ]

            (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(job=job, max_events=app.max_events)
            inputbox += [ File(jo_path), File(ic_path) ]

            ## re-make the environment['ATHENA_OPTIONS']
            athena_options = os.path.basename( File(jo_path).name )
            for option_file in app.option_file:
                athena_option = os.path.basename(option_file.name)
                athena_options += ' ' + athena_option
                if app.options:
                    athena_options =  app.options + ' ' + athena_options

            environment['ATHENA_OPTIONS'] = athena_options
            environment['DATASETTYPE']    = 'FILE_STAGER'

            ## ask to send back the FileStager.out/err generated by fs-copy.py
            outputbox += ['FileStager.out', 'FileStager.err']

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options=job.application.args
        except AttributeError:
            pass

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset' and output_location == [ ]:
            raise ApplicationConfigurationError(None,'j.outputdata.outputdata is empty - Please specify output filename(s).')

        # set EOS env setting
        environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

        # flag for single output dir
        if (config['SingleDirForLocalOutput'] or config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
            environment['SINGLE_OUTPUT_DIR'] = jid

            # change the filename
            newoutput = []
            for outf in job.outputdata.outputdata:
                newfile, newfileExt = os.path.splitext(outf)
                jid = "%d.%d" % (job._getParent().id, job.id)
                newoutput.append("%s.%s%s" % (newfile, jid, newfileExt) )               

            job.outputdata.outputdata = newoutput[:]
            
        environment['OUTPUT_LOCATION'] = output_location
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(configDQ2['OUTPUTFILE_NAMELENGTH'])
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
            try:
                environment['GANGA_GLITE_UI']=configLCG['GLITE_SETUP']
            except:
                pass
            environment['DQ2_OUTPUT_SPACE_TOKENS']= ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS']= ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])
            
        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME']= ':'.join(datasetname) 
            environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
            environment['DQ2_URL_SERVER']=configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL']=configDQ2['DQ2_URL_SERVER_SSL']
            #environment['DATASETTYPE']=job.inputdata.type
            # At present, DQ2 download is the only thing that works
            environment['DATASETTYPE']="DQ2_DOWNLOAD"
            if job.inputdata.accessprotocol:
                 environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox   

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and job.inputdata._name in [ 'DQ2Dataset' ] and job.inputdata.type in [ 'DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER' ]:
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset) 
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations(overlap=False)[ datasets[0] ])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' %(datasets[0])
                    raise ApplicationConfigurationError(None,printout )


        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'


            
        # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if job.inputdata.use_poolfilecatalog_failover:
                environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

        # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
        environment['CREATE_POOLFILECATALOG'] = '1'
        if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
            if not job.inputdata.create_poolfilecatalog:
                environment['CREATE_POOLFILECATALOG'] = '0'
                
        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params!=' ' and job.application.atlas_exetype=='TRF':
           _append_file_buffer(inputbox,'trf_params', [ trf_params ]) 
           if not 'db_dq2localid.py' in [ os.path.basename(file.name) for file in inputbox ]:
               _append_files(inputbox, 'db_dq2localid.py')

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # Athena run dir
        if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
            environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir
            
        # Set DQ2_LOCAL_SITE_ID
        if hasattr(job.backend, 'extraopts'):
            if job.backend.extraopts.find('site=hh')>0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
                environment['GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119' # hack for FILE_STAGER at NAF
            elif job.backend.extraopts.find('site=zn')>0:
                environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
                environment['GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119' # hack for FILE_STAGER at NAF
            else:
                environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

        return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
Example #3
0
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """Prepare the subjob specific configuration.

        Collects the input file names/GUIDs of the job's input dataset,
        resolves the output location, assembles the input sandbox and the
        environment for ``run-athena-lcg.sh`` and wraps them in an
        ``LCGJobConfig``.

        Parameters:
            app: application object; ``app._getParent()`` yields the job or
                subjob that is being prepared.
            appsubconfig: not referenced by this method.
            appmasterconfig: not referenced by this method.
            jobmasterconfig: master configuration providing ``outputbox``,
                ``requirements`` and ``env``.

        Returns:
            An ``LCGJobConfig`` whose ``monitoring_svc`` is ``mc['Athena']``.

        Raises:
            ApplicationConfigurationError: if no input data is specified,
                ``tag_info`` is combined with a TAG_LOCAL/TAG_COPY dataset
                type, log staging to DQ2 is requested without a
                DQ2OutputDataset, ``outputdata.outputdata`` is empty, the
                output location points to the Tier-0 castor area, or the
                dataset location lookup fails.
        """

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

        # Dataset classes that are handled like a DQ2Dataset, and the access
        # types / data types accepted for them (anything else is reset to the
        # defaults 'DQ2_LOCAL' / 'MC' below).
        dq2_style_datasets = ('DQ2Dataset', 'AMIDataset', 'EventPicking')
        known_access_types = ('DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL',
                              'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER',
                              'TAG_LOCAL', 'TAG_COPY')
        known_datatypes = ('DATA', 'MC', 'MuonCalibStream')

        #       prepare inputdata

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []
        add_files = []

        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job
            # splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                # Split job: the file lists were already assigned to this
                # subjob, so they are only validated and picked up here.
                if job.inputdata._name in ('ATLASLocalDataset',
                                           'ATLASCastorDataset'):
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names
                    # The file names double as GUIDs for Tier-3 datasets.
                    input_guids = input_files

                elif job.inputdata._name in dq2_style_datasets:
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None,
                            'No inputdata has been specified. Failure in job %s.%s. Dataset %s'
                            %
                            (job._getRoot().id, job.id, job.inputdata.dataset))
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names

                    if job.inputdata.tag_info:

                        # check for conflicts with TAG_LOCAL or TAG_COPY
                        if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                            raise ApplicationConfigurationError(
                                None,
                                "Cannot provide both tag_info and run as '%s'. Please use one or the other!"
                                % job.inputdata.type)

                        # check if FILE_STAGER is used
                        if job.inputdata.type == 'FILE_STAGER':
                            logger.warning(
                                "TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead."
                            )
                            job.inputdata.type = 'DQ2_COPY'

                        # add additional file info for tags
                        for tag_file in job.inputdata.tag_info:
                            for ref in job.inputdata.tag_info[tag_file][
                                    'refs']:
                                add_files.append(ref[1] + ':' + ref[0] + ':' +
                                                 ref[2])

                    if job.inputdata.type not in known_access_types:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if job.inputdata.datatype not in known_datatypes:
                        job.inputdata.datatype = 'MC'

            else:
                # Unsplit job: resolve the file lists from the dataset.
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s',
                                    job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        # Close the list file even if reading it fails.
                        try:
                            job.inputdata.names = [
                                line.strip() for line in pfnListFile
                            ]
                        finally:
                            pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name in dq2_style_datasets:
                    if job.inputdata.type not in known_access_types:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if job.inputdata.datatype not in known_datatypes:
                        job.inputdata.datatype = 'MC'

                    input_guids, input_files = _splitlist(
                        job.inputdata.get_contents())

                    if job.inputdata.tagdataset:
                        input_tag_guids, input_tag_files = _splitlist(
                            job.inputdata.get_tag_contents())
                    if job.inputdata.use_aodesd_backnav:
                        input_esd_guids, input_esd_files = _splitlist(
                            job.inputdata.get_contents(backnav=True))

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

        #       prepare outputdata

        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                if job.outputdata.location:
                    if isinstance(job.outputdata.location,
                                  str) and isDQ2SRMSite(
                                      job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)

                    # A check that the output location lies in the same cloud
                    # as the backend requirements used to be performed here;
                    # it is disabled.

                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    # Fall back to the location set on the master job.
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job._getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)
            else:
                if job.outputdata.location:
                    output_location = job.outputdata.location
                else:
                    try:
                        output_location = config['LCGOutputLocation']
                    except ConfigError:
                        logger.warning(
                            'No default output location specified in the configuration.'
                        )
            if job.outputdata.location:
                job.outputdata.location = output_location
                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

        # Job identifier: "<master id>.<subjob id>" for split jobs.
        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
            output_location = os.path.join(output_location, jid)

            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if pat.search(output_location):
                output_location = pat.sub('', output_location)
                output_location = os.path.join(output_location, jid)

            job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname

            if not job.outputdata.dataset_exists(output_datasetname):
                # Creation of the output dataset was moved to the worker node
                # code; only the optional subscription is requested here.
                if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                    job.outputdata.create_subscription(output_datasetname,
                                                       output_location)

            else:
                # Only the unsplit job or the first subjob looks up the
                # location of the already existing dataset.
                if not job._getRoot().subjobs or job.id == 0:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                        job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (
                not job.outputdata
                or job.outputdata._name != 'DQ2OutputDataset'):
            raise ApplicationConfigurationError(
                None,
                'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.'
            )

        #       prepare inputsandbox

        inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]
        if input_guids:
            _append_file_buffer(inputbox, 'input_guids', input_guids)
        if input_files:
            _append_file_buffer(inputbox, 'input_files', input_files)
        if add_files:
            _append_file_buffer(inputbox, 'add_files', add_files)
        if input_tag_guids:
            _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
        if input_tag_files:
            _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
        if input_esd_guids:
            _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
        if input_esd_files:
            _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

        # Ship tag files that have a path set with the job.
        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            for tag_file in job.inputdata.tag_info:
                if job.inputdata.tag_info[tag_file]['path'] != '':
                    inputbox.append(
                        File(
                            os.path.join(
                                job.inputdata.tag_info[tag_file]['path'],
                                tag_file)))

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if of not in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata and job.outputdata.outputdata:
            _append_file_buffer(inputbox, 'output_files',
                                job.outputdata.outputdata)
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        exe = os.path.join(__directory__, 'run-athena-lcg.sh')
        outputbox = jobmasterconfig.outputbox
        requirements = jobmasterconfig.requirements.__copy__()
        environment = jobmasterconfig.env.copy()

        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                # NOTE(review): args is joined as a sequence above but is
                # appended to / assigned as options unjoined here - confirm
                # the intended type.
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            # Applications without an ``args`` attribute are left untouched.
            pass

        if output_location and '/castor/cern.ch/grid/atlas/t0' in output_location:
            raise ApplicationConfigurationError(
                None,
                'You are try to save the output to TIER0DISK - please use another area !'
            )
        # Normalise any empty value (e.g. an empty location list) to ''.
        if not output_location:
            output_location = ''
        if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
            output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2[
                'OUTPUTFILE_NAMELENGTH']
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''

            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox:
                inputbox += job.inputsandbox

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
            # NOTE(review): 'dataset' is read from the first tag_info key,
            # while 'path' is read through ``tag_file``, the variable left
            # over from the tag-file loop of the input sandbox section (the
            # last key iterated). Confirm whether both were meant to inspect
            # the same entry.
            first_tag = job.inputdata.tag_info.keys()[0]
            if (job.inputdata.tag_info[first_tag]['dataset'] != ''
                    and job.inputdata.tag_info[tag_file]['path'] == ''):
                environment['TAG_TYPE'] = 'DQ2'
            else:
                environment['TAG_TYPE'] = 'LOCAL'

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and (
                job.inputdata._name in dq2_style_datasets) and (
                    job.inputdata.type in [
                        'DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL',
                        'TAG_COPY'
                    ]):
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except Exception:
                    # Any lookup failure is reported as a submission error;
                    # KeyboardInterrupt/SystemExit are allowed to propagate.
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # Work around for glite WMS spaced environement variable problem
        inputbox.append(
            FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                # The value is split on ':' into dataset name and file name.
                dbrelease_parts = value.split(':')
                environment['DBDATASETNAME'] = dbrelease_parts[0]
                environment['DBFILENAME'] = dbrelease_parts[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # event based splitting:  set max_events and skip_events
        if job._getRoot().splitter and hasattr(
                job._getRoot().splitter,
                'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0:
            environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
            environment['ATHENA_SKIP_EVENTS'] = str(
                job.application.skip_events)

        # pick event
        if job._getRoot().splitter and job._getRoot(
        ).inputdata and job._getRoot().inputdata._name == 'EventPicking':
            #Replace blank space
            environment['ATHENA_RUN_EVENTS'] = str(
                job.application.run_event).replace(' ', '')
            environment['ATHENA_FILTER_POLICY'] = str(
                job.inputdata.pick_filter_policy)

        # append a property for monitoring to the jobconfig of subjobs
        lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox,
                                  environment, [], requirements)
        lcg_config.monitoring_svc = mc['Athena']
        return lcg_config
Example #4
0
    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent() # Returns job or subjob object
        logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

#       prepare inputdata

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []
        add_files = []
       
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset':
                    if not job.inputdata.names: raise ApplicationConfigurationError('No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError('No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']:
                    if not job.inputdata.names: raise ApplicationConfigurationError('No inputdata has been specified. Failure in job %s.%s. Dataset %s' %(job._getRoot().id, job.id, job.inputdata.dataset)  )
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names

                    if job.inputdata.tag_info:

                        # check for conflicts with TAG_LOCAL or TAG_COPY
                        if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                            raise ApplicationConfigurationError("Cannot provide both tag_info and run as '%s'. Please use one or the other!" % job.inputdata.type)
                        
                        # check if FILE_STAGER is used
                        if job.inputdata.type == 'FILE_STAGER':
                            logger.warning("TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead.")
                            job.inputdata.type = 'DQ2_COPY'
                        
                        # add additional file info for tags
                        for tag_file in job.inputdata.tag_info:
                            for ref in job.inputdata.tag_info[tag_file]['refs']:
                                add_files.append( ref[1] + ':' + ref[0] + ':' + ref[2] )
                    
                    if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]:
                        job.inputdata.type ='DQ2_LOCAL'
                    if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                        job.inputdata.datatype ='MC'

            else:
                if job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s'%job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [ line.strip() for line in pfnListFile]
                        pfnListFile.close()
                        input_files = job.inputdata.names 
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError('No inputdata has been specified.')

                elif job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']:
                    if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]:
                        job.inputdata.type ='DQ2_LOCAL'
                    if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                        job.inputdata.datatype ='MC'

                    input_guids, input_files = _splitlist(job.inputdata.get_contents())

                    if job.inputdata.use_aodesd_backnav:
                        input_esd_guids, input_esd_files = _splitlist(job.inputdata.get_contents(backnav=True))

                    job.inputdata.names = input_files          
                    job.inputdata.guids = input_guids          

#       prepare outputdata
       
        output_location = ''
        if job.outputdata:

            if job.outputdata._name=='DQ2OutputDataset':

                if job.outputdata.location:
                    if type(job.outputdata.location) == str and isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.outputdata.location)

                    #if job.backend.requirements._name == 'AtlasLCGRequirements':
                    #    if job.backend.requirements.cloud:
                    #        if whichCloud(output_location) != job.backend.requirements.cloud:
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' %(job.outputdata.location, job.backend.requirements.cloud )
                    #            raise ApplicationConfigurationError(None, printout)
                    #    if job.backend.requirements.sites:
                    #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s'%(job.outputdata.location, job.backend.requirements.sites)
                    #            raise ApplicationConfigurationError(None,printout )     
                    
                elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.getRoot().outputdata.location)
                        
                
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)
            else:
                if job.outputdata.location:
                    output_location = job.outputdata.location
                else:
                    try:
                        output_location = config['LCGOutputLocation']
                    except ConfigError:
                        logger.warning('No default output location specified in the configuration.')
            if job.outputdata.location:
                job.outputdata.location = output_location 
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name!='DQ2OutputDataset':
            output_location = os.path.join(output_location, jid)
            if job.outputdata:
                # Remove trailing number if job is copied

                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat,output_location):
                    output_location = re.sub(pat, '', output_location)
                    output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname=output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname=output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id==0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
                if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                    job.outputdata.create_subscription(output_datasetname, output_location)    
                
            else:
                if (job._getRoot().subjobs and job.id==0) or not job._getRoot().subjobs:
                    logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                    output_location = job.outputdata.get_locations(datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s',output_location, job.outputdata.location)
                    if output_location:
                        output_location = output_location[0] 
                        if job._getRoot().subjobs:
                            job._getRoot().outputdata.location=output_location
                            job.outputdata.location=output_location
                        else:
                            job.outputdata.location=output_location
                            
                    logger.debug('Output4: %s,%s',output_location, job.outputdata.location)

        if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (not job.outputdata or (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
            raise ApplicationConfigurationError('Staging of log files in DQ2 requested, but DQ2 output dataset not specified.')
        
#       prepare inputsandbox

        inputbox = [File(os.path.join(__directory__,'athena-utility.sh')) ]
        if input_guids:     _append_file_buffer(inputbox,'input_guids',input_guids)
        if input_files:     _append_file_buffer(inputbox,'input_files',input_files)
        if add_files:     _append_file_buffer(inputbox,'add_files',add_files)
        if input_tag_guids: _append_file_buffer(inputbox,'input_tag_guids',input_tag_guids)
        if input_tag_files: _append_file_buffer(inputbox,'input_tag_files',input_tag_files)
        if input_esd_guids: _append_file_buffer(inputbox,'input_esd_guids',input_esd_guids)
        if input_esd_files: _append_file_buffer(inputbox,'input_esd_files',input_esd_files)

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            for tag_file in job.inputdata.tag_info:
                if job.inputdata.tag_info[tag_file]['path'] != '':
                    inputbox.append( File( os.path.join( job.inputdata.tag_info[tag_file]['path'], tag_file) ) )

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

                                        
        if job.outputdata and job.outputdata.outputdata:
            _append_file_buffer(inputbox,'output_files',job.outputdata.outputdata)
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError('j.outputdata.outputdata is empty - Please specify output filename(s).')

        exe = os.path.join(__directory__,'run-athena-lcg.sh')
        outputbox = jobmasterconfig.outputbox
        requirements = jobmasterconfig.requirements.__copy__()
        environment  = jobmasterconfig.env.copy()
        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass
        
        if output_location and output_location.find('/castor/cern.ch/grid/atlas/t0')>=0:
            raise ApplicationConfigurationError('You are try to save the output to TIER0DISK - please use another area !')
        if not output_location:
            output_location = ''
        if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
            output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER']= configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2['OUTPUTFILE_NAMELENGTH']
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
                
            environment['DQ2_OUTPUT_SPACE_TOKENS']= ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS']= ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])
            
        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME']= ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                 environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox   

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
            if job.inputdata.tag_info[job.inputdata.tag_info.keys()[0] ]['dataset'] != '' and job.inputdata.tag_info[tag_file]['path'] == '':
                environment['TAG_TYPE'] = 'DQ2'
            else:
                environment['TAG_TYPE'] = 'LOCAL'                

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and ( job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and (job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]):
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations(overlap=False)[ datasets[0] ])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' %(datasets[0])
                    raise ApplicationConfigurationError(printout )

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # Workaround for the glite WMS spaced environment variable problem
        inputbox.append(FileBuffer('athena_options',environment['ATHENA_OPTIONS']+'\n'))

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params!=' ' and job.application.atlas_exetype=='TRF':
           _append_file_buffer(inputbox,'trf_params', [ trf_params ] ) 

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # event based splitting:  set max_events and skip_events
        if job._getRoot().splitter and hasattr(job._getRoot().splitter, 'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0 :
                environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
                environment['ATHENA_SKIP_EVENTS'] = str(job.application.skip_events)

        # pick event 
        if job._getRoot().splitter and job._getRoot().inputdata and job._getRoot().inputdata._name == 'EventPicking' :
                #Replace blank space 
                environment['ATHENA_RUN_EVENTS'] = str(job.application.run_event).replace(' ', '')
                environment['ATHENA_FILTER_POLICY'] = str(job.inputdata.pick_filter_policy)
        
# append a property for monitoring to the jobconfig of subjobs
        lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [], requirements)
        lcg_config.monitoring_svc = mc['Athena']
        return lcg_config