Example 1
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        from Ganga.Lib.LCG import LCGJobConfig

        c = LCGJobConfig(app.exe, app.inputs, app.args, app.outputs, app.envs)
        c.monitoring_svc = mc['GangaTutorial']

        return c
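This minimal handler simply forwards the application's attributes into an LCGJobConfig and attaches the tutorial monitoring service. As a rough sketch of how such a runtime handler is usually hooked into Ganga, assuming the standard allHandlers registry and placeholder names (TutorialLCGRTHandler, 'Executable') that do not appear in the excerpt above:

# Hypothetical registration sketch; the class and application names are placeholders,
# only allHandlers and IRuntimeHandler are assumed from Ganga's adapter layer.
from Ganga.GPIDev.Adapters.IRuntimeHandler import IRuntimeHandler
from Ganga.GPIDev.Adapters.ApplicationRuntimeHandlers import allHandlers
from Ganga.Lib.LCG import LCGJobConfig


class TutorialLCGRTHandler(IRuntimeHandler):
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        # Forward the application's attributes straight into the backend job config.
        return LCGJobConfig(app.exe, app.inputs, app.args, app.outputs, app.envs)


# Tell Ganga to use this handler whenever the (placeholder) 'Executable'
# application is combined with the LCG backend.
allHandlers.add('Executable', 'LCG', TutorialLCGRTHandler)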
Example 2
    def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent()  # Returns job or subjob object
        logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

        #       prepare inputdata

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []
        add_files = []

        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASDataset':
                    if not job.inputdata.lfn:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    input_files = job.inputdata.lfn

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name in [
                        'DQ2Dataset', 'AMIDataset', 'EventPicking'
                ]:
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError(
                            None,
                            'No inputdata has been specified. Failure in job %s.%s. Dataset %s'
                            %
                            (job._getRoot().id, job.id, job.inputdata.dataset))
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names

                    if job.inputdata.tag_info:

                        # check for conflicts with TAG_LOCAL or TAG_COPY
                        if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                            raise ApplicationConfigurationError(
                                None,
                                "Cannot provide both tag_info and run as '%s'. Please use one or the other!"
                                % job.inputdata.type)

                        # check if FILE_STAGER is used
                        if job.inputdata.type == 'FILE_STAGER':
                            logger.warning(
                                "TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead."
                            )
                            job.inputdata.type = 'DQ2_COPY'

                        # add additional file info for tags
                        for tag_file in job.inputdata.tag_info:
                            for ref in job.inputdata.tag_info[tag_file][
                                    'refs']:
                                add_files.append(ref[1] + ':' + ref[0] + ':' +
                                                 ref[2])

                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL',
                            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER',
                            'TAG_LOCAL', 'TAG_COPY'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if not job.inputdata.datatype in [
                            'DATA', 'MC', 'MuonCalibStream'
                    ]:
                        job.inputdata.datatype = 'MC'

            else:
                if job.inputdata._name == 'ATLASCastorDataset':
                    input_files = ATLASCastorDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASDataset':
                    input_files = ATLASDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s' %
                                    job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [
                            line.strip() for line in pfnListFile
                        ]
                        pfnListFile.close()
                        input_files = job.inputdata.names
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError(
                            None, 'No inputdata has been specified.')

                elif job.inputdata._name in [
                        'DQ2Dataset', 'AMIDataset', 'EventPicking'
                ]:
                    if not job.inputdata.type in [
                            'DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL',
                            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER',
                            'TAG_LOCAL', 'TAG_COPY'
                    ]:
                        job.inputdata.type = 'DQ2_LOCAL'
                    if not job.inputdata.datatype in [
                            'DATA', 'MC', 'MuonCalibStream'
                    ]:
                        job.inputdata.datatype = 'MC'

                    input_guids, input_files = _splitlist(
                        job.inputdata.get_contents())

                    if job.inputdata.tagdataset:
                        input_tag_guids, input_tag_files = _splitlist(
                            job.inputdata.get_tag_contents())
                    if job.inputdata.use_aodesd_backnav:
                        input_esd_guids, input_esd_files = _splitlist(
                            job.inputdata.get_contents(backnav=True))

                    job.inputdata.names = input_files
                    job.inputdata.guids = input_guids

        #       prepare outputdata

        output_location = ''
        if job.outputdata:

            if job.outputdata._name == 'DQ2OutputDataset':

                if job.outputdata.location:
                    if type(job.outputdata.location) == str and isDQ2SRMSite(
                            job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job.outputdata.location)

                    #if job.backend.requirements._name == 'AtlasLCGRequirements':
                    #    if job.backend.requirements.cloud:
                    #        if whichCloud(output_location) != job.backend.requirements.cloud:
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' %(job.outputdata.location, job.backend.requirements.cloud )
                    #            raise ApplicationConfigurationError(None, printout)
                    #    if job.backend.requirements.sites:
                    #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s'%(job.outputdata.location, job.backend.requirements.sites)
                    #            raise ApplicationConfigurationError(None,printout )

                elif job._getRoot().subjobs and job._getRoot(
                ).outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',
                                       job._getRoot().outputdata.location)

                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)
            else:
                if job.outputdata.location:
                    output_location = job.outputdata.location
                else:
                    try:
                        output_location = config['LCGOutputLocation']
                    except ConfigError:
                        logger.warning(
                            'No default output location specified in the configuration.'
                        )
            if job.outputdata.location:
                job.outputdata.location = output_location
                logger.debug('Output: %s,%s', output_location,
                             job.outputdata.location)

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
            output_location = os.path.join(output_location, jid)
            if job.outputdata:
                # Remove trailing number if job is copied

                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat, output_location):
                    output_location = re.sub(pat, '', output_location)
                    output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname = output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname = output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id == 0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
                if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                    job.outputdata.create_subscription(output_datasetname,
                                                       output_location)

            else:
                if (job._getRoot().subjobs
                        and job.id == 0) or not job._getRoot().subjobs:
                    logger.warning(
                        "Dataset %s already exists - appending new files to this dataset",
                        output_datasetname)
                    output_location = job.outputdata.get_locations(
                        datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s', output_location,
                                 job.outputdata.location)
                    if output_location:
                        output_location = output_location[0]
                        if job._getRoot().subjobs:
                            job._getRoot(
                            ).outputdata.location = output_location
                            job.outputdata.location = output_location
                        else:
                            job.outputdata.location = output_location

                    logger.debug('Output4: %s,%s', output_location,
                                 job.outputdata.location)

        if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (
                not job.outputdata or
            (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
            raise ApplicationConfigurationError(
                None,
                'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.'
            )

        #       prepare inputsandbox

        inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]
        if input_guids:
            _append_file_buffer(inputbox, 'input_guids', input_guids)
        if input_files:
            _append_file_buffer(inputbox, 'input_files', input_files)
        if add_files: _append_file_buffer(inputbox, 'add_files', add_files)
        if input_tag_guids:
            _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
        if input_tag_files:
            _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
        if input_esd_guids:
            _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
        if input_esd_files:
            _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            for tag_file in job.inputdata.tag_info:
                if job.inputdata.tag_info[tag_file]['path'] != '':
                    inputbox.append(
                        File(
                            os.path.join(
                                job.inputdata.tag_info[tag_file]['path'],
                                tag_file)))

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

        if job.outputdata and job.outputdata.outputdata:
            _append_file_buffer(inputbox, 'output_files',
                                job.outputdata.outputdata)
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError(
                None,
                'j.outputdata.outputdata is empty - Please specify output filename(s).'
            )

        exe = os.path.join(__directory__, 'run-athena-lcg.sh')
        outputbox = jobmasterconfig.outputbox
        requirements = jobmasterconfig.requirements.__copy__()
        environment = jobmasterconfig.env.copy()
        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment[
                    'ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass

        if output_location and output_location.find(
                '/castor/cern.ch/grid/atlas/t0') >= 0:
            raise ApplicationConfigurationError(
                None,
                'You are trying to save the output to TIER0DISK - please use another area!'
            )
        if not output_location:
            output_location = ''
        if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
            output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2[
                'OUTPUTFILE_NAMELENGTH']
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''

            environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
                configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
                configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot(
        ).splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
            if job.inputdata.tag_info[job.inputdata.tag_info.keys(
            )[0]]['dataset'] != '' and job.inputdata.tag_info[tag_file][
                    'path'] == '':
                environment['TAG_TYPE'] = 'DQ2'
            else:
                environment['TAG_TYPE'] = 'LOCAL'

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and (job.inputdata._name in [
                'DQ2Dataset', 'AMIDataset', 'EventPicking'
        ]) and (job.inputdata.type in [
                'DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY'
        ]):
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(
                        job.inputdata.get_locations(
                            overlap=False)[datasets[0]])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (
                        datasets[0])
                    raise ApplicationConfigurationError(None, printout)

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # Workaround for glite WMS spaced environment variable problem
        inputbox.append(
            FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
            _append_file_buffer(inputbox, 'trf_params', [trf_params])

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # event based splitting:  set max_events and skip_events
        if job._getRoot().splitter and hasattr(
                job._getRoot().splitter,
                'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0:
            environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
            environment['ATHENA_SKIP_EVENTS'] = str(
                job.application.skip_events)

        # pick event
        if job._getRoot().splitter and job._getRoot(
        ).inputdata and job._getRoot().inputdata._name == 'EventPicking':
            #Replace blank space
            environment['ATHENA_RUN_EVENTS'] = str(
                job.application.run_event).replace(' ', '')
            environment['ATHENA_FILTER_POLICY'] = str(
                job.inputdata.pick_filter_policy)

        # append a property for monitoring to the jobconfig of subjobs
        lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox,
                                  environment, [], requirements)
        lcg_config.monitoring_svc = mc['Athena']
        return lcg_config
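The handler above repeatedly calls a module-level helper, _append_file_buffer, that is not part of this excerpt. A minimal sketch of what it presumably does, namely shipping a list of strings to the worker node as a small sandbox file so that the glite WMS never has to carry it in an environment variable (the actual implementation, including the import path and file-name convention, may differ):

# Assumed behaviour of the _append_file_buffer helper used above.
from Ganga.GPIDev.Lib.File import FileBuffer


def _append_file_buffer(inputbox, name, entries):
    # One entry per line; the run script reads the file instead of an env variable.
    inputbox.append(FileBuffer(name + '.txt', '\n'.join(entries) + '\n'))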
Example 3
    def prepare(self,app,appsubconfig,appmasterconfig,jobmasterconfig):
        """prepare the subjob specific configuration"""

        job = app._getParent() # Returns job or subjob object
        logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

        #       prepare inputdata

        input_files = []
        input_guids = []
        input_tag_files = []
        input_tag_guids = []
        input_esd_files = []
        input_esd_guids = []
        add_files = []
       
        if job.inputdata:

            # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob

            if job._getRoot().subjobs:
                if job.inputdata._name == 'ATLASLocalDataset':
                    if not job.inputdata.names: raise ApplicationConfigurationError('No inputdata has been specified.')
                    input_files = job.inputdata.names

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if not job.inputdata.names:
                        raise ApplicationConfigurationError('No inputdata has been specified.')
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files

                elif job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']:
                    if not job.inputdata.names: raise ApplicationConfigurationError('No inputdata has been specified. Failure in job %s.%s. Dataset %s' %(job._getRoot().id, job.id, job.inputdata.dataset)  )
                    input_guids = job.inputdata.guids
                    input_files = job.inputdata.names

                    if job.inputdata.tag_info:

                        # check for conflicts with TAG_LOCAL or TAG_COPY
                        if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                            raise ApplicationConfigurationError("Cannot provide both tag_info and run as '%s'. Please use one or the other!" % job.inputdata.type)
                        
                        # check if FILE_STAGER is used
                        if job.inputdata.type == 'FILE_STAGER':
                            logger.warning("TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead.")
                            job.inputdata.type = 'DQ2_COPY'
                        
                        # add additional file info for tags
                        for tag_file in job.inputdata.tag_info:
                            for ref in job.inputdata.tag_info[tag_file]['refs']:
                                add_files.append( ref[1] + ':' + ref[0] + ':' + ref[2] )
                    
                    if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]:
                        job.inputdata.type ='DQ2_LOCAL'
                    if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                        job.inputdata.datatype ='MC'

            else:
                if job.inputdata._name == 'ATLASLocalDataset':
                    input_files = ATLASLocalDataset.get_filenames(app)

                elif job.inputdata._name == 'ATLASTier3Dataset':
                    if job.inputdata.names:
                        input_files = job.inputdata.names
                        input_guids = input_files
                    elif job.inputdata.pfnListFile:
                        logger.info('Loading file names from %s'%job.inputdata.pfnListFile.name)
                        pfnListFile = open(job.inputdata.pfnListFile.name)
                        job.inputdata.names = [ line.strip() for line in pfnListFile]
                        pfnListFile.close()
                        input_files = job.inputdata.names 
                        input_guids = input_files
                    else:
                        raise ApplicationConfigurationError('No inputdata has been specified.')

                elif job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']:
                    if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]:
                        job.inputdata.type ='DQ2_LOCAL'
                    if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                        job.inputdata.datatype ='MC'

                    input_guids, input_files = _splitlist(job.inputdata.get_contents())

                    if job.inputdata.use_aodesd_backnav:
                        input_esd_guids, input_esd_files = _splitlist(job.inputdata.get_contents(backnav=True))

                    job.inputdata.names = input_files          
                    job.inputdata.guids = input_guids          

        #       prepare outputdata
       
        output_location = ''
        if job.outputdata:

            if job.outputdata._name=='DQ2OutputDataset':

                if job.outputdata.location:
                    if type(job.outputdata.location) == str and isDQ2SRMSite(job.outputdata.location):
                        output_location = job.outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job.outputdata.location)

                    #if job.backend.requirements._name == 'AtlasLCGRequirements':
                    #    if job.backend.requirements.cloud:
                    #        if whichCloud(output_location) != job.backend.requirements.cloud:
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' %(job.outputdata.location, job.backend.requirements.cloud )
                    #            raise ApplicationConfigurationError(None, printout)
                    #    if job.backend.requirements.sites:
                    #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                    #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s'%(job.outputdata.location, job.backend.requirements.sites)
                    #            raise ApplicationConfigurationError(None,printout )     
                    
                elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                    if isDQ2SRMSite(job._getRoot().outputdata.location):
                        output_location = job._getRoot().outputdata.location
                    else:
                        logger.warning('Unknown output location %s.',job._getRoot().outputdata.location)
                        
                
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)
            else:
                if job.outputdata.location:
                    output_location = job.outputdata.location
                else:
                    try:
                        output_location = config['LCGOutputLocation']
                    except ConfigError:
                        logger.warning('No default output location specified in the configuration.')
            if job.outputdata.location:
                job.outputdata.location = output_location 
                logger.debug('Output: %s,%s',output_location, job.outputdata.location)

        if job._getRoot().subjobs:
            jid = "%d.%d" % (job._getRoot().id, job.id)
        else:
            jid = "%d" % job.id

        if output_location and job.outputdata and job.outputdata._name!='DQ2OutputDataset':
            output_location = os.path.join(output_location, jid)
            if job.outputdata:
                # Remove trailing number if job is copied

                pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
                if re.findall(pat,output_location):
                    output_location = re.sub(pat, '', output_location)
                    output_location = os.path.join(output_location, jid)

                job.outputdata.location = output_location

        if job.outputdata and job.outputdata._name=='DQ2OutputDataset':

            # output dataset name from master_prepare
            output_datasetname = self.output_datasetname
            output_lfn = self.output_lfn

            output_jobid = jid
            # Set subjob datasetname
            job.outputdata.datasetname=output_datasetname
            # Set master job datasetname
            if job._getRoot().subjobs:
                job._getRoot().outputdata.datasetname=output_datasetname
            # Create output dataset -> moved to the worker node code !
            if not job.outputdata.dataset_exists(output_datasetname):
                if job._getRoot().subjobs:
                    if job.id==0:
                        #job.outputdata.create_dataset(output_datasetname)
                        pass
                else:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
                if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                    job.outputdata.create_subscription(output_datasetname, output_location)    
                
            else:
                if (job._getRoot().subjobs and job.id==0) or not job._getRoot().subjobs:
                    logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                    output_location = job.outputdata.get_locations(datasetname=output_datasetname, quiet=True)
                    logger.debug('Output3: %s,%s',output_location, job.outputdata.location)
                    if output_location:
                        output_location = output_location[0] 
                        if job._getRoot().subjobs:
                            job._getRoot().outputdata.location=output_location
                            job.outputdata.location=output_location
                        else:
                            job.outputdata.location=output_location
                            
                    logger.debug('Output4: %s,%s',output_location, job.outputdata.location)

        if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (not job.outputdata or (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
            raise ApplicationConfigurationError('Staging of log files in DQ2 requested, but DQ2 output dataset not specified.')
        
        #       prepare inputsandbox

        inputbox = [File(os.path.join(__directory__,'athena-utility.sh')) ]
        if input_guids:     _append_file_buffer(inputbox,'input_guids',input_guids)
        if input_files:     _append_file_buffer(inputbox,'input_files',input_files)
        if add_files:     _append_file_buffer(inputbox,'add_files',add_files)
        if input_tag_guids: _append_file_buffer(inputbox,'input_tag_guids',input_tag_guids)
        if input_tag_files: _append_file_buffer(inputbox,'input_tag_files',input_tag_files)
        if input_esd_guids: _append_file_buffer(inputbox,'input_esd_guids',input_esd_guids)
        if input_esd_files: _append_file_buffer(inputbox,'input_esd_files',input_esd_files)

        if job.inputdata and job.inputdata._name == 'DQ2Dataset':
            for tag_file in job.inputdata.tag_info:
                if job.inputdata.tag_info[tag_file]['path'] != '':
                    inputbox.append( File( os.path.join( job.inputdata.tag_info[tag_file]['path'], tag_file) ) )

        # check for output data given in prepare info
        if job.outputdata and job.application.atlas_exetype == "ATHENA":
            for of in job.application.atlas_run_config['output']['alloutputs']:
                if not of in job.outputdata.outputdata:
                    job.outputdata.outputdata.append(of)

                                        
        if job.outputdata and job.outputdata.outputdata:
            _append_file_buffer(inputbox,'output_files',job.outputdata.outputdata)
        elif job.outputdata and not job.outputdata.outputdata:
            raise ApplicationConfigurationError('j.outputdata.outputdata is empty - Please specify output filename(s).')

        exe = os.path.join(__directory__,'run-athena-lcg.sh')
        outputbox = jobmasterconfig.outputbox
        requirements = jobmasterconfig.requirements.__copy__()
        environment  = jobmasterconfig.env.copy()
        # If ArgSplitter is used
        try:
            if job.application.args:
                environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
                if job.application.options:
                    job.application.options = job.application.options + ' ' + job.application.args
                else:
                    job.application.options = job.application.args
        except AttributeError:
            pass
        
        if output_location and output_location.find('/castor/cern.ch/grid/atlas/t0')>=0:
            raise ApplicationConfigurationError('You are trying to save the output to TIER0DISK - please use another area!')
        if not output_location:
            output_location = ''
        if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
            output_location = ''
        environment['OUTPUT_LOCATION'] = output_location
        environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
        if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
            environment['OUTPUT_DATASETNAME'] = output_datasetname
            environment['OUTPUT_LFN'] = output_lfn
            environment['OUTPUT_JOBID'] = output_jobid
            environment['DQ2_URL_SERVER']= configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2['OUTPUTFILE_NAMELENGTH']
            if job.outputdata.use_shortfilename:
                environment['GANGA_SHORTFILENAME'] = '1'
            else:
                environment['GANGA_SHORTFILENAME'] = ''
                
            environment['DQ2_OUTPUT_SPACE_TOKENS']= ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
            environment['DQ2_BACKUP_OUTPUT_LOCATIONS']= ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])
            
        # CN: extra condition for TNTSplitter
        if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
            # set up dq2 environment
            datasetname = job.inputdata.dataset
            environment['DATASETNAME']= ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                 environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputsandbox: inputbox += job.inputsandbox   

        if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
            if job.inputdata.tag_info[job.inputdata.tag_info.keys()[0] ]['dataset'] != '' and job.inputdata.tag_info[tag_file]['path'] == '':
                environment['TAG_TYPE'] = 'DQ2'
            else:
                environment['TAG_TYPE'] = 'LOCAL'                

        # Fix DATASETNAME env variable for DQ2_COPY mode
        if job.inputdata and ( job.inputdata._name in [ 'DQ2Dataset', 'EventPicking']) and (job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY' ]):
            if job.inputdata.dataset:
                from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
                datasets = resolve_container(job.inputdata.dataset)
                environment['DATASETNAME'] = datasets[0]
                try:
                    environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations(overlap=False)[ datasets[0] ])
                except:
                    printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' %(datasets[0])
                    raise ApplicationConfigurationError(printout )

        if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
            environment['DATASETTYPE'] = 'TIER3'

        # Workaround for glite WMS spaced environment variable problem
        inputbox.append(FileBuffer('athena_options',environment['ATHENA_OPTIONS']+'\n'))

        # Write trf parameters
        trf_params = ' '
        for key, value in job.application.trf_parameter.iteritems():
            if key == 'dbrelease':
                environment['DBDATASETNAME'] = value.split(':')[0]
                environment['DBFILENAME'] = value.split(':')[1]
            else:
                trf_params = trf_params + key + '=' + str(value) + ' '
        if trf_params!=' ' and job.application.atlas_exetype=='TRF':
           _append_file_buffer(inputbox,'trf_params', [ trf_params ] ) 

        # set RecExCommon options
        environment['RECEXTYPE'] = job.application.recex_type

        # event based splitting:  set max_events and skip_events
        if job._getRoot().splitter and hasattr(job._getRoot().splitter, 'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0 :
                environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
                environment['ATHENA_SKIP_EVENTS'] = str(job.application.skip_events)

        # pick event 
        if job._getRoot().splitter and job._getRoot().inputdata and job._getRoot().inputdata._name == 'EventPicking' :
                #Replace blank space 
                environment['ATHENA_RUN_EVENTS'] = str(job.application.run_event).replace(' ', '')
                environment['ATHENA_FILTER_POLICY'] = str(job.inputdata.pick_filter_policy)
        
        # append a property for monitoring to the jobconfig of subjobs
        lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [], requirements)
        lcg_config.monitoring_svc = mc['Athena']
        return lcg_config
Example 4
    def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
        """Prepare the job"""

        inputbox = []

        #       prepare environment
        environment = {}
        environment = jobmasterconfig.env.copy()
        environment["INPUTDATASETS"] = ""
        environment["INPUTFILES"] = ""
        environment["INPUTTURLS"] = ""

        alllfns = app.inputfiles + app.cavernfiles + app.mbfiles + app.dbfiles
        guids = app.turls
        guids.update(app.cavern_turls)
        guids.update(app.minbias_turls)
        guids.update(app.dbturls)

        infilenr = 0
        for infile in alllfns:
            environment["INPUTFILES"] += "lfn[%d]='%s';" % (infilenr, infile)
            environment["INPUTDATASETS"] += "dset[%d]='%s';" % (
                infilenr, app.dsetmap[infile])
            ##            insites=app.sitemap[infile]
            ##            # compare with environment["OUTSITE"] and reorder if needed.
            ##            newinsites=self.sortSites(insites,environment["OUTSITE"])
            ##            environment["INPUTSITES"]+="site[%d]='%s';"%(infilenr,newinsites)
            environment["INPUTTURLS"] += "turl[%d]='%s';" % (infilenr,
                                                             guids[infile])

            infilenr += 1

        logger.debug(
            "%s %s %s" %
            (str(environment["INPUTDATASETS"]), str(
                environment["INPUTTURLS"]), str(environment["INPUTFILES"])))

        if environment["INPUTDATASETS"]:
            # Workaround for glite WMS spaced environment variable problem
            inputbox += [
                FileBuffer('inputdsets.conf',
                           environment['INPUTDATASETS'] + '\n')
            ]
        if environment["INPUTTURLS"]:
            # Workaround for glite WMS spaced environment variable problem
            inputbox += [
                FileBuffer('inputturls.conf', environment['INPUTTURLS'] + '\n')
            ]
        if environment["INPUTFILES"]:
            # Workaround for glite WMS spaced environment variable problem
            inputbox += [
                FileBuffer('inputfiles.conf', environment['INPUTFILES'] + '\n')
            ]


        # now doing output files....
        job = app._getParent()  # Returns job or subjob object

        outfilelist = ""
        for type in app.outputpaths.keys():
            if type == "LOG" and "LOG" not in job.outputdata.outrootfiles:
                # logfiles are no longer saved in DQ2 datasets unless they are explicitly named in the outrootfiles dictionary
                continue
            outfilelist += app.outputpaths[type] + app.subjobsOutfiles[
                job.id][type] + " "

        environment["OUTPUTFILES"] = outfilelist
        # Workaround for glite WMS spaced environment variable problem
        inputbox += [
            FileBuffer('outputfiles.conf', environment['OUTPUTFILES'] + '\n')
        ]

        # setting up job wrapper arguments.
        args = app.args
        trfargs = ' '.join(app.args[4:])
        inputbox += [FileBuffer('trfargs.conf', trfargs + '\n')]
        jid = ""
        if job._getRoot().subjobs:
            jid = job._getRoot().id
        else:
            jid = "%d" % job.id
        environment["OUTPUT_JOBID"] = str(jid)  # used for versionning
        if app.dryrun:
            environment["DRYRUN"] = "TRUE"
        if app.dbrelease:
            environment["ATLASDBREL"] = app.dbrelease
        inputdata = []

        filename = "wrapper.sh"
        exe = os.path.join(os.path.dirname(__file__), filename)

        #       output sandbox
        outputbox = jobmasterconfig.outputbox

        if job.backend._name == "LCG" or job.backend._name == "Cronus" or job.backend._name == "Condor" or job.backend._name == "NG" or job.backend._name == "SGE":
            logger.debug("submission to %s" % job.backend._name)
            #       prepare job requirements
            requirements = jobmasterconfig.requirements

            if "INPUTTURLS" in environment:
                logger.debug(environment["INPUTTURLS"])
                if string.find(environment["INPUTTURLS"], "file:") >= 0:
                    raise ApplicationConfigurationError(
                        None,
                        "Input file was found to be local, and LCG backend does not support replication of local files to the GRID yet. Please register your input dataset in DQ2 before resubmitting this job. Aborting"
                    )
            if string.lower(app.se_name) == "local":
                raise ApplicationConfigurationError(
                    None,
                    "Output file cannot be committed to local filesystem on a grid job. Please change se_name"
                )

            lcg_job_config = LCGJobConfig(File(exe), inputbox, args, outputbox,
                                          environment, inputdata, requirements)
            lcg_job_config.monitoring_svc = mc['AthenaMC/LCG']
            return lcg_job_config
        else:
            logger.debug(
                "Backend %s not fully supported , will try our best anyway..."
                % job.backend._name)
            # if there are input data files and they are on the grid, prestage them on the local area (use either app.datasets.input_dataset or /tmp/$login/data, and update environment["INPUTFILE"] accordingly if the latter is used...)
            # later development....

            return StandardJobConfig(File(exe), inputbox, args, outputbox,
                                     environment)
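For reference, a self-contained sketch of the string layout that the input-file loop above builds, using made-up LFNs, dataset names and TURLs; each environment variable ends up holding shell-style array assignments, presumably so the job wrapper can evaluate them on the worker node:

# Illustrative only: mimics the INPUTFILES/INPUTDATASETS/INPUTTURLS loop with fake data.
env = {"INPUTFILES": "", "INPUTDATASETS": "", "INPUTTURLS": ""}
fake_inputs = [("EVNT.000001.pool.root", "mc.dataset.A", "srm://some.se/EVNT.000001.pool.root"),
               ("EVNT.000002.pool.root", "mc.dataset.A", "srm://some.se/EVNT.000002.pool.root")]
for i, (lfn, dset, turl) in enumerate(fake_inputs):
    env["INPUTFILES"] += "lfn[%d]='%s';" % (i, lfn)
    env["INPUTDATASETS"] += "dset[%d]='%s';" % (i, dset)
    env["INPUTTURLS"] += "turl[%d]='%s';" % (i, turl)

print(env["INPUTFILES"])
# -> lfn[0]='EVNT.000001.pool.root';lfn[1]='EVNT.000002.pool.root';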