def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
    from Ganga.Lib.LCG import LCGJobConfig

    c = LCGJobConfig(app.exe, app.inputs, app.args, app.outputs, app.envs)
    c.monitoring_svc = mc['GangaTutorial']
    return c
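These prepare handlers all reference module-level names (logger, config, configDQ2, mc, __directory__, File, FileBuffer, the dataset classes and the DQ2 helpers) that are defined at import time in their source modules and are not part of this excerpt. The preamble below is a minimal sketch of that setup, assuming standard Ganga utility locations; the import paths and config section names are assumptions, not copied from the original modules, and the GangaAtlas dataset/DQ2 imports are omitted.

import os
import re
import string

from Ganga.GPIDev.Lib.File import File, FileBuffer                 # assumed import path
from Ganga.GPIDev.Adapters.StandardJobConfig import StandardJobConfig  # assumed import path
from Ganga.Lib.LCG import LCGJobConfig
from Ganga.Utility.Config import getConfig, ConfigError            # assumed import path
from Ganga.Utility.logging import getLogger                        # assumed import path
from Ganga.Core.exceptions import ApplicationConfigurationError    # assumed import path

logger = getLogger()
config = getConfig('Athena')              # assumed section name
configDQ2 = getConfig('DQ2')              # assumed section name
mc = getConfig('MonitoringServices')      # assumed: maps application names to monitoring services
__directory__ = os.path.dirname(__file__)  # directory holding the wrapper scripts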
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """prepare the subjob specific configuration"""

    job = app._getParent()  # Returns job or subjob object
    logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

    # prepare inputdata
    input_files = []
    input_guids = []
    input_tag_files = []
    input_tag_guids = []
    input_esd_files = []
    input_esd_guids = []
    add_files = []

    if job.inputdata:
        # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting is done in AthenaSplitterJob
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(None, 'No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'ATLASDataset':
                if not job.inputdata.lfn:
                    raise ApplicationConfigurationError(None, 'No inputdata has been specified.')
                input_files = job.inputdata.lfn
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(None, 'No inputdata has been specified.')
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
            elif job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']:
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None,
                        'No inputdata has been specified. Failure in job %s.%s. Dataset %s'
                        % (job._getRoot().id, job.id, job.inputdata.dataset))
                input_guids = job.inputdata.guids
                input_files = job.inputdata.names

                if job.inputdata.tag_info:
                    # check for conflicts with TAG_LOCAL or TAG_COPY
                    if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                        raise ApplicationConfigurationError(
                            None,
                            "Cannot provide both tag_info and run as '%s'. Please use one or the other!"
                            % job.inputdata.type)

                    # check if FILE_STAGER is used
                    if job.inputdata.type == 'FILE_STAGER':
                        logger.warning("TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead.")
                        job.inputdata.type = 'DQ2_COPY'

                    # add additional file info for tags
                    for tag_file in job.inputdata.tag_info:
                        for ref in job.inputdata.tag_info[tag_file]['refs']:
                            add_files.append(ref[1] + ':' + ref[0] + ':' + ref[2])

                if job.inputdata.type not in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD',
                                              'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if job.inputdata.datatype not in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'
        else:
            if job.inputdata._name == 'ATLASCastorDataset':
                input_files = ATLASCastorDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASDataset':
                input_files = ATLASDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
                elif job.inputdata.pfnListFile:
                    logger.info('Loading file names from %s' % job.inputdata.pfnListFile.name)
                    pfnListFile = open(job.inputdata.pfnListFile.name)
                    job.inputdata.names = [line.strip() for line in pfnListFile]
                    pfnListFile.close()
                    input_files = job.inputdata.names
                    input_guids = input_files
                else:
                    raise ApplicationConfigurationError(None, 'No inputdata has been specified.')
            elif job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']:
                if job.inputdata.type not in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD',
                                              'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if job.inputdata.datatype not in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'

                input_guids, input_files = _splitlist(job.inputdata.get_contents())

                if job.inputdata.tagdataset:
                    input_tag_guids, input_tag_files = _splitlist(job.inputdata.get_tag_contents())
                if job.inputdata.use_aodesd_backnav:
                    input_esd_guids, input_esd_files = _splitlist(job.inputdata.get_contents(backnav=True))

                job.inputdata.names = input_files
                job.inputdata.guids = input_guids

    # prepare outputdata
    output_location = ''
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            if job.outputdata.location:
                if type(job.outputdata.location) == str and isDQ2SRMSite(job.outputdata.location):
                    output_location = job.outputdata.location
                else:
                    logger.warning('Unknown output location %s.', job.outputdata.location)
                #if job.backend.requirements._name == 'AtlasLCGRequirements':
                #    if job.backend.requirements.cloud:
                #        if whichCloud(output_location) != job.backend.requirements.cloud:
                #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' % (job.outputdata.location, job.backend.requirements.cloud)
                #            raise ApplicationConfigurationError(None, printout)
                #    if job.backend.requirements.sites:
                #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s' % (job.outputdata.location, job.backend.requirements.sites)
                #            raise ApplicationConfigurationError(None, printout)
            elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                if isDQ2SRMSite(job._getRoot().outputdata.location):
                    output_location = job._getRoot().outputdata.location
                else:
                    logger.warning('Unknown output location %s.', job._getRoot().outputdata.location)
            logger.debug('Output: %s,%s', output_location, job.outputdata.location)
        else:
            if job.outputdata.location:
                output_location = job.outputdata.location
            else:
                try:
                    output_location = config['LCGOutputLocation']
                except ConfigError:
                    logger.warning('No default output location specified in the configuration.')
            if job.outputdata.location:
                job.outputdata.location = output_location
            logger.debug('Output: %s,%s', output_location, job.outputdata.location)

    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
        output_location = os.path.join(output_location, jid)
        if job.outputdata:
            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if re.findall(pat, output_location):
                output_location = re.sub(pat, '', output_location)
                output_location = os.path.join(output_location, jid)
            job.outputdata.location = output_location

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        # output dataset name from master_prepare
        output_datasetname = self.output_datasetname
        output_lfn = self.output_lfn
        output_jobid = jid

        # Set subjob datasetname
        job.outputdata.datasetname = output_datasetname
        # Set master job datasetname
        if job._getRoot().subjobs:
            job._getRoot().outputdata.datasetname = output_datasetname
        # Create output dataset -> moved to the worker node code !
        if not job.outputdata.dataset_exists(output_datasetname):
            if job._getRoot().subjobs:
                if job.id == 0:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                #job.outputdata.create_dataset(output_datasetname)
                pass
            if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                job.outputdata.create_subscription(output_datasetname, output_location)
        else:
            if (job._getRoot().subjobs and job.id == 0) or not job._getRoot().subjobs:
                logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                output_location = job.outputdata.get_locations(datasetname=output_datasetname, quiet=True)
                logger.debug('Output3: %s,%s', output_location, job.outputdata.location)
                if output_location:
                    output_location = output_location[0]
                    if job._getRoot().subjobs:
                        job._getRoot().outputdata.location = output_location
                        job.outputdata.location = output_location
                    else:
                        job.outputdata.location = output_location
                logger.debug('Output4: %s,%s', output_location, job.outputdata.location)

    if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (not job.outputdata or (
            job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
        raise ApplicationConfigurationError(
            None, 'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.')

    # prepare inputsandbox
    inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]

    if input_guids:
        _append_file_buffer(inputbox, 'input_guids', input_guids)
    if input_files:
        _append_file_buffer(inputbox, 'input_files', input_files)
    if add_files:
        _append_file_buffer(inputbox, 'add_files', add_files)
    if input_tag_guids:
        _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
    if input_tag_files:
        _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
    if input_esd_guids:
        _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
    if input_esd_files:
        _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        for tag_file in job.inputdata.tag_info:
            if job.inputdata.tag_info[tag_file]['path'] != '':
                inputbox.append(File(os.path.join(job.inputdata.tag_info[tag_file]['path'], tag_file)))

    # check for output data given in prepare info
    if job.outputdata and job.application.atlas_exetype == "ATHENA":
        for of in job.application.atlas_run_config['output']['alloutputs']:
            if of not in job.outputdata.outputdata:
                job.outputdata.outputdata.append(of)

    if job.outputdata and job.outputdata.outputdata:
        _append_file_buffer(inputbox, 'output_files', job.outputdata.outputdata)
    elif job.outputdata and not job.outputdata.outputdata:
        raise ApplicationConfigurationError(
            None, 'j.outputdata.outputdata is empty - Please specify output filename(s).')

    exe = os.path.join(__directory__, 'run-athena-lcg.sh')
    outputbox = jobmasterconfig.outputbox
    requirements = jobmasterconfig.requirements.__copy__()
    environment = jobmasterconfig.env.copy()

    # If ArgSplitter is used
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + ' '.join(job.application.args)
            else:
                job.application.options = ' '.join(job.application.args)
    except AttributeError:
        pass

    if output_location and output_location.find('/castor/cern.ch/grid/atlas/t0') >= 0:
        raise ApplicationConfigurationError(
            None, 'You are trying to save the output to TIER0DISK - please use another area!')
    if not output_location:
        output_location = ''
    if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
        output_location = ''
    environment['OUTPUT_LOCATION'] = output_location
    environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        environment['OUTPUT_DATASETNAME'] = output_datasetname
        environment['OUTPUT_LFN'] = output_lfn
        environment['OUTPUT_JOBID'] = output_jobid
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2['OUTPUTFILE_NAMELENGTH']
        if job.outputdata.use_shortfilename:
            environment['GANGA_SHORTFILENAME'] = '1'
        else:
            environment['GANGA_SHORTFILENAME'] = ''
        environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
        environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

    # CN: extra condition for TNTSplitter
    if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
        # set up dq2 environment
        datasetname = job.inputdata.dataset
        environment['DATASETNAME'] = ':'.join(datasetname)
        environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DATASETTYPE'] = job.inputdata.type
        environment['DATASETDATATYPE'] = job.inputdata.datatype
        if job.inputdata.accessprotocol:
            environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
        if job.inputsandbox:
            inputbox += job.inputsandbox

    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        # decide between DQ2 and local TAG files based on the first tag file
        first_tag = job.inputdata.tag_info.keys()[0]
        if job.inputdata.tag_info[first_tag]['dataset'] != '' and job.inputdata.tag_info[first_tag]['path'] == '':
            environment['TAG_TYPE'] = 'DQ2'
        else:
            environment['TAG_TYPE'] = 'LOCAL'

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and (job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']) and (
            job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']):
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations(overlap=False)[datasets[0]])
            except:
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(None, printout)

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        environment['DATASETTYPE'] = 'TIER3'

    # Workaround for the glite WMS problem with spaces in environment variables
    inputbox.append(FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

    # Write trf parameters
    trf_params = ' '
    for key, value in job.application.trf_parameter.iteritems():
        if key == 'dbrelease':
            environment['DBDATASETNAME'] = value.split(':')[0]
            environment['DBFILENAME'] = value.split(':')[1]
        else:
            trf_params = trf_params + key + '=' + str(value) + ' '
    if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
        _append_file_buffer(inputbox, 'trf_params', [trf_params])

    # set RecExCommon options
    environment['RECEXTYPE'] = job.application.recex_type

    # event based splitting: set max_events and skip_events
    if job._getRoot().splitter and hasattr(job._getRoot().splitter, 'numevtsperjob') \
            and job._getRoot().splitter.numevtsperjob > 0:
        environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
        environment['ATHENA_SKIP_EVENTS'] = str(job.application.skip_events)

    # pick event
    if job._getRoot().splitter and job._getRoot().inputdata and job._getRoot().inputdata._name == 'EventPicking':
        # Replace blank space
        environment['ATHENA_RUN_EVENTS'] = str(job.application.run_event).replace(' ', '')
        environment['ATHENA_FILTER_POLICY'] = str(job.inputdata.pick_filter_policy)

    # append a property for monitoring to the jobconfig of subjobs
    lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [], requirements)
    lcg_config.monitoring_svc = mc['Athena']
    return lcg_config
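The handler above calls two helpers, _splitlist and _append_file_buffer, that are not defined in this excerpt. The following is a minimal sketch consistent with the call sites; the bodies, parameter names and the sandbox file naming are assumptions inferred from how the helpers are used, not copied from the original module.

def _splitlist(pairs):
    # split a list of (guid, lfn) tuples, as returned by
    # job.inputdata.get_contents(), into two parallel lists, matching the
    # "input_guids, input_files = _splitlist(...)" call sites above
    guids = [a for a, b in pairs]
    lfns = [b for a, b in pairs]
    return guids, lfns


def _append_file_buffer(inputbox, name, array):
    # ship a list of strings to the worker node as a small sandbox file,
    # one entry per line; this sidesteps the glite WMS problem with spaces
    # in environment variables mentioned in the comments above
    # (the exact file name scheme is an assumption)
    s = ''
    for entry in array:
        s += '%s\n' % entry
    inputbox.append(FileBuffer(name, s))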
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """prepare the subjob specific configuration"""

    job = app._getParent()  # Returns job or subjob object
    logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

    # prepare inputdata
    input_files = []
    input_guids = []
    input_tag_files = []
    input_tag_guids = []
    input_esd_files = []
    input_esd_guids = []
    add_files = []

    if job.inputdata:
        # DQ2Dataset and ATLASLocalDataset job splitting is done in AthenaSplitterJob
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError('No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError('No inputdata has been specified.')
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
            elif job.inputdata._name in ['DQ2Dataset', 'EventPicking']:
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        'No inputdata has been specified. Failure in job %s.%s. Dataset %s'
                        % (job._getRoot().id, job.id, job.inputdata.dataset))
                input_guids = job.inputdata.guids
                input_files = job.inputdata.names

                if job.inputdata.tag_info:
                    # check for conflicts with TAG_LOCAL or TAG_COPY
                    if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                        raise ApplicationConfigurationError(
                            "Cannot provide both tag_info and run as '%s'. Please use one or the other!"
                            % job.inputdata.type)

                    # check if FILE_STAGER is used
                    if job.inputdata.type == 'FILE_STAGER':
                        logger.warning("TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead.")
                        job.inputdata.type = 'DQ2_COPY'

                    # add additional file info for tags
                    for tag_file in job.inputdata.tag_info:
                        for ref in job.inputdata.tag_info[tag_file]['refs']:
                            add_files.append(ref[1] + ':' + ref[0] + ':' + ref[2])

                if job.inputdata.type not in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD',
                                              'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if job.inputdata.datatype not in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'
        else:
            if job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
                elif job.inputdata.pfnListFile:
                    logger.info('Loading file names from %s' % job.inputdata.pfnListFile.name)
                    pfnListFile = open(job.inputdata.pfnListFile.name)
                    job.inputdata.names = [line.strip() for line in pfnListFile]
                    pfnListFile.close()
                    input_files = job.inputdata.names
                    input_guids = input_files
                else:
                    raise ApplicationConfigurationError('No inputdata has been specified.')
            elif job.inputdata._name in ['DQ2Dataset', 'EventPicking']:
                if job.inputdata.type not in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD',
                                              'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if job.inputdata.datatype not in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'

                input_guids, input_files = _splitlist(job.inputdata.get_contents())

                if job.inputdata.use_aodesd_backnav:
                    input_esd_guids, input_esd_files = _splitlist(job.inputdata.get_contents(backnav=True))

                job.inputdata.names = input_files
                job.inputdata.guids = input_guids

    # prepare outputdata
    output_location = ''
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            if job.outputdata.location:
                if type(job.outputdata.location) == str and isDQ2SRMSite(job.outputdata.location):
                    output_location = job.outputdata.location
                else:
                    logger.warning('Unknown output location %s.', job.outputdata.location)
                #if job.backend.requirements._name == 'AtlasLCGRequirements':
                #    if job.backend.requirements.cloud:
                #        if whichCloud(output_location) != job.backend.requirements.cloud:
                #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.cloud=%s' % (job.outputdata.location, job.backend.requirements.cloud)
                #            raise ApplicationConfigurationError(None, printout)
                #    if job.backend.requirements.sites:
                #        if whichCloud(output_location) != whichCloud(job.backend.requirements.sites[0]):
                #            printout = 'Job submission failed ! j.outputdata.location=%s is not in the same cloud as j.backend.requirements.sites=%s' % (job.outputdata.location, job.backend.requirements.sites)
                #            raise ApplicationConfigurationError(None, printout)
            elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                if isDQ2SRMSite(job._getRoot().outputdata.location):
                    output_location = job._getRoot().outputdata.location
                else:
                    logger.warning('Unknown output location %s.', job._getRoot().outputdata.location)
            logger.debug('Output: %s,%s', output_location, job.outputdata.location)
        else:
            if job.outputdata.location:
                output_location = job.outputdata.location
            else:
                try:
                    output_location = config['LCGOutputLocation']
                except ConfigError:
                    logger.warning('No default output location specified in the configuration.')
            if job.outputdata.location:
                job.outputdata.location = output_location
            logger.debug('Output: %s,%s', output_location, job.outputdata.location)

    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
        output_location = os.path.join(output_location, jid)
        if job.outputdata:
            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if re.findall(pat, output_location):
                output_location = re.sub(pat, '', output_location)
                output_location = os.path.join(output_location, jid)
            job.outputdata.location = output_location

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        # output dataset name from master_prepare
        output_datasetname = self.output_datasetname
        output_lfn = self.output_lfn
        output_jobid = jid

        # Set subjob datasetname
        job.outputdata.datasetname = output_datasetname
        # Set master job datasetname
        if job._getRoot().subjobs:
            job._getRoot().outputdata.datasetname = output_datasetname
        # Create output dataset -> moved to the worker node code !
        if not job.outputdata.dataset_exists(output_datasetname):
            if job._getRoot().subjobs:
                if job.id == 0:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                #job.outputdata.create_dataset(output_datasetname)
                pass
            if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                job.outputdata.create_subscription(output_datasetname, output_location)
        else:
            if (job._getRoot().subjobs and job.id == 0) or not job._getRoot().subjobs:
                logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                output_location = job.outputdata.get_locations(datasetname=output_datasetname, quiet=True)
                logger.debug('Output3: %s,%s', output_location, job.outputdata.location)
                if output_location:
                    output_location = output_location[0]
                    if job._getRoot().subjobs:
                        job._getRoot().outputdata.location = output_location
                        job.outputdata.location = output_location
                    else:
                        job.outputdata.location = output_location
                logger.debug('Output4: %s,%s', output_location, job.outputdata.location)

    if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (not job.outputdata or (
            job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
        raise ApplicationConfigurationError(
            'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.')

    # prepare inputsandbox
    inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]

    if input_guids:
        _append_file_buffer(inputbox, 'input_guids', input_guids)
    if input_files:
        _append_file_buffer(inputbox, 'input_files', input_files)
    if add_files:
        _append_file_buffer(inputbox, 'add_files', add_files)
    if input_tag_guids:
        _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
    if input_tag_files:
        _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
    if input_esd_guids:
        _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
    if input_esd_files:
        _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        for tag_file in job.inputdata.tag_info:
            if job.inputdata.tag_info[tag_file]['path'] != '':
                inputbox.append(File(os.path.join(job.inputdata.tag_info[tag_file]['path'], tag_file)))

    # check for output data given in prepare info
    if job.outputdata and job.application.atlas_exetype == "ATHENA":
        for of in job.application.atlas_run_config['output']['alloutputs']:
            if of not in job.outputdata.outputdata:
                job.outputdata.outputdata.append(of)

    if job.outputdata and job.outputdata.outputdata:
        _append_file_buffer(inputbox, 'output_files', job.outputdata.outputdata)
    elif job.outputdata and not job.outputdata.outputdata:
        raise ApplicationConfigurationError(
            'j.outputdata.outputdata is empty - Please specify output filename(s).')

    exe = os.path.join(__directory__, 'run-athena-lcg.sh')
    outputbox = jobmasterconfig.outputbox
    requirements = jobmasterconfig.requirements.__copy__()
    environment = jobmasterconfig.env.copy()

    # If ArgSplitter is used
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + ' '.join(job.application.args)
            else:
                job.application.options = ' '.join(job.application.args)
    except AttributeError:
        pass

    if output_location and output_location.find('/castor/cern.ch/grid/atlas/t0') >= 0:
        raise ApplicationConfigurationError(
            'You are trying to save the output to TIER0DISK - please use another area!')
    if not output_location:
        output_location = ''
    if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
        output_location = ''
    environment['OUTPUT_LOCATION'] = output_location
    environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        environment['OUTPUT_DATASETNAME'] = output_datasetname
        environment['OUTPUT_LFN'] = output_lfn
        environment['OUTPUT_JOBID'] = output_jobid
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2['OUTPUTFILE_NAMELENGTH']
        if job.outputdata.use_shortfilename:
            environment['GANGA_SHORTFILENAME'] = '1'
        else:
            environment['GANGA_SHORTFILENAME'] = ''
        environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
        environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

    # CN: extra condition for TNTSplitter
    if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
        # set up dq2 environment
        datasetname = job.inputdata.dataset
        environment['DATASETNAME'] = ':'.join(datasetname)
        environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DATASETTYPE'] = job.inputdata.type
        environment['DATASETDATATYPE'] = job.inputdata.datatype
        if job.inputdata.accessprotocol:
            environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
        if job.inputsandbox:
            inputbox += job.inputsandbox

    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        # decide between DQ2 and local TAG files based on the first tag file
        first_tag = job.inputdata.tag_info.keys()[0]
        if job.inputdata.tag_info[first_tag]['dataset'] != '' and job.inputdata.tag_info[first_tag]['path'] == '':
            environment['TAG_TYPE'] = 'DQ2'
        else:
            environment['TAG_TYPE'] = 'LOCAL'

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and (job.inputdata._name in ['DQ2Dataset', 'EventPicking']) and (
            job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']):
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations(overlap=False)[datasets[0]])
            except:
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(printout)

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        environment['DATASETTYPE'] = 'TIER3'

    # Workaround for the glite WMS problem with spaces in environment variables
    inputbox.append(FileBuffer('athena_options', environment['ATHENA_OPTIONS'] + '\n'))

    # Write trf parameters
    trf_params = ' '
    for key, value in job.application.trf_parameter.iteritems():
        if key == 'dbrelease':
            environment['DBDATASETNAME'] = value.split(':')[0]
            environment['DBFILENAME'] = value.split(':')[1]
        else:
            trf_params = trf_params + key + '=' + str(value) + ' '
    if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
        _append_file_buffer(inputbox, 'trf_params', [trf_params])

    # set RecExCommon options
    environment['RECEXTYPE'] = job.application.recex_type

    # event based splitting: set max_events and skip_events
    if job._getRoot().splitter and hasattr(job._getRoot().splitter, 'numevtsperjob') \
            and job._getRoot().splitter.numevtsperjob > 0:
        environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
        environment['ATHENA_SKIP_EVENTS'] = str(job.application.skip_events)

    # pick event
    if job._getRoot().splitter and job._getRoot().inputdata and job._getRoot().inputdata._name == 'EventPicking':
        # Replace blank space
        environment['ATHENA_RUN_EVENTS'] = str(job.application.run_event).replace(' ', '')
        environment['ATHENA_FILTER_POLICY'] = str(job.inputdata.pick_filter_policy)

    # append a property for monitoring to the jobconfig of subjobs
    lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [], requirements)
    lcg_config.monitoring_svc = mc['Athena']
    return lcg_config
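For reference, this is how the trf_parameter loop in the handlers above encodes its input: a 'dbrelease' entry of the form 'dataset:filename' is split into two dedicated environment variables, while every other key becomes a key=value token shipped to the worker node. The dictionary values below are invented for the illustration.

# Standalone illustration (Python 2, mirroring the loop above)
trf_parameter = {
    'dbrelease': 'ddo.000001.Atlas.Ideal.DBRelease:DBRelease-9.5.1.tar.gz',  # made-up value
    'maxEvents': 100,                                                        # made-up value
}
environment = {}
trf_params = ' '
for key, value in trf_parameter.iteritems():
    if key == 'dbrelease':
        # 'dataset:filename' is split into two dedicated variables
        environment['DBDATASETNAME'] = value.split(':')[0]
        environment['DBFILENAME'] = value.split(':')[1]
    else:
        trf_params = trf_params + key + '=' + str(value) + ' '

print trf_params   # ' maxEvents=100 '
print environment  # {'DBDATASETNAME': 'ddo.000001.Atlas.Ideal.DBRelease', 'DBFILENAME': 'DBRelease-9.5.1.tar.gz'}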
def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig):
    """Prepare the job"""

    inputbox = []

    # prepare environment
    environment = jobmasterconfig.env.copy()
    environment["INPUTDATASETS"] = ""
    environment["INPUTFILES"] = ""
    environment["INPUTTURLS"] = ""

    alllfns = app.inputfiles + app.cavernfiles + app.mbfiles + app.dbfiles
    guids = app.turls
    guids.update(app.cavern_turls)
    guids.update(app.minbias_turls)
    guids.update(app.dbturls)

    infilenr = 0
    for infile in alllfns:
        environment["INPUTFILES"] += "lfn[%d]='%s';" % (infilenr, infile)
        environment["INPUTDATASETS"] += "dset[%d]='%s';" % (infilenr, app.dsetmap[infile])
##        insites = app.sitemap[infile]
##        # compare with environment["OUTSITE"] and reorder if needed.
##        newinsites = self.sortSites(insites, environment["OUTSITE"])
##        environment["INPUTSITES"] += "site[%d]='%s';" % (infilenr, newinsites)
        environment["INPUTTURLS"] += "turl[%d]='%s';" % (infilenr, guids[infile])
        infilenr += 1

    logger.debug("%s %s %s" % (str(environment["INPUTDATASETS"]),
                               str(environment["INPUTTURLS"]),
                               str(environment["INPUTFILES"])))

    if environment["INPUTDATASETS"]:
        # Workaround for the glite WMS problem with spaces in environment variables
        inputbox += [FileBuffer('inputdsets.conf', environment['INPUTDATASETS'] + '\n')]
    if environment["INPUTTURLS"]:
        # Workaround for the glite WMS problem with spaces in environment variables
        inputbox += [FileBuffer('inputturls.conf', environment['INPUTTURLS'] + '\n')]
    if environment["INPUTFILES"]:
        # Workaround for the glite WMS problem with spaces in environment variables
        inputbox += [FileBuffer('inputfiles.conf', environment['INPUTFILES'] + '\n')]

    # now doing output files....
    job = app._getParent()  # Returns job or subjob object

    outfilelist = ""
    for outtype in app.outputpaths.keys():
        if outtype == "LOG" and "LOG" not in job.outputdata.outrootfiles:
            # logfiles are no longer saved in DQ2 datasets unless they are
            # explicitly named in the outrootfiles dictionary
            continue
        outfilelist += app.outputpaths[outtype] + app.subjobsOutfiles[job.id][outtype] + " "

    environment["OUTPUTFILES"] = outfilelist
    # Workaround for the glite WMS problem with spaces in environment variables
    inputbox += [FileBuffer('outputfiles.conf', environment['OUTPUTFILES'] + '\n')]

    # setting up job wrapper arguments.
    args = app.args
    trfargs = ' '.join(app.args[4:])
    inputbox += [FileBuffer('trfargs.conf', trfargs + '\n')]

    if job._getRoot().subjobs:
        jid = job._getRoot().id
    else:
        jid = "%d" % job.id
    environment["OUTPUT_JOBID"] = str(jid)  # used for versioning
    if app.dryrun:
        environment["DRYRUN"] = "TRUE"
    if app.dbrelease:
        environment["ATLASDBREL"] = app.dbrelease

    inputdata = []

    filename = "wrapper.sh"
    exe = os.path.join(os.path.dirname(__file__), filename)

    # output sandbox
    outputbox = jobmasterconfig.outputbox

    if job.backend._name in ["LCG", "Cronus", "Condor", "NG", "SGE"]:
        logger.debug("submission to %s" % job.backend._name)
        # prepare job requirements
        requirements = jobmasterconfig.requirements

        if "INPUTTURLS" in environment:
            logger.debug(environment["INPUTTURLS"])
            if string.find(environment["INPUTTURLS"], "file:") >= 0:
                raise ApplicationConfigurationError(
                    None,
                    "Input file was found to be local, and the LCG backend does not yet support replication of local files to the grid. Please register your input dataset in DQ2 before resubmitting this job. Aborting.")
        if string.lower(app.se_name) == "local":
            raise ApplicationConfigurationError(
                None,
                "Output files cannot be committed to a local filesystem from a grid job. Please change se_name.")

        lcg_job_config = LCGJobConfig(File(exe), inputbox, args, outputbox,
                                      environment, inputdata, requirements)
        lcg_job_config.monitoring_svc = mc['AthenaMC/LCG']
        return lcg_job_config
    else:
        logger.debug("Backend %s not fully supported, will try our best anyway..." % job.backend._name)
        # if there are input data files and they are on the grid, prestage them
        # to the local area (use either app.datasets.input_dataset or
        # /tmp/$login/data, and update environment["INPUTFILE"] accordingly if
        # the latter is used...). Later development....
        return StandardJobConfig(File(exe), inputbox, args, outputbox, environment)
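The lfn[%d]/dset[%d]/turl[%d] strings built in the loop above are shell array assignments that the job wrapper (wrapper.sh) can eval on the worker node; they travel in the sandbox files rather than in environment variables because of the glite WMS whitespace problem noted in the comments. A small self-contained illustration with invented file and dataset names:

# Invented example inputs (names are illustrative only)
example_lfns = ['EVNT.000001._00001.pool.root', 'EVNT.000001._00002.pool.root']
example_dsetmap = {
    'EVNT.000001._00001.pool.root': 'mc08.105001.pythia.evgen.EVNT',
    'EVNT.000001._00002.pool.root': 'mc08.105001.pythia.evgen.EVNT',
}

INPUTFILES = ""
INPUTDATASETS = ""
infilenr = 0
for infile in example_lfns:
    INPUTFILES += "lfn[%d]='%s';" % (infilenr, infile)
    INPUTDATASETS += "dset[%d]='%s';" % (infilenr, example_dsetmap[infile])
    infilenr += 1

print INPUTFILES
# lfn[0]='EVNT.000001._00001.pool.root';lfn[1]='EVNT.000001._00002.pool.root';
print INPUTDATASETS
# dset[0]='mc08.105001.pythia.evgen.EVNT';dset[1]='mc08.105001.pythia.evgen.EVNT';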