def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare the subjob-specific configuration for a local Athena job.

    Collects input file/GUID lists from ``job.inputdata``, resolves the
    output location (local directory or DQ2 output dataset), assembles the
    input sandbox and the environment handed to ``run-athena-local.sh``.

    Args:
        app: Athena application object; ``app._getParent()`` yields the
            job or subjob being prepared.
        appsubconfig: subjob application config (unused here).
        appmasterconfig: master application config (unused here).
        jobmasterconfig: master job config; supplies ``outputbox`` and the
            base environment dict.

    Returns:
        StandardJobConfig wrapping the wrapper script, input sandbox,
        output sandbox and environment.

    Raises:
        ApplicationConfigurationError: when required input/output data
            settings are missing or a DQ2 dataset cannot be resolved.
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug("AthenaLocalRTHandler prepare called, %s", job.id)

    input_files = []
    input_guids = []
    input_tag_files = []
    input_tag_guids = []
    input_esd_files = []
    input_esd_guids = []

    # If job has inputdata
    if job.inputdata:
        # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting
        # is done in AthenaSplitterJob: subjobs already carry their file lists.
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'ATLASDataset':
                if not job.inputdata.lfn:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.lfn
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
            elif job.inputdata._name == 'DQ2Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_guids = job.inputdata.guids
                input_files = job.inputdata.names
                # Fall back to the default access mode for unknown types.
                if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD']:
                    job.inputdata.type = 'DQ2_LOCAL'
        else:
            # Unsplit job: resolve the file lists directly from the dataset.
            if job.inputdata._name == 'ATLASCastorDataset':
                input_files = ATLASCastorDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASDataset':
                input_files = ATLASDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
                elif job.inputdata.pfnListFile:
                    logger.info('Loading file names from %s' % job.inputdata.pfnListFile.name)
                    pfnListFile = open(job.inputdata.pfnListFile.name)
                    job.inputdata.names = [line.strip() for line in pfnListFile]
                    pfnListFile.close()
                    input_files = job.inputdata.names
                    input_guids = input_files
                else:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
            elif job.inputdata._name == 'DQ2Dataset':
                if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD']:
                    job.inputdata.type = 'DQ2_LOCAL'

                contents = job.inputdata.get_contents()
                input_files = [lfn for guid, lfn in contents]
                input_guids = [guid for guid, lfn in contents]

                if job.inputdata.tagdataset:
                    tag_contents = job.inputdata.get_tag_contents()
                    input_tag_files = [lfn for guid, lfn in tag_contents]
                    input_tag_guids = [guid for guid, lfn in tag_contents]
                if job.inputdata.use_aodesd_backnav:
                    esd_contents = job.inputdata.get_contents(backnav=True)
                    input_esd_files = [lfn for guid, lfn in esd_contents]
                    input_esd_guids = [guid for guid, lfn in esd_contents]

                job.inputdata.names = input_files
                job.inputdata.guids = input_guids

    # Outputdataset
    output_location = ''
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            if job.outputdata.location:
                if isDQ2SRMSite(job.outputdata.location):
                    output_location = job.outputdata.location
                else:
                    logger.warning('Unknown output location %s.',
                                   job.outputdata.location)
            elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                if isDQ2SRMSite(job._getRoot().outputdata.location):
                    output_location = job._getRoot().outputdata.location
                else:
                    # FIX: was job.getRoot() (no such method) - would raise
                    # AttributeError on this warning path; use _getRoot()
                    # as everywhere else in this method.
                    logger.warning('Unknown output location %s.',
                                   job._getRoot().outputdata.location)
            logger.debug('Output: %s,%s', output_location,
                         job.outputdata.location)
        elif job.outputdata.location == '' and job.outputdata._name == 'DQ2OutputDataset':
            # NOTE(review): unreachable - the first branch already handles
            # every DQ2OutputDataset; kept to preserve the original flow.
            output_location = ''
        elif job.outputdata.location:
            output_location = expandfilename(job.outputdata.location)
        else:
            try:
                output_location = config['LocalOutputLocation']
                if job.outputdata:
                    job.outputdata.location = expandfilename(output_location)
            except ConfigError:
                logger.warning(
                    'No default output location specified in the configuration.')
    else:
        try:
            output_location = config['LocalOutputLocation']
        except ConfigError:
            logger.warning(
                'No default output location specified in the configuration.')

    # Job identifier used for per-job output subdirectories.
    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
        if job._getRoot().subjobs:
            if config['NoSubDirsAtAllForLocalOutput']:
                output_location = output_location
            elif config['SingleDirForLocalOutput']:
                output_location = os.path.join(output_location,
                                               "%d" % (job._getRoot().id))
            elif config['IndividualSubjobDirsForLocalOutput']:
                output_location = os.path.join(
                    output_location, "%d/%d" % (job._getRoot().id, job.id))
            else:
                output_location = os.path.join(output_location, jid)

        if job.outputdata:
            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if re.findall(pat, output_location):
                output_location = re.sub(pat, '', output_location)
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location,
                                                   "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(
                        output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)

            job.outputdata.location = output_location

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        # output dataset name from master_prepare
        output_datasetname = self.output_datasetname
        output_lfn = self.output_lfn
        output_jobid = jid

        # Set subjob datasetname
        job.outputdata.datasetname = output_datasetname
        # Set master job datasetname
        if job._getRoot().subjobs:
            job._getRoot().outputdata.datasetname = output_datasetname

        # Create output dataset -> moved to the worker node code !
        if not job.outputdata.dataset_exists(output_datasetname):
            if job._getRoot().subjobs:
                if job.id == 0:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                #job.outputdata.create_dataset(output_datasetname)
                pass
        else:
            # Warn only once (master job or first subjob) when appending.
            if (job._getRoot().subjobs and job.id == 0) or not job._getRoot().subjobs:
                logger.warning(
                    "Dataset %s already exists - appending new files to this dataset",
                    output_datasetname)
                output_location = job.outputdata.get_locations(
                    datasetname=output_datasetname, quiet=True)
                logger.debug('Output3: %s,%s', output_location,
                             job.outputdata.location)
                if output_location:
                    output_location = output_location[0]
                    if job._getRoot().subjobs:
                        job._getRoot().outputdata.location = output_location
                        job.outputdata.location = output_location
                    else:
                        job.outputdata.location = output_location
                logger.debug('Output4: %s,%s', output_location,
                             job.outputdata.location)

    # Input sandbox: helper script plus file/GUID list buffers.
    inputbox = [
        File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
    ]

    if input_guids:
        inputbox += [FileBuffer('input_guids', '\n'.join(input_guids) + '\n')]
    if input_files:
        inputbox += [FileBuffer('input_files', '\n'.join(input_files) + '\n')]
    if input_tag_guids:
        inputbox += [FileBuffer('input_tag_guids', '\n'.join(input_tag_guids) + '\n')]
    if input_tag_files:
        inputbox += [FileBuffer('input_tag_files', '\n'.join(input_tag_files) + '\n')]
    if input_esd_guids:
        inputbox += [FileBuffer('input_esd_guids', '\n'.join(input_esd_guids) + '\n')]
    if input_esd_files:
        inputbox += [FileBuffer('input_esd_files', '\n'.join(input_esd_files) + '\n')]

    # check for output data given in prepare info
    if job.outputdata and job.application.atlas_exetype == "ATHENA":
        for of in job.application.atlas_run_config['output']['alloutputs']:
            if not of in job.outputdata.outputdata:
                job.outputdata.outputdata.append(of)

    if job.outputdata and job.outputdata.outputdata:
        inputbox += [FileBuffer('output_files',
                                '\n'.join(job.outputdata.outputdata) + '\n')]
    elif job.outputdata and not job.outputdata.outputdata:
        raise ApplicationConfigurationError(
            None,
            'j.outputdata.outputdata is empty - Please specify output filename(s).')

    exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')
    outputbox = jobmasterconfig.outputbox
    environment = jobmasterconfig.env.copy()

    ## create and add sample files for FileStager
    if job.inputdata and job.inputdata._name == 'StagerDataset':
        if not job.inputdata.dataset:
            raise ApplicationConfigurationError(
                None, 'dataset name not specified in job.inputdata')

        ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
        inputbox += [File(os.path.join(os.path.dirname(__file__), 'fs-copy.py'))]

        (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(
            job=job, max_events=app.max_events)
        inputbox += [File(jo_path), File(ic_path)]

        ## re-make the environment['ATHENA_OPTIONS']
        athena_options = os.path.basename(File(jo_path).name)
        for option_file in app.option_file:
            athena_option = os.path.basename(option_file.name)
            athena_options += ' ' + athena_option

        if app.options:
            athena_options = app.options + ' ' + athena_options

        environment['ATHENA_OPTIONS'] = athena_options
        environment['DATASETTYPE'] = 'FILE_STAGER'

        ## ask to send back the FileStager.out/err generated by fs-copy.py
        outputbox += ['FileStager.out', 'FileStager.err']

    # If ArgSplitter is used
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + job.application.args
            else:
                job.application.options = job.application.args
    except AttributeError:
        # Application without an 'args' attribute: nothing to append.
        pass

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset' and output_location == []:
        # NOTE(review): message is misleading - this branch actually means no
        # DQ2 location could be resolved for the output dataset.
        raise ApplicationConfigurationError(
            None,
            'j.outputdata.outputdata is empty - Please specify output filename(s).')

    # set EOS env setting
    environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

    # flag for single output dir
    if (config['SingleDirForLocalOutput']
            or config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
        environment['SINGLE_OUTPUT_DIR'] = jid

        # change the filename: suffix each output file with '<masterid>.<subjobid>'
        newoutput = []
        for outf in job.outputdata.outputdata:
            newfile, newfileExt = os.path.splitext(outf)
            jid = "%d.%d" % (job._getParent().id, job.id)
            newoutput.append("%s.%s%s" % (newfile, jid, newfileExt))

        job.outputdata.outputdata = newoutput[:]

    environment['OUTPUT_LOCATION'] = output_location
    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        environment['OUTPUT_DATASETNAME'] = output_datasetname
        environment['OUTPUT_LFN'] = output_lfn
        environment['OUTPUT_JOBID'] = output_jobid
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(
            configDQ2['OUTPUTFILE_NAMELENGTH'])
        if job.outputdata.use_shortfilename:
            environment['GANGA_SHORTFILENAME'] = '1'
        else:
            environment['GANGA_SHORTFILENAME'] = ''
        try:
            environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
        except:
            # configLCG may be unavailable outside an LCG-enabled setup.
            pass
        environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
            configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
        environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
            configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

    # CN: extra condition for TNTSplitter
    if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
        # set up dq2 environment
        datasetname = job.inputdata.dataset
        environment['DATASETNAME'] = ':'.join(datasetname)
        environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        #environment['DATASETTYPE']=job.inputdata.type
        # At present, DQ2 download is the only thing that works
        environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
        if job.inputdata.accessprotocol:
            environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
        # NOTE(review): sandbox merge sits inside the TNT branch in the
        # original flattened source - confirm against upstream Ganga.
        if job.inputsandbox:
            inputbox += job.inputsandbox

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and job.inputdata._name in ['DQ2Dataset'] and job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER']:
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations(overlap=False)[datasets[0]])
            except:
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(None, printout)

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        environment['DATASETTYPE'] = 'TIER3'

    # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
    if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
        if job.inputdata.use_poolfilecatalog_failover:
            environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

    # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
    environment['CREATE_POOLFILECATALOG'] = '1'
    if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
        if not job.inputdata.create_poolfilecatalog:
            environment['CREATE_POOLFILECATALOG'] = '0'

    # Write trf parameters
    trf_params = ' '
    for key, value in job.application.trf_parameter.iteritems():
        if key == 'dbrelease':
            # 'dbrelease' is exported via env vars instead of trf_params.
            environment['DBDATASETNAME'] = value.split(':')[0]
            environment['DBFILENAME'] = value.split(':')[1]
        else:
            trf_params = trf_params + key + '=' + str(value) + ' '
    if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
        _append_file_buffer(inputbox, 'trf_params', [trf_params])

    if not 'db_dq2localid.py' in [os.path.basename(file.name) for file in inputbox]:
        _append_files(inputbox, 'db_dq2localid.py')

    # set RecExCommon options
    environment['RECEXTYPE'] = job.application.recex_type

    # Athena run dir
    if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
        environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir

    # Set DQ2_LOCAL_SITE_ID
    if hasattr(job.backend, 'extraopts'):
        if job.backend.extraopts.find('site=hh') > 0:
            environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
            environment['GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119'  # hack for FILE_STAGER at NAF
        elif job.backend.extraopts.find('site=zn') > 0:
            environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
            environment['GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119'  # hack for FILE_STAGER at NAF
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']
    else:
        environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

    return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare the subjob specific configuration.

    Resolves input file/GUID lists from ``job.inputdata``, determines the
    output location (local path or DQ2 output dataset), builds the input
    sandbox and environment for the ``run-athena-local.sh`` wrapper.

    Args:
        app: Athena application; ``app._getParent()`` returns the job or
            subjob object being configured.
        appsubconfig: subjob application config (unused here).
        appmasterconfig: master application config (unused here).
        jobmasterconfig: master job config providing ``outputbox`` and the
            base environment.

    Returns:
        StandardJobConfig for the local Athena execution.

    Raises:
        ApplicationConfigurationError: on missing input/output data
            configuration or unresolvable DQ2 datasets.
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug("AthenaLocalRTHandler prepare called, %s", job.id)

    input_files = []
    input_guids = []
    input_tag_files = []
    input_tag_guids = []
    input_esd_files = []
    input_esd_guids = []

    # If job has inputdata
    if job.inputdata:
        # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job splitting
        # is done in AthenaSplitterJob: subjobs already carry their file lists.
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'ATLASDataset':
                if not job.inputdata.lfn:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.lfn
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
            elif job.inputdata._name == 'DQ2Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_guids = job.inputdata.guids
                input_files = job.inputdata.names
                # Fall back to the default access mode for unknown types.
                if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD']:
                    job.inputdata.type = 'DQ2_LOCAL'
        else:
            # Unsplit job: resolve the file lists directly from the dataset.
            if job.inputdata._name == 'ATLASCastorDataset':
                input_files = ATLASCastorDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASDataset':
                input_files = ATLASDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
                elif job.inputdata.pfnListFile:
                    logger.info('Loading file names from %s' % job.inputdata.pfnListFile.name)
                    pfnListFile = open(job.inputdata.pfnListFile.name)
                    job.inputdata.names = [line.strip() for line in pfnListFile]
                    pfnListFile.close()
                    input_files = job.inputdata.names
                    input_guids = input_files
                else:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
            elif job.inputdata._name == 'DQ2Dataset':
                if not job.inputdata.type in ['DQ2_LOCAL', 'FILE_STAGER', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD']:
                    job.inputdata.type = 'DQ2_LOCAL'

                contents = job.inputdata.get_contents()
                input_files = [lfn for guid, lfn in contents]
                input_guids = [guid for guid, lfn in contents]

                if job.inputdata.tagdataset:
                    tag_contents = job.inputdata.get_tag_contents()
                    input_tag_files = [lfn for guid, lfn in tag_contents]
                    input_tag_guids = [guid for guid, lfn in tag_contents]
                if job.inputdata.use_aodesd_backnav:
                    esd_contents = job.inputdata.get_contents(backnav=True)
                    input_esd_files = [lfn for guid, lfn in esd_contents]
                    input_esd_guids = [guid for guid, lfn in esd_contents]

                job.inputdata.names = input_files
                job.inputdata.guids = input_guids

    # Outputdataset
    output_location = ''
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            if job.outputdata.location:
                if isDQ2SRMSite(job.outputdata.location):
                    output_location = job.outputdata.location
                else:
                    logger.warning('Unknown output location %s.',
                                   job.outputdata.location)
            elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                if isDQ2SRMSite(job._getRoot().outputdata.location):
                    output_location = job._getRoot().outputdata.location
                else:
                    # FIX: was job.getRoot() (no such method) - would raise
                    # AttributeError instead of logging; use _getRoot() as
                    # everywhere else in this method.
                    logger.warning('Unknown output location %s.',
                                   job._getRoot().outputdata.location)
            logger.debug('Output: %s,%s', output_location,
                         job.outputdata.location)
        elif job.outputdata.location == '' and job.outputdata._name == 'DQ2OutputDataset':
            # NOTE(review): unreachable - the first branch already handles
            # every DQ2OutputDataset; kept to preserve the original flow.
            output_location = ''
        elif job.outputdata.location:
            output_location = expandfilename(job.outputdata.location)
        else:
            try:
                output_location = config['LocalOutputLocation']
                if job.outputdata:
                    job.outputdata.location = expandfilename(output_location)
            except ConfigError:
                logger.warning(
                    'No default output location specified in the configuration.')
    else:
        try:
            output_location = config['LocalOutputLocation']
        except ConfigError:
            logger.warning(
                'No default output location specified in the configuration.')

    # Job identifier used for per-job output subdirectories.
    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
        if job._getRoot().subjobs:
            if config['NoSubDirsAtAllForLocalOutput']:
                output_location = output_location
            elif config['SingleDirForLocalOutput']:
                output_location = os.path.join(output_location,
                                               "%d" % (job._getRoot().id))
            elif config['IndividualSubjobDirsForLocalOutput']:
                output_location = os.path.join(
                    output_location, "%d/%d" % (job._getRoot().id, job.id))
            else:
                output_location = os.path.join(output_location, jid)

        if job.outputdata:
            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if re.findall(pat, output_location):
                output_location = re.sub(pat, '', output_location)
                if config['NoSubDirsAtAllForLocalOutput']:
                    output_location = output_location
                elif config['SingleDirForLocalOutput']:
                    output_location = os.path.join(output_location,
                                                   "%d" % (job._getRoot().id))
                elif config['IndividualSubjobDirsForLocalOutput']:
                    output_location = os.path.join(
                        output_location, "%d/%d" % (job._getRoot().id, job.id))
                else:
                    output_location = os.path.join(output_location, jid)

            job.outputdata.location = output_location

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        # output dataset name from master_prepare
        output_datasetname = self.output_datasetname
        output_lfn = self.output_lfn
        output_jobid = jid

        # Set subjob datasetname
        job.outputdata.datasetname = output_datasetname
        # Set master job datasetname
        if job._getRoot().subjobs:
            job._getRoot().outputdata.datasetname = output_datasetname

        # Create output dataset -> moved to the worker node code !
        if not job.outputdata.dataset_exists(output_datasetname):
            if job._getRoot().subjobs:
                if job.id == 0:
                    #job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                #job.outputdata.create_dataset(output_datasetname)
                pass
        else:
            # Warn only once (master job or first subjob) when appending.
            if (job._getRoot().subjobs and job.id == 0) or not job._getRoot().subjobs:
                logger.warning(
                    "Dataset %s already exists - appending new files to this dataset",
                    output_datasetname)
                output_location = job.outputdata.get_locations(
                    datasetname=output_datasetname, quiet=True)
                logger.debug('Output3: %s,%s', output_location,
                             job.outputdata.location)
                if output_location:
                    output_location = output_location[0]
                    if job._getRoot().subjobs:
                        job._getRoot().outputdata.location = output_location
                        job.outputdata.location = output_location
                    else:
                        job.outputdata.location = output_location
                logger.debug('Output4: %s,%s', output_location,
                             job.outputdata.location)

    # Input sandbox: helper script plus file/GUID list buffers.
    inputbox = [
        File(os.path.join(os.path.dirname(__file__), 'athena-utility.sh'))
    ]

    if input_guids:
        inputbox += [FileBuffer('input_guids', '\n'.join(input_guids) + '\n')]
    if input_files:
        inputbox += [FileBuffer('input_files', '\n'.join(input_files) + '\n')]
    if input_tag_guids:
        inputbox += [FileBuffer('input_tag_guids', '\n'.join(input_tag_guids) + '\n')]
    if input_tag_files:
        inputbox += [FileBuffer('input_tag_files', '\n'.join(input_tag_files) + '\n')]
    if input_esd_guids:
        inputbox += [FileBuffer('input_esd_guids', '\n'.join(input_esd_guids) + '\n')]
    if input_esd_files:
        inputbox += [FileBuffer('input_esd_files', '\n'.join(input_esd_files) + '\n')]

    # check for output data given in prepare info
    if job.outputdata and job.application.atlas_exetype == "ATHENA":
        for of in job.application.atlas_run_config['output']['alloutputs']:
            if not of in job.outputdata.outputdata:
                job.outputdata.outputdata.append(of)

    if job.outputdata and job.outputdata.outputdata:
        inputbox += [FileBuffer('output_files',
                                '\n'.join(job.outputdata.outputdata) + '\n')]
    elif job.outputdata and not job.outputdata.outputdata:
        raise ApplicationConfigurationError(
            None,
            'j.outputdata.outputdata is empty - Please specify output filename(s).')

    exe = os.path.join(os.path.dirname(__file__), 'run-athena-local.sh')
    outputbox = jobmasterconfig.outputbox
    environment = jobmasterconfig.env.copy()

    ## create and add sample files for FileStager
    if job.inputdata and job.inputdata._name == 'StagerDataset':
        if not job.inputdata.dataset:
            raise ApplicationConfigurationError(
                None, 'dataset name not specified in job.inputdata')

        ## ship fs-copy.py with the job as it's going to be used as a copy command wrapper by FileStager
        inputbox += [File(os.path.join(os.path.dirname(__file__), 'fs-copy.py'))]

        (jo_path, ic_path) = job.inputdata.make_FileStager_jobOptions(
            job=job, max_events=app.max_events)
        inputbox += [File(jo_path), File(ic_path)]

        ## re-make the environment['ATHENA_OPTIONS']
        athena_options = os.path.basename(File(jo_path).name)
        for option_file in app.option_file:
            athena_option = os.path.basename(option_file.name)
            athena_options += ' ' + athena_option

        if app.options:
            athena_options = app.options + ' ' + athena_options

        environment['ATHENA_OPTIONS'] = athena_options
        environment['DATASETTYPE'] = 'FILE_STAGER'

        ## ask to send back the FileStager.out/err generated by fs-copy.py
        outputbox += ['FileStager.out', 'FileStager.err']

    # If ArgSplitter is used
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + job.application.args
            else:
                job.application.options = job.application.args
    except AttributeError:
        # Application without an 'args' attribute: nothing to append.
        pass

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset' and output_location == []:
        # NOTE(review): message is misleading - this branch actually means no
        # DQ2 location could be resolved for the output dataset.
        raise ApplicationConfigurationError(
            None,
            'j.outputdata.outputdata is empty - Please specify output filename(s).')

    # set EOS env setting
    environment['EOS_COMMAND_PATH'] = config['PathToEOSBinary']

    # flag for single output dir
    if (config['SingleDirForLocalOutput']
            or config['NoSubDirsAtAllForLocalOutput']) and job._getParent():
        environment['SINGLE_OUTPUT_DIR'] = jid

        # change the filename: suffix each output file with '<masterid>.<subjobid>'
        newoutput = []
        for outf in job.outputdata.outputdata:
            newfile, newfileExt = os.path.splitext(outf)
            jid = "%d.%d" % (job._getParent().id, job.id)
            newoutput.append("%s.%s%s" % (newfile, jid, newfileExt))

        job.outputdata.outputdata = newoutput[:]

    environment['OUTPUT_LOCATION'] = output_location
    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        environment['OUTPUT_DATASETNAME'] = output_datasetname
        environment['OUTPUT_LFN'] = output_lfn
        environment['OUTPUT_JOBID'] = output_jobid
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DQ2_OUTPUTFILE_NAMELENGTH'] = str(
            configDQ2['OUTPUTFILE_NAMELENGTH'])
        if job.outputdata.use_shortfilename:
            environment['GANGA_SHORTFILENAME'] = '1'
        else:
            environment['GANGA_SHORTFILENAME'] = ''
        try:
            environment['GANGA_GLITE_UI'] = configLCG['GLITE_SETUP']
        except:
            # configLCG may be unavailable outside an LCG-enabled setup.
            pass
        environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(
            configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
        environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(
            configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

    # CN: extra condition for TNTSplitter
    if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
        # set up dq2 environment
        datasetname = job.inputdata.dataset
        environment['DATASETNAME'] = ':'.join(datasetname)
        environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        #environment['DATASETTYPE']=job.inputdata.type
        # At present, DQ2 download is the only thing that works
        environment['DATASETTYPE'] = "DQ2_DOWNLOAD"
        if job.inputdata.accessprotocol:
            environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
        # NOTE(review): sandbox merge sits inside the TNT branch in the
        # original flattened source - confirm against upstream Ganga.
        if job.inputsandbox:
            inputbox += job.inputsandbox

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and job.inputdata._name in ['DQ2Dataset'] and job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER']:
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations(overlap=False)[datasets[0]])
            except:
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(None, printout)

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        environment['DATASETTYPE'] = 'TIER3'

    # USE_POOLFILECATALOG_FAILOVER of Local/ATLASLocalDataset
    if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
        if job.inputdata.use_poolfilecatalog_failover:
            environment['USE_POOLFILECATALOG_FAILOVER'] = '1'

    # CREATE_POOLFILECATALOG of Local/ATLASLocalDataset
    environment['CREATE_POOLFILECATALOG'] = '1'
    if job.inputdata and job.inputdata._name == 'ATLASLocalDataset':
        if not job.inputdata.create_poolfilecatalog:
            environment['CREATE_POOLFILECATALOG'] = '0'

    # Write trf parameters
    trf_params = ' '
    for key, value in job.application.trf_parameter.iteritems():
        if key == 'dbrelease':
            # 'dbrelease' is exported via env vars instead of trf_params.
            environment['DBDATASETNAME'] = value.split(':')[0]
            environment['DBFILENAME'] = value.split(':')[1]
        else:
            trf_params = trf_params + key + '=' + str(value) + ' '
    if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
        _append_file_buffer(inputbox, 'trf_params', [trf_params])

    if not 'db_dq2localid.py' in [os.path.basename(file.name) for file in inputbox]:
        _append_files(inputbox, 'db_dq2localid.py')

    # set RecExCommon options
    environment['RECEXTYPE'] = job.application.recex_type

    # Athena run dir
    if job.application.atlas_exetype == "ATHENA" and job.application.atlas_run_dir != "":
        environment['ATLAS_RUN_DIR'] = job.application.atlas_run_dir

    # Set DQ2_LOCAL_SITE_ID
    if hasattr(job.backend, 'extraopts'):
        if job.backend.extraopts.find('site=hh') > 0:
            environment['DQ2_LOCAL_SITE_ID'] = 'DESY-HH_SCRATCHDISK'
            environment['GANGA_LCG_CE'] = 'grid-ce5.desy.de:2119'  # hack for FILE_STAGER at NAF
        elif job.backend.extraopts.find('site=zn') > 0:
            environment['DQ2_LOCAL_SITE_ID'] = 'DESY-ZN_SCRATCHDISK'
            environment['GANGA_LCG_CE'] = 'lcg-ce0.ifh.de:2119'  # hack for FILE_STAGER at NAF
        else:
            environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']
    else:
        environment['DQ2_LOCAL_SITE_ID'] = configDQ2['DQ2_LOCAL_SITE_ID']

    return StandardJobConfig(File(exe), inputbox, [], outputbox, environment)
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare the subjob-specific LCG job configuration.

    Resolves the subjob's input dataset into file names and GUIDs,
    determines the output location / DQ2 output dataset, assembles the
    input sandbox and job environment, and returns the LCGJobConfig.

    Parameters:
        app: Athena application object; its parent is the (sub)job.
        appsubconfig: subjob application configuration (unused here).
        appmasterconfig: master application configuration (unused here).
        jobmasterconfig: master job configuration; supplies the
            outputbox, requirements and base environment.

    Returns:
        LCGJobConfig for this subjob with the Athena monitoring
        service attached.

    Raises:
        ApplicationConfigurationError: if input/output dataset
            configuration is missing or inconsistent.
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

    # --- prepare inputdata -------------------------------------------
    input_files = []
    input_guids = []
    input_tag_files = []
    input_tag_guids = []
    input_esd_files = []
    input_esd_guids = []
    add_files = []

    if job.inputdata:
        # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job
        # splitting is done in AthenaSplitterJob, so subjobs already
        # carry their own file lists.
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'ATLASDataset':
                if not job.inputdata.lfn:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.lfn
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    # Tier3 datasets use the file names as GUIDs
                    input_guids = input_files
            elif job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']:
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None,
                        'No inputdata has been specified. Failure in job %s.%s. Dataset %s' % (
                            job._getRoot().id, job.id, job.inputdata.dataset))
                input_guids = job.inputdata.guids
                input_files = job.inputdata.names

                if job.inputdata.tag_info:
                    # tag_info cannot be combined with TAG_LOCAL/TAG_COPY
                    if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                        raise ApplicationConfigurationError(
                            None,
                            "Cannot provide both tag_info and run as '%s'. Please use one or the other!" % job.inputdata.type)

                    # FILE_STAGER does not support TAG jobs
                    if job.inputdata.type == 'FILE_STAGER':
                        logger.warning("TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead.")
                        job.inputdata.type = 'DQ2_COPY'

                    # add additional file info for tags (dataset:guid:lfn)
                    for tag_file in job.inputdata.tag_info:
                        for ref in job.inputdata.tag_info[tag_file]['refs']:
                            add_files.append(ref[1] + ':' + ref[0] + ':' + ref[2])

                # fall back to sane defaults for unknown type/datatype
                if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'
        else:
            # unsplit (master) job: resolve the file list ourselves
            if job.inputdata._name == 'ATLASCastorDataset':
                input_files = ATLASCastorDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASDataset':
                input_files = ATLASDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
                elif job.inputdata.pfnListFile:
                    logger.info('Loading file names from %s' % job.inputdata.pfnListFile.name)
                    # use a context manager so the file is closed even on error
                    # (original leaked the handle if reading raised)
                    with open(job.inputdata.pfnListFile.name) as pfnListFile:
                        job.inputdata.names = [line.strip() for line in pfnListFile]
                    input_files = job.inputdata.names
                    input_guids = input_files
                else:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
            elif job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']:
                if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'

                input_guids, input_files = _splitlist(job.inputdata.get_contents())
                if job.inputdata.tagdataset:
                    input_tag_guids, input_tag_files = _splitlist(job.inputdata.get_tag_contents())
                if job.inputdata.use_aodesd_backnav:
                    input_esd_guids, input_esd_files = _splitlist(job.inputdata.get_contents(backnav=True))

                # persist the resolved lists on the dataset object
                job.inputdata.names = input_files
                job.inputdata.guids = input_guids

    # --- prepare outputdata ------------------------------------------
    output_location = ''
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            if job.outputdata.location:
                if type(job.outputdata.location) == str and isDQ2SRMSite(job.outputdata.location):
                    output_location = job.outputdata.location
                else:
                    logger.warning('Unknown output location %s.',
                                   job.outputdata.location)
            elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                if isDQ2SRMSite(job._getRoot().outputdata.location):
                    output_location = job._getRoot().outputdata.location
                else:
                    # BUGFIX: was job.getRoot() (no such method) which
                    # raised AttributeError instead of warning
                    logger.warning('Unknown output location %s.',
                                   job._getRoot().outputdata.location)
            logger.debug('Output: %s,%s', output_location,
                         job.outputdata.location)
        else:
            if job.outputdata.location:
                output_location = job.outputdata.location
            else:
                try:
                    output_location = config['LCGOutputLocation']
                except ConfigError:
                    logger.warning('No default output location specified in the configuration.')
            if job.outputdata.location:
                job.outputdata.location = output_location
            logger.debug('Output: %s,%s', output_location,
                         job.outputdata.location)

    # job id string: "<master>.<subjob>" for subjobs, "<id>" otherwise
    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
        output_location = os.path.join(output_location, jid)
        if job.outputdata:
            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if re.findall(pat, output_location):
                output_location = re.sub(pat, '', output_location)
                output_location = os.path.join(output_location, jid)
            job.outputdata.location = output_location

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        # output dataset name from master_prepare
        output_datasetname = self.output_datasetname
        output_lfn = self.output_lfn
        output_jobid = jid

        # Set subjob datasetname
        job.outputdata.datasetname = output_datasetname
        # Set master job datasetname
        if job._getRoot().subjobs:
            job._getRoot().outputdata.datasetname = output_datasetname

        # Create output dataset -> moved to the worker node code !
        if not job.outputdata.dataset_exists(output_datasetname):
            if job._getRoot().subjobs:
                if job.id == 0:
                    # job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                # job.outputdata.create_dataset(output_datasetname)
                pass
            if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                job.outputdata.create_subscription(output_datasetname,
                                                   output_location)
        else:
            # warn once per master job, not once per subjob
            if (job._getRoot().subjobs and job.id == 0) or not job._getRoot().subjobs:
                logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                output_location = job.outputdata.get_locations(
                    datasetname=output_datasetname, quiet=True)
                logger.debug('Output3: %s,%s', output_location,
                             job.outputdata.location)
                if output_location:
                    output_location = output_location[0]
                    if job._getRoot().subjobs:
                        job._getRoot().outputdata.location = output_location
                        job.outputdata.location = output_location
                    else:
                        job.outputdata.location = output_location
                logger.debug('Output4: %s,%s', output_location,
                             job.outputdata.location)

    if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (not job.outputdata or (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
        raise ApplicationConfigurationError(
            None,
            'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.')

    # --- prepare inputsandbox ----------------------------------------
    inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]

    if input_guids:
        _append_file_buffer(inputbox, 'input_guids', input_guids)
    if input_files:
        _append_file_buffer(inputbox, 'input_files', input_files)
    if add_files:
        _append_file_buffer(inputbox, 'add_files', add_files)
    if input_tag_guids:
        _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
    if input_tag_files:
        _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
    if input_esd_guids:
        _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
    if input_esd_files:
        _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

    # ship locally-provided TAG files with the job
    # (guard tag_info - it may be None for plain DQ2 jobs)
    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        for tag_file in job.inputdata.tag_info:
            if job.inputdata.tag_info[tag_file]['path'] != '':
                inputbox.append(
                    File(os.path.join(
                        job.inputdata.tag_info[tag_file]['path'], tag_file)))

    # check for output data given in prepare info
    if job.outputdata and job.application.atlas_exetype == "ATHENA":
        for of in job.application.atlas_run_config['output']['alloutputs']:
            if not of in job.outputdata.outputdata:
                job.outputdata.outputdata.append(of)

    if job.outputdata and job.outputdata.outputdata:
        _append_file_buffer(inputbox, 'output_files',
                            job.outputdata.outputdata)
    elif job.outputdata and not job.outputdata.outputdata:
        raise ApplicationConfigurationError(
            None,
            'j.outputdata.outputdata is empty - Please specify output filename(s).')

    exe = os.path.join(__directory__, 'run-athena-lcg.sh')
    outputbox = jobmasterconfig.outputbox
    requirements = jobmasterconfig.requirements.__copy__()
    environment = jobmasterconfig.env.copy()

    # If ArgSplitter is used (apps without .args raise AttributeError)
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + job.application.args
            else:
                job.application.options = job.application.args
    except AttributeError:
        pass

    if output_location and output_location.find('/castor/cern.ch/grid/atlas/t0') >= 0:
        raise ApplicationConfigurationError(
            None,
            'You are try to save the output to TIER0DISK - please use another area !')
    if not output_location:
        output_location = ''
    if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
        # stage-out handled via subscription, not direct upload
        output_location = ''
    environment['OUTPUT_LOCATION'] = output_location
    environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        environment['OUTPUT_DATASETNAME'] = output_datasetname
        environment['OUTPUT_LFN'] = output_lfn
        environment['OUTPUT_JOBID'] = output_jobid
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2['OUTPUTFILE_NAMELENGTH']
        if job.outputdata.use_shortfilename:
            environment['GANGA_SHORTFILENAME'] = '1'
        else:
            environment['GANGA_SHORTFILENAME'] = ''
        environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
        environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

    # CN: extra condition for TNTSplitter
    if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
        # set up dq2 environment
        datasetname = job.inputdata.dataset
        environment['DATASETNAME'] = ':'.join(datasetname)
        environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DATASETTYPE'] = job.inputdata.type
        environment['DATASETDATATYPE'] = job.inputdata.datatype
        if job.inputdata.accessprotocol:
            environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
        if job.inputsandbox:
            inputbox += job.inputsandbox

    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        # BUGFIX: the 'path' lookup used the stale loop variable
        # 'tag_file' from an earlier, conditionally-executed loop
        # (NameError if that loop never ran); use the first key,
        # matching the adjacent 'dataset' check.
        first_tag = job.inputdata.tag_info.keys()[0]
        if job.inputdata.tag_info[first_tag]['dataset'] != '' and job.inputdata.tag_info[first_tag]['path'] == '':
            environment['TAG_TYPE'] = 'DQ2'
        else:
            environment['TAG_TYPE'] = 'LOCAL'

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and (job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']) and (job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']):
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations(overlap=False)[datasets[0]])
            except Exception:
                # narrowed from bare except; still reported as a
                # submission failure to the user
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(None, printout)

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        environment['DATASETTYPE'] = 'TIER3'

    # Work around for glite WMS spaced environement variable problem
    inputbox.append(FileBuffer('athena_options',
                               environment['ATHENA_OPTIONS'] + '\n'))

    # Write trf parameters
    trf_params = ' '
    for key, value in job.application.trf_parameter.iteritems():
        if key == 'dbrelease':
            environment['DBDATASETNAME'] = value.split(':')[0]
            environment['DBFILENAME'] = value.split(':')[1]
        else:
            trf_params = trf_params + key + '=' + str(value) + ' '
    if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
        _append_file_buffer(inputbox, 'trf_params', [trf_params])

    # set RecExCommon options
    environment['RECEXTYPE'] = job.application.recex_type

    # event based splitting: set max_events and skip_events
    if job._getRoot().splitter and hasattr(job._getRoot().splitter, 'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0:
        environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
        environment['ATHENA_SKIP_EVENTS'] = str(job.application.skip_events)

    # pick event
    if job._getRoot().splitter and job._getRoot().inputdata and job._getRoot().inputdata._name == 'EventPicking':
        # Replace blank space
        environment['ATHENA_RUN_EVENTS'] = str(job.application.run_event).replace(' ', '')
        environment['ATHENA_FILTER_POLICY'] = str(job.inputdata.pick_filter_policy)

    # append a property for monitoring to the jobconfig of subjobs
    lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox,
                              environment, [], requirements)
    lcg_config.monitoring_svc = mc['Athena']
    return lcg_config
def prepare(self, app, appsubconfig, appmasterconfig, jobmasterconfig):
    """Prepare the subjob specific configuration for an LCG Athena job.

    Builds the per-subjob input file/GUID lists, resolves the output
    location or DQ2 output dataset, fills the input sandbox and the job
    environment, and returns the complete LCGJobConfig.

    Parameters:
        app: Athena application object whose parent is the (sub)job.
        appsubconfig: subjob application configuration (unused here).
        appmasterconfig: master application configuration (unused here).
        jobmasterconfig: master job configuration providing outputbox,
            requirements and the base environment.

    Returns:
        LCGJobConfig with the Athena monitoring service attached.

    Raises:
        ApplicationConfigurationError: on missing or inconsistent
            input/output dataset configuration.
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug("AthenaLCGRTHandler prepare called, %s", job.id)

    # prepare inputdata
    input_files = []
    input_guids = []
    input_tag_files = []
    input_tag_guids = []
    input_esd_files = []
    input_esd_guids = []
    add_files = []

    if job.inputdata:
        # DQ2Dataset, ATLASLocalDataset and ATLASCastorDataset job
        # splitting is done in AthenaSplitterJob; subjobs already carry
        # their own file lists.
        if job._getRoot().subjobs:
            if job.inputdata._name == 'ATLASLocalDataset' or job.inputdata._name == 'ATLASCastorDataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.names
            elif job.inputdata._name == 'ATLASDataset':
                if not job.inputdata.lfn:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                input_files = job.inputdata.lfn
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    # Tier3 datasets reuse the file names as GUIDs
                    input_guids = input_files
            elif job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']:
                if not job.inputdata.names:
                    raise ApplicationConfigurationError(
                        None,
                        'No inputdata has been specified. Failure in job %s.%s. Dataset %s' % (
                            job._getRoot().id, job.id, job.inputdata.dataset))
                input_guids = job.inputdata.guids
                input_files = job.inputdata.names

                if job.inputdata.tag_info:
                    # check for conflicts with TAG_LOCAL or TAG_COPY
                    if job.inputdata.type in ['TAG_LOCAL', 'TAG_COPY']:
                        raise ApplicationConfigurationError(
                            None,
                            "Cannot provide both tag_info and run as '%s'. Please use one or the other!" % job.inputdata.type)

                    # check if FILE_STAGER is used
                    if job.inputdata.type == 'FILE_STAGER':
                        logger.warning("TAG jobs currently can't use the FILE_STAGER. Switching to DQ2_COPY instead.")
                        job.inputdata.type = 'DQ2_COPY'

                    # add additional file info for tags (dataset:guid:lfn)
                    for tag_file in job.inputdata.tag_info:
                        for ref in job.inputdata.tag_info[tag_file]['refs']:
                            add_files.append(ref[1] + ':' + ref[0] + ':' + ref[2])

                # normalise unknown type/datatype to safe defaults
                if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'
        else:
            # unsplit (master) job: resolve the file lists here
            if job.inputdata._name == 'ATLASCastorDataset':
                input_files = ATLASCastorDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASLocalDataset':
                input_files = ATLASLocalDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASDataset':
                input_files = ATLASDataset.get_filenames(app)
            elif job.inputdata._name == 'ATLASTier3Dataset':
                if job.inputdata.names:
                    input_files = job.inputdata.names
                    input_guids = input_files
                elif job.inputdata.pfnListFile:
                    logger.info('Loading file names from %s' % job.inputdata.pfnListFile.name)
                    # context manager guarantees the handle is closed
                    # (original leaked it if reading raised)
                    with open(job.inputdata.pfnListFile.name) as pfnListFile:
                        job.inputdata.names = [line.strip() for line in pfnListFile]
                    input_files = job.inputdata.names
                    input_guids = input_files
                else:
                    raise ApplicationConfigurationError(
                        None, 'No inputdata has been specified.')
            elif job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']:
                if not job.inputdata.type in ['DQ2_LOCAL', 'LFC', 'TAG', 'TNT_LOCAL', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']:
                    job.inputdata.type = 'DQ2_LOCAL'
                if not job.inputdata.datatype in ['DATA', 'MC', 'MuonCalibStream']:
                    job.inputdata.datatype = 'MC'

                input_guids, input_files = _splitlist(job.inputdata.get_contents())
                if job.inputdata.tagdataset:
                    input_tag_guids, input_tag_files = _splitlist(job.inputdata.get_tag_contents())
                if job.inputdata.use_aodesd_backnav:
                    input_esd_guids, input_esd_files = _splitlist(job.inputdata.get_contents(backnav=True))

                # persist resolved lists on the dataset object
                job.inputdata.names = input_files
                job.inputdata.guids = input_guids

    # prepare outputdata
    output_location = ''
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            if job.outputdata.location:
                if type(job.outputdata.location) == str and isDQ2SRMSite(job.outputdata.location):
                    output_location = job.outputdata.location
                else:
                    logger.warning('Unknown output location %s.',
                                   job.outputdata.location)
            elif job._getRoot().subjobs and job._getRoot().outputdata.location:
                if isDQ2SRMSite(job._getRoot().outputdata.location):
                    output_location = job._getRoot().outputdata.location
                else:
                    # BUGFIX: original called job.getRoot() (no such
                    # method) and raised AttributeError here
                    logger.warning('Unknown output location %s.',
                                   job._getRoot().outputdata.location)
            logger.debug('Output: %s,%s', output_location,
                         job.outputdata.location)
        else:
            if job.outputdata.location:
                output_location = job.outputdata.location
            else:
                try:
                    output_location = config['LCGOutputLocation']
                except ConfigError:
                    logger.warning('No default output location specified in the configuration.')
            if job.outputdata.location:
                job.outputdata.location = output_location
            logger.debug('Output: %s,%s', output_location,
                         job.outputdata.location)

    # job id string: "<master>.<subjob>" for subjobs, plain id otherwise
    if job._getRoot().subjobs:
        jid = "%d.%d" % (job._getRoot().id, job.id)
    else:
        jid = "%d" % job.id

    if output_location and job.outputdata and job.outputdata._name != 'DQ2OutputDataset':
        output_location = os.path.join(output_location, jid)
        if job.outputdata:
            # Remove trailing number if job is copied
            pat = re.compile(r'\/[\d\.]+\/[\d\.]+$')
            if re.findall(pat, output_location):
                output_location = re.sub(pat, '', output_location)
                output_location = os.path.join(output_location, jid)
            job.outputdata.location = output_location

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        # output dataset name from master_prepare
        output_datasetname = self.output_datasetname
        output_lfn = self.output_lfn
        output_jobid = jid

        # Set subjob datasetname
        job.outputdata.datasetname = output_datasetname
        # Set master job datasetname
        if job._getRoot().subjobs:
            job._getRoot().outputdata.datasetname = output_datasetname

        # Create output dataset -> moved to the worker node code !
        if not job.outputdata.dataset_exists(output_datasetname):
            if job._getRoot().subjobs:
                if job.id == 0:
                    # job.outputdata.create_dataset(output_datasetname)
                    pass
            else:
                # job.outputdata.create_dataset(output_datasetname)
                pass
            if output_location and configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
                job.outputdata.create_subscription(output_datasetname,
                                                   output_location)
        else:
            # only warn once per master job
            if (job._getRoot().subjobs and job.id == 0) or not job._getRoot().subjobs:
                logger.warning("Dataset %s already exists - appending new files to this dataset", output_datasetname)
                output_location = job.outputdata.get_locations(
                    datasetname=output_datasetname, quiet=True)
                logger.debug('Output3: %s,%s', output_location,
                             job.outputdata.location)
                if output_location:
                    output_location = output_location[0]
                    if job._getRoot().subjobs:
                        job._getRoot().outputdata.location = output_location
                        job.outputdata.location = output_location
                    else:
                        job.outputdata.location = output_location
                logger.debug('Output4: %s,%s', output_location,
                             job.outputdata.location)

    if getConfig('LCG')['JobLogHandler'] == 'DQ2' and (not job.outputdata or (job.outputdata and job.outputdata._name != 'DQ2OutputDataset')):
        raise ApplicationConfigurationError(
            None,
            'Staging of log files in DQ2 requested, but DQ2 output dataset not specified.')

    # prepare inputsandbox
    inputbox = [File(os.path.join(__directory__, 'athena-utility.sh'))]

    if input_guids:
        _append_file_buffer(inputbox, 'input_guids', input_guids)
    if input_files:
        _append_file_buffer(inputbox, 'input_files', input_files)
    if add_files:
        _append_file_buffer(inputbox, 'add_files', add_files)
    if input_tag_guids:
        _append_file_buffer(inputbox, 'input_tag_guids', input_tag_guids)
    if input_tag_files:
        _append_file_buffer(inputbox, 'input_tag_files', input_tag_files)
    if input_esd_guids:
        _append_file_buffer(inputbox, 'input_esd_guids', input_esd_guids)
    if input_esd_files:
        _append_file_buffer(inputbox, 'input_esd_files', input_esd_files)

    # ship locally-provided TAG files with the job
    # (guard tag_info - it may be None for plain DQ2 jobs)
    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        for tag_file in job.inputdata.tag_info:
            if job.inputdata.tag_info[tag_file]['path'] != '':
                inputbox.append(
                    File(os.path.join(
                        job.inputdata.tag_info[tag_file]['path'], tag_file)))

    # check for output data given in prepare info
    if job.outputdata and job.application.atlas_exetype == "ATHENA":
        for of in job.application.atlas_run_config['output']['alloutputs']:
            if not of in job.outputdata.outputdata:
                job.outputdata.outputdata.append(of)

    if job.outputdata and job.outputdata.outputdata:
        _append_file_buffer(inputbox, 'output_files',
                            job.outputdata.outputdata)
    elif job.outputdata and not job.outputdata.outputdata:
        raise ApplicationConfigurationError(
            None,
            'j.outputdata.outputdata is empty - Please specify output filename(s).')

    exe = os.path.join(__directory__, 'run-athena-lcg.sh')
    outputbox = jobmasterconfig.outputbox
    requirements = jobmasterconfig.requirements.__copy__()
    environment = jobmasterconfig.env.copy()

    # If ArgSplitter is used (apps without .args raise AttributeError)
    try:
        if job.application.args:
            environment['ATHENA_OPTIONS'] = environment['ATHENA_OPTIONS'] + ' ' + ' '.join(job.application.args)
            if job.application.options:
                job.application.options = job.application.options + ' ' + job.application.args
            else:
                job.application.options = job.application.args
    except AttributeError:
        pass

    if output_location and output_location.find('/castor/cern.ch/grid/atlas/t0') >= 0:
        raise ApplicationConfigurationError(
            None,
            'You are try to save the output to TIER0DISK - please use another area !')
    if not output_location:
        output_location = ''
    if configDQ2['USE_STAGEOUT_SUBSCRIPTION']:
        # stage-out is handled via subscription, not direct upload
        output_location = ''
    environment['OUTPUT_LOCATION'] = output_location
    environment['ATLASOutputDatasetLFC'] = config['ATLASOutputDatasetLFC']
    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        environment['OUTPUT_DATASETNAME'] = output_datasetname
        environment['OUTPUT_LFN'] = output_lfn
        environment['OUTPUT_JOBID'] = output_jobid
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DQ2_OUTPUTFILE_NAMELENGTH'] = configDQ2['OUTPUTFILE_NAMELENGTH']
        if job.outputdata.use_shortfilename:
            environment['GANGA_SHORTFILENAME'] = '1'
        else:
            environment['GANGA_SHORTFILENAME'] = ''
        environment['DQ2_OUTPUT_SPACE_TOKENS'] = ':'.join(configDQ2['DQ2_OUTPUT_SPACE_TOKENS'])
        environment['DQ2_BACKUP_OUTPUT_LOCATIONS'] = ':'.join(configDQ2['DQ2_BACKUP_OUTPUT_LOCATIONS'])

    # CN: extra condition for TNTSplitter
    if job._getRoot().splitter and job._getRoot().splitter._name == 'TNTJobSplitter':
        # set up dq2 environment
        datasetname = job.inputdata.dataset
        environment['DATASETNAME'] = ':'.join(datasetname)
        environment['DATASETLOCATION'] = ':'.join(job.inputdata.get_locations())
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']
        environment['DATASETTYPE'] = job.inputdata.type
        environment['DATASETDATATYPE'] = job.inputdata.datatype
        if job.inputdata.accessprotocol:
            environment['DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
        if job.inputsandbox:
            inputbox += job.inputsandbox

    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.tag_info:
        # BUGFIX: the 'path' lookup used the stale loop variable
        # 'tag_file' from an earlier, conditionally-executed loop
        # (NameError if that loop never ran); use the first key like
        # the adjacent 'dataset' check does.
        first_tag = job.inputdata.tag_info.keys()[0]
        if job.inputdata.tag_info[first_tag]['dataset'] != '' and job.inputdata.tag_info[first_tag]['path'] == '':
            environment['TAG_TYPE'] = 'DQ2'
        else:
            environment['TAG_TYPE'] = 'LOCAL'

    # Fix DATASETNAME env variable for DQ2_COPY mode
    if job.inputdata and (job.inputdata._name in ['DQ2Dataset', 'AMIDataset', 'EventPicking']) and (job.inputdata.type in ['DQ2_LOCAL', 'DQ2_COPY', 'FILE_STAGER', 'TAG_LOCAL', 'TAG_COPY']):
        if job.inputdata.dataset:
            from GangaAtlas.Lib.ATLASDataset.DQ2Dataset import resolve_container
            datasets = resolve_container(job.inputdata.dataset)
            environment['DATASETNAME'] = datasets[0]
            try:
                environment['DATASETLOCATION'] = ':'.join(
                    job.inputdata.get_locations(overlap=False)[datasets[0]])
            except Exception:
                # narrowed from bare except; report as submission failure
                printout = 'Job submission failed ! Dataset %s could not be found in DQ2 ! Maybe retry ?' % (datasets[0])
                raise ApplicationConfigurationError(None, printout)

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        environment['DATASETTYPE'] = 'TIER3'

    # Work around for glite WMS spaced environement variable problem
    inputbox.append(FileBuffer('athena_options',
                               environment['ATHENA_OPTIONS'] + '\n'))

    # Write trf parameters
    trf_params = ' '
    for key, value in job.application.trf_parameter.iteritems():
        if key == 'dbrelease':
            environment['DBDATASETNAME'] = value.split(':')[0]
            environment['DBFILENAME'] = value.split(':')[1]
        else:
            trf_params = trf_params + key + '=' + str(value) + ' '
    if trf_params != ' ' and job.application.atlas_exetype == 'TRF':
        _append_file_buffer(inputbox, 'trf_params', [trf_params])

    # set RecExCommon options
    environment['RECEXTYPE'] = job.application.recex_type

    # event based splitting: set max_events and skip_events
    if job._getRoot().splitter and hasattr(job._getRoot().splitter, 'numevtsperjob') and job._getRoot().splitter.numevtsperjob > 0:
        environment['ATHENA_MAX_EVENTS'] = str(job.application.max_events)
        environment['ATHENA_SKIP_EVENTS'] = str(job.application.skip_events)

    # pick event
    if job._getRoot().splitter and job._getRoot().inputdata and job._getRoot().inputdata._name == 'EventPicking':
        # Replace blank space
        environment['ATHENA_RUN_EVENTS'] = str(job.application.run_event).replace(' ', '')
        environment['ATHENA_FILTER_POLICY'] = str(job.inputdata.pick_filter_policy)

    # append a property for monitoring to the jobconfig of subjobs
    lcg_config = LCGJobConfig(File(exe), inputbox, [], outputbox,
                              environment, [], requirements)
    lcg_config.monitoring_svc = mc['Athena']
    return lcg_config