def master_prepare(self, app, appconfig):
    """Prepare the master LCG job for an Athena application.

    Builds the input sandbox (job options, user/group areas, helper
    scripts and DQ2 tooling), the worker-node environment dictionary
    and the LCG/CREAM requirements object, then returns the resulting
    LCGJobConfig.

    Parameters:
        app: the Athena application object (parent of the Ganga job).
        appconfig: application configuration object (unused here).

    Returns:
        LCGJobConfig with executable, input/output sandboxes,
        environment and requirements filled in.

    Raises:
        ApplicationConfigurationError: for inconsistent configurations
        (sites in different clouds, missing ATLAS release, empty DQ2
        dataset name, obsolete or unsupported splitter options).
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id)

    # Subjobs reuse the master job's id so output dataset names agree.
    if job._getRoot().subjobs:
        jobid = "%d" % (job._getRoot().id)
    else:
        jobid = "%d" % job.id

    # Generate output dataset name
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            dq2_datasetname = job.outputdata.datasetname
            dq2_isGroupDS = job.outputdata.isGroupDS
            dq2_groupname = job.outputdata.groupname
        else:
            dq2_datasetname = ''
            dq2_isGroupDS = False
            dq2_groupname = ''
        self.output_datasetname, self.output_lfn = dq2outputdatasetname(
            dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

    # Check if all sites are in the same cloud
    if job.backend.requirements.sites:
        firstCloud = whichCloud(job.backend.requirements.sites[0])
        for site in job.backend.requirements.sites:
            cloud = whichCloud(site)
            if cloud != firstCloud:
                printout = 'Job submission failed ! Site specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                    job.backend.requirements.sites)
                raise ApplicationConfigurationError(None, printout)

    #this next for loop instructs ganga to use option_files that live in the appropriate shared directory (the job
    #will already have been prepared
    #(if is_prepared is True, then we've most likely submitted a job via GangaRobot. We know what we're doing.
    #if app.is_prepared is not True:
    #    for position in xrange(len(app.option_file)):
    #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))

    # Expand Athena jobOptions
    if not app.atlas_exetype in ['EXE']:
        athena_options = ' '.join([
            os.path.basename(opt_file.name) for opt_file in app.option_file
        ])
        #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
        if app.options:
            athena_options = app.options + ' ' + athena_options

        inputbox = [File(opt_file.name) for opt_file in app.option_file]
    else:
        # EXE jobs: option files only contribute their basenames to the
        # command line; they are not shipped in the sandbox here.
        athena_options = ' '.join([
            os.path.basename(opt_file.name) for opt_file in app.option_file
        ])
        inputbox = []

    athena_usersetupfile = os.path.basename(app.user_setupfile.name)

    # prepare input sandbox
    inputbox.append(File(os.path.join(__directory__, 'athena-utility.sh')))

    if job.inputdata and job.inputdata._name == "AMIDataset" and job.inputdata.goodRunListXML.name != '':
        inputbox.append(File(job.inputdata.goodRunListXML.name))

    if job.inputdata and job.inputdata._name == 'ATLASDataset':
        if job.inputdata.lfc:
            _append_files(inputbox, 'ganga-stagein-lfc.py')
        else:
            _append_files(inputbox, 'ganga-stagein.py')

    if app.user_area.name:
        #we will now use the user_area that's stored in the users shared directory
        if app.is_prepared is not True:
            tmp_user_name = os.path.join(
                os.path.join(shared_path, app.is_prepared.name),
                os.path.basename(app.user_area.name))
            inputbox.append(File(tmp_user_name))
        else:
            inputbox.append(File(app.user_area.name))

    #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
    if app.group_area.name and str(app.group_area.name).find('http') < 0:
        #we will now use the group_area that's stored in the users shared directory
        if app.is_prepared is not True:
            tmp_group_name = os.path.join(
                os.path.join(shared_path, app.is_prepared.name),
                os.path.basename(app.group_area.name))
            inputbox.append(File(tmp_group_name))
        else:
            inputbox.append(File(app.group_area.name))

    if app.user_setupfile.name:
        inputbox.append(File(app.user_setupfile.name))

    # CN: added TNTJobSplitter clause
    if job.inputdata and (job.inputdata._name in [
            'DQ2Dataset', 'AMIDataset', 'EventPicking'
    ]) or (job._getRoot().splitter
           and job._getRoot().splitter._name == 'TNTJobSplitter'):
        _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2_get',
                      'dq2info.tar.gz')

    if job.inputdata and job.inputdata.type == 'LFC' and not (
            job._getRoot().splitter
            and job._getRoot().splitter._name == 'TNTJobSplitter'):
        _append_files(inputbox, 'dq2_get_old')

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2info.tar.gz')

    ## insert more scripts to inputsandbox for FileStager
    if job.inputdata and (job.inputdata._name in [
            'DQ2Dataset', 'AMIDataset', 'EventPicking'
    ]) and job.inputdata.type in ['FILE_STAGER']:
        _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                      'fs-copy.py')
        #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        #if not job.outputdata.location:
        #    raise ApplicationConfigurationError(None,'j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        _append_files(inputbox, 'ganga-joboption-parse.py')
        if not 'dq2info.tar.gz' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'dq2info.tar.gz')

    # add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
    if not 'ganga-stage-in-out-dq2.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
    if not 'dq2tracerreport.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'dq2tracerreport.py')
    if not 'db_dq2localid.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'db_dq2localid.py')
    if not 'getstats.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'getstats.py')

    if str(app.atlas_release).find('12.') >= 0:
        _append_files(inputbox, 'libDCache.so', 'libRFIO.so', 'libdcap.so')
    elif str(app.atlas_release).find('13.') >= 0:
        _append_files(inputbox, 'libdcap.so')
    else:
        # NOTE(review): the elif and else branches are identical; only
        # release 12.x gets the extra libDCache/libRFIO libraries.
        _append_files(inputbox, 'libdcap.so')

    if job.inputsandbox:
        inputbox += job.inputsandbox

    # prepare environment
    if not app.atlas_release:
        raise ApplicationConfigurationError(
            None,
            'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.'
        )

    environment = {
        'ATLAS_RELEASE': app.atlas_release,
        'ATHENA_OPTIONS': athena_options,
        'ATHENA_USERSETUPFILE': athena_usersetupfile,
        'ATLAS_PROJECT': app.atlas_project,
        'ATLAS_EXETYPE': app.atlas_exetype,
        'GANGA_VERSION': configSystem['GANGA_VERSION']
    }
    environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

    # Forward user-supplied NAME=VALUE pairs into the worker-node
    # environment; malformed entries are logged and skipped.
    if app.atlas_environment:
        for var in app.atlas_environment:
            try:
                vars = re.match("^(\w+)=(.*)", var).group(1)
                value = re.match("^(\w+)=(.*)", var).group(2)
                environment[vars] = value
            except:
                logger.warning(
                    'Athena.atlas_environment variable not correctly configured: %s',
                    var)
                pass

    if app.atlas_production and app.atlas_release.find(
            '12.') >= 0 and app.atlas_project != 'AtlasPoint1':
        temp_atlas_production = re.sub('\.', '_', app.atlas_production)
        prod_url = config[
            'PRODUCTION_ARCHIVE_BASEURL'] + '/AtlasProduction_' + temp_atlas_production + '_noarch.tar.gz'
        logger.info('Using Production cache from: %s', prod_url)
        environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

    if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                 or app.atlas_release.find('12.') <= 0):
        environment['ATLAS_PRODUCTION'] = app.atlas_production

    if app.user_area.name:
        environment['USER_AREA'] = os.path.basename(app.user_area.name)

    #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
    if app.group_area.name:
        if str(app.group_area.name).find('http') >= 0:
            environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
        else:
            environment['GROUP_AREA'] = os.path.basename(
                app.group_area.name)

    if app.max_events:
        if (app.max_events != -999) and (app.max_events > -2):
            environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

    # Pick the requirements class matching the backend; default to LCG.
    if job.backend.requirements._name == 'AtlasLCGRequirements':
        requirements = AtlasLCGRequirements()
    elif job.backend.requirements._name == 'AtlasCREAMRequirements':
        requirements = AtlasCREAMRequirements()
    else:
        requirements = AtlasLCGRequirements()

    if job.inputdata and job.inputdata._name == 'ATLASDataset':
        if job.inputdata.lfc:
            environment['GANGA_LFC_HOST'] = job.inputdata.lfc

    if 'ganga-stage-in-out-dq2.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']

    if job.inputdata and (job.inputdata._name in
                          ['DQ2Dataset', 'AMIDataset', 'EventPicking']):
        if job.inputdata.dataset:
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                'DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            if job.inputdata.failover:
                environment['DATASETFAILOVER'] = 1
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputdata.check_md5sum:
                environment['GANGA_CHECKMD5SUM'] = 1
        else:
            raise ApplicationConfigurationError(
                None,
                'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
            )

        # Raise submission exception
        if (not job.backend.CE and
                not (job.backend.requirements._name in [
                    'AtlasLCGRequirements', 'AtlasCREAMRequirements'
                ] and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter')
                and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter')
                and not (job.splitter
                         and job.splitter._name == 'AnaTaskSplitterJob')
                and not (job.splitter
                         and job.splitter._name == 'ATLASTier3Splitter')):
            raise ApplicationConfigurationError(
                None,
                'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )

        if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
            raise ApplicationConfigurationError(
                None,
                'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )
        #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
        #    allLoc = job.inputdata.get_locations(complete=0)
        #    completeLoc = job.inputdata.get_locations(complete=1)
        #    incompleteLoc = []
        #    for loc in allLoc:
        #        if loc not in completeLoc:
        #            incompleteLoc.append(loc)
        #    if incompleteLoc:
        #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

        # Add TAG datasetname
        if job.inputdata.tagdataset:
            environment['TAGDATASETNAME'] = ':'.join(
                job.inputdata.tagdataset)

    # prepare job requirements
    requirementsSoftware = getLCGReleaseTag(app)

    releaseBlacklist = job.backend.requirements.list_release_blacklist()
    if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
        # Blacklisted releases are warned about but still accepted.
        logger.error(
            'The athena release %s you are using is not recommended for distributed analysis !',
            requirementsSoftware[0])
        logger.error(
            'For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !'
        )
        requirements.software = requirementsSoftware
    else:
        requirements.software = requirementsSoftware

    # Set athena architecture: 32 or 64 bit
    environment['ATLAS_ARCH'] = '32'
    if requirementsSoftware and requirementsSoftware[0].find(
            'x86_64') >= 0:
        environment['ATLAS_ARCH'] = '64'

    # add software requirement of dq2clients
    if job.inputdata and job.inputdata._name in [
            'DQ2Dataset', 'AMIDataset', 'EventPicking'
    ] and job.inputdata.type in [
            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
    ] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
        try:
            # override the default one if the dq2client_version is presented
            # in the job backend's requirements object
            dq2client_version = job.backend.requirements.dq2client_version
        except AttributeError:
            # NOTE(review): if this AttributeError fires, dq2client_version
            # is never bound and the check below raises UnboundLocalError -
            # confirm the attribute always exists on the requirements object.
            pass

        if dq2client_version:
            #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
            environment['DQ2_CLIENT_VERSION'] = dq2client_version

    if app.atlas_dbrelease:
        if not app._name == "AthenaTask" and not (
                job.splitter and
            (job.splitter._name == 'DQ2JobSplitter'
             or job.splitter._name == 'ATLASTier3Splitter')):
            raise ApplicationConfigurationError(
                None,
                'Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !'
            )
        # atlas_dbrelease is expected as 'DATASET:FILE'; malformed values
        # are logged, not fatal.
        try:
            environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(
                ':')[0]
            environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
        except:
            logger.warning(
                'Problems with the atlas_dbrelease configuration')

    # Fill AtlasLCGRequirements access mode
    if configDQ2['USE_ACCESS_INFO']:
        logger.warning(
            "config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !"
        )
        import pickle, StringIO
        #if job.backend.requirements.sites:
        info = job.backend.requirements.list_access_info()
        fileHandle = StringIO.StringIO()
        pickle.dump(info, fileHandle)
        # NOTE(review): seek(-1) before read() looks suspicious for
        # recovering the full pickle payload (seek(0) would be expected) -
        # verify the intended behaviour.
        fileHandle.seek(-1)
        lines = fileHandle.read()
        inputbox.append(FileBuffer('access_info.pickle', lines))
        _append_files(inputbox, 'access_info.py')
        if not 'make_filestager_joption.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'make_filestager_joption.py',
                          'dm_util.py', 'fs-copy.py')

    # jobscript
    exe = os.path.join(__directory__, 'run-athena-lcg.sh')

    # output sandbox
    outputbox = [
        'output_guids', 'output_location', 'output_data', 'stats.pickle'
    ]

    ## retrieve the FileStager log
    if configDQ2['USE_ACCESS_INFO'] or (
            job.inputdata and (job.inputdata._name in
                               ['DQ2Dataset', 'AMIDataset', 'EventPicking'])
            and job.inputdata.type in ['FILE_STAGER']):
        outputbox += ['FileStager.out', 'FileStager.err']

    if job.outputsandbox:
        outputbox += job.outputsandbox

    # Switch for DEBUG print-out in logfiles
    if app.useNoDebugLogs:
        environment['GANGA_LOG_DEBUG'] = '0'
    else:
        environment['GANGA_LOG_DEBUG'] = '1'

    return LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [],
                        requirements)
def whichCloudExt(site):
    """Return the cloud name for *site*, mapping NDGF sites to "NG".

    Sites whose name starts with "NDGF" are assigned to the Nordic
    cloud directly; everything else is delegated to whichCloud().
    """
    return "NG" if site.startswith("NDGF") else whichCloud(site)
def whichCloudExt(site):
    """Return the cloud name for *site*, mapping NDGF sites to "NG"."""
    # NOTE(review): this is an exact duplicate of the whichCloudExt
    # definition immediately above; at import time this copy silently
    # shadows the first one. Consider removing one of the two.
    if site.startswith("NDGF"):
        return "NG"
    return whichCloud(site)
def master_prepare(self, app, appconfig):
    """Prepare the master LCG job for a TagPrepare application.

    Assembles the input sandbox (athena utilities, get_tag_info.py and
    DQ2 helper scripts), the worker-node environment and the LCG
    requirements (ATLAS offline release tag plus optional dq2clients
    tag), and returns the resulting LCGJobConfig whose only fixed
    output-sandbox entry is 'taginfo.pkl'.

    Raises:
        ApplicationConfigurationError: for inconsistent configurations
        (sites in different clouds, empty DQ2 dataset name, obsolete
        splitter options).
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug('TagPrepareLCGRTHandler master_prepare called: %s',
                 job.id)
    self.username = gridProxy.identity(safe=True)

    # Check if all sites are in the same cloud
    if job.backend.requirements.sites:
        firstCloud = whichCloud(job.backend.requirements.sites[0])
        for site in job.backend.requirements.sites:
            cloud = whichCloud(site)
            if cloud != firstCloud:
                printout = 'Job submission failed ! Site specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                    job.backend.requirements.sites)
                raise ApplicationConfigurationError(None, printout)

    # prepare input sandbox
    inputbox = [(File(os.path.join(__athdirectory__, 'athena-utility.sh'))),
                (File(os.path.join(__directory__, 'get_tag_info.py')))]

    # CN: added TNTJobSplitter clause
    if job.inputdata and job.inputdata._name == 'DQ2Dataset':
        _append_files(
            inputbox,
            os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'),
            os.path.join(__athdirectory__, 'dq2_get'),
            os.path.join(__athdirectory__, 'dq2info.tar.gz'))

    ## insert more scripts to inputsandbox for FileStager
    if job.inputdata and job.inputdata._name == 'DQ2Dataset' and job.inputdata.type in ['FILE_STAGER']:
        _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                      'fs-copy.py')
        #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

    # add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
    if not 'ganga-stage-in-out-dq2.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(
            inputbox,
            os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'))
    if not 'dq2tracerreport.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox,
                      os.path.join(__athdirectory__, 'dq2tracerreport.py'))
    if not 'db_dq2localid.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox,
                      os.path.join(__athdirectory__, 'db_dq2localid.py'))
    if not 'getstats.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox,
                      os.path.join(__athdirectory__, 'getstats.py'))
    _append_files(inputbox, os.path.join(__athdirectory__, 'libdcap.so'))

    if job.inputsandbox:
        inputbox += job.inputsandbox

    # prepare environment
    environment = {
        'MAXNUMREFS': str(app.max_num_refs),
        'STREAM_REF': app.stream_ref,
        'ATLAS_RELEASE': app.atlas_release,
        'ATHENA_OPTIONS': '',
        'ATHENA_USERSETUPFILE': '',
        'ATLAS_PROJECT': '',
        'ATLAS_EXETYPE': 'ATHENA',
        'GANGA_VERSION': configSystem['GANGA_VERSION']
    }
    environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']
    requirements = AtlasLCGRequirements()

    if job.inputdata and job.inputdata._name == 'DQ2Dataset':
        if job.inputdata.dataset:
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                'DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            if job.inputdata.failover:
                environment['DATASETFAILOVER'] = 1
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputdata.check_md5sum:
                environment['GANGA_CHECKMD5SUM'] = 1
        else:
            raise ApplicationConfigurationError(
                None,
                'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
            )

        # Raise submission exception
        if (not job.backend.CE and
                not (job.backend.requirements._name == 'AtlasLCGRequirements'
                     and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter')
                and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter')
                and not (job.splitter
                         and job.splitter._name == 'AnaTaskSplitterJob')):
            raise ApplicationConfigurationError(
                None,
                'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )

        if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
            raise ApplicationConfigurationError(
                None,
                'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )
        #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
        #    allLoc = job.inputdata.get_locations(complete=0)
        #    completeLoc = job.inputdata.get_locations(complete=1)
        #    incompleteLoc = []
        #    for loc in allLoc:
        #        if loc not in completeLoc:
        #            incompleteLoc.append(loc)
        #    if incompleteLoc:
        #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

    # prepare job requirements
    # Only two cmtconfig values are supported; anything else falls back
    # to the slc4 default.
    cmtconfig = app.atlas_cmtconfig
    if not cmtconfig in ['i686-slc4-gcc34-opt', 'i686-slc5-gcc43-opt']:
        cmtconfig = 'i686-slc4-gcc34-opt'

    requirements.software = [
        'VO-atlas-offline-%s-%s' % (app.atlas_release, cmtconfig)
    ]

    # add software requirement of dq2clients
    if job.inputdata and job.inputdata.type in [
            'DQ2_DOWNLOAD', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
    ]:
        try:
            # override the default one if the dq2client_version is presented
            # in the job backend's requirements object
            dq2client_version = job.backend.requirements.dq2client_version
        except AttributeError:
            # NOTE(review): if this AttributeError fires, dq2client_version
            # is never bound and the check below raises UnboundLocalError -
            # confirm the attribute always exists on the requirements object.
            pass

        if dq2client_version:
            requirements.software += [
                'VO-atlas-dq2clients-%s' % dq2client_version
            ]
            environment['DQ2_CLIENT_VERSION'] = dq2client_version

    # jobscript
    exe = os.path.join(__directory__, 'run-tagprepare-lcg.sh')
    #exe = os.path.join(__directory__,'get_tag_info.py')

    # output sandbox
    outputbox = ['taginfo.pkl']

    if job.outputsandbox:
        outputbox += job.outputsandbox

    return LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [],
                        requirements)
def master_prepare(self, app, appconfig):
    """Prepare the master LCG job for an Athena application.

    Variant of AthenaLCGRTHandler.master_prepare that supports
    'DQ2Dataset' and 'EventPicking' input data (no AMIDataset /
    ATLASDataset handling) and raises ApplicationConfigurationError
    with a single message argument.

    Builds the input sandbox, worker-node environment and LCG/CREAM
    requirements, then returns the resulting LCGJobConfig.

    Raises:
        ApplicationConfigurationError: for inconsistent configurations
        (sites in different clouds, missing ATLAS release, empty DQ2
        dataset name, obsolete or unsupported splitter options).
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug('AthenaLCGRTHandler master_prepare called: %s', job.id)

    # Subjobs reuse the master job's id so output dataset names agree.
    if job._getRoot().subjobs:
        jobid = "%d" % (job._getRoot().id)
    else:
        jobid = "%d" % job.id

    # Generate output dataset name
    if job.outputdata:
        if job.outputdata._name == 'DQ2OutputDataset':
            dq2_datasetname = job.outputdata.datasetname
            dq2_isGroupDS = job.outputdata.isGroupDS
            dq2_groupname = job.outputdata.groupname
        else:
            dq2_datasetname = ''
            dq2_isGroupDS = False
            dq2_groupname = ''
        self.output_datasetname, self.output_lfn = dq2outputdatasetname(
            dq2_datasetname, jobid, dq2_isGroupDS, dq2_groupname)

    # Check if all sites are in the same cloud
    if job.backend.requirements.sites:
        firstCloud = whichCloud(job.backend.requirements.sites[0])
        for site in job.backend.requirements.sites:
            cloud = whichCloud(site)
            if cloud != firstCloud:
                printout = 'Job submission failed ! Site specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                    job.backend.requirements.sites)
                raise ApplicationConfigurationError(printout)

    #this next for loop instructs ganga to use option_files that live in the appropriate shared directory (the job
    #will already have been prepared
    #(if is_prepared is True, then we've most likely submitted a job via GangaRobot. We know what we're doing.
    #if app.is_prepared is not True:
    #    for position in xrange(len(app.option_file)):
    #        app.option_file[position]=File(os.path.join(app.is_prepared.name,os.path.basename(app.option_file[position].name)))

    # Expand Athena jobOptions
    if not app.atlas_exetype in ['EXE']:
        athena_options = ' '.join(
            [os.path.basename(opt_file.name) for opt_file in app.option_file])
        #if app.options: athena_options = ' -c ' + app.options + ' ' + athena_options
        if app.options:
            athena_options = app.options + ' ' + athena_options

        inputbox = [File(opt_file.name) for opt_file in app.option_file]
    else:
        # EXE jobs: option files only contribute their basenames to the
        # command line; they are not shipped in the sandbox here.
        athena_options = ' '.join(
            [os.path.basename(opt_file.name) for opt_file in app.option_file])
        inputbox = []

    athena_usersetupfile = os.path.basename(app.user_setupfile.name)

    # prepare input sandbox
    inputbox.append(File(os.path.join(__directory__, 'athena-utility.sh')))

    if app.user_area.name:
        #we will now use the user_area that's stored in the users shared directory
        if app.is_prepared is not True:
            tmp_user_name = os.path.join(
                os.path.join(shared_path, app.is_prepared.name),
                os.path.basename(app.user_area.name))
            inputbox.append(File(tmp_user_name))
        else:
            inputbox.append(File(app.user_area.name))

    #if app.group_area.name: inputbox += [ File(app.group_area.name) ]
    if app.group_area.name and str(app.group_area.name).find('http') < 0:
        #we will now use the group_area that's stored in the users shared directory
        if app.is_prepared is not True:
            tmp_group_name = os.path.join(
                os.path.join(shared_path, app.is_prepared.name),
                os.path.basename(app.group_area.name))
            inputbox.append(File(tmp_group_name))
        else:
            inputbox.append(File(app.group_area.name))

    if app.user_setupfile.name:
        inputbox.append(File(app.user_setupfile.name))

    # CN: added TNTJobSplitter clause
    if job.inputdata and (job.inputdata._name in [
            'DQ2Dataset', 'EventPicking'
    ]) or (job._getRoot().splitter
           and job._getRoot().splitter._name == 'TNTJobSplitter'):
        _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2_get',
                      'dq2info.tar.gz')

    if job.inputdata and job.inputdata.type == 'LFC' and not (
            job._getRoot().splitter
            and job._getRoot().splitter._name == 'TNTJobSplitter'):
        _append_files(inputbox, 'dq2_get_old')

    if job.inputdata and job.inputdata._name == 'ATLASTier3Dataset':
        _append_files(inputbox, 'ganga-stage-in-out-dq2.py', 'dq2info.tar.gz')

    ## insert more scripts to inputsandbox for FileStager
    if job.inputdata and (job.inputdata._name in [
            'DQ2Dataset', 'EventPicking'
    ]) and job.inputdata.type in ['FILE_STAGER']:
        _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                      'fs-copy.py')
        #_append_files(inputbox,'make_filestager_joption.py','dm_util.py')

    if job.outputdata and job.outputdata._name == 'DQ2OutputDataset':
        #if not job.outputdata.location:
        #    raise ApplicationConfigurationError('j.outputdata.location is empty - Please specify a DQ2 output location - job not submitted !')
        if not 'ganga-stage-in-out-dq2.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
        _append_files(inputbox, 'ganga-joboption-parse.py')
        if not 'dq2info.tar.gz' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'dq2info.tar.gz')

    # add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
    if not 'ganga-stage-in-out-dq2.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'ganga-stage-in-out-dq2.py')
    if not 'dq2tracerreport.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'dq2tracerreport.py')
    if not 'db_dq2localid.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'db_dq2localid.py')
    if not 'getstats.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        _append_files(inputbox, 'getstats.py')

    if str(app.atlas_release).find('12.') >= 0:
        _append_files(inputbox, 'libDCache.so', 'libRFIO.so', 'libdcap.so')
    elif str(app.atlas_release).find('13.') >= 0:
        _append_files(inputbox, 'libdcap.so')
    else:
        # NOTE(review): the elif and else branches are identical; only
        # release 12.x gets the extra libDCache/libRFIO libraries.
        _append_files(inputbox, 'libdcap.so')

    if job.inputsandbox:
        inputbox += job.inputsandbox

    # prepare environment
    if not app.atlas_release:
        raise ApplicationConfigurationError(
            'j.application.atlas_release is empty - No ATLAS release version found. Run prepare() or specify a version explictly.'
        )

    environment = {
        'ATLAS_RELEASE': app.atlas_release,
        'ATHENA_OPTIONS': athena_options,
        'ATHENA_USERSETUPFILE': athena_usersetupfile,
        'ATLAS_PROJECT': app.atlas_project,
        'ATLAS_EXETYPE': app.atlas_exetype,
        'GANGA_VERSION': configSystem['GANGA_VERSION']
    }
    environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

    # Forward user-supplied NAME=VALUE pairs into the worker-node
    # environment; malformed entries are logged and skipped.
    if app.atlas_environment:
        for var in app.atlas_environment:
            try:
                vars = re.match("^(\w+)=(.*)", var).group(1)
                value = re.match("^(\w+)=(.*)", var).group(2)
                environment[vars] = value
            except:
                logger.warning(
                    'Athena.atlas_environment variable not correctly configured: %s',
                    var)
                pass

    if app.atlas_production and app.atlas_release.find(
            '12.') >= 0 and app.atlas_project != 'AtlasPoint1':
        temp_atlas_production = re.sub('\.', '_', app.atlas_production)
        prod_url = config[
            'PRODUCTION_ARCHIVE_BASEURL'] + '/AtlasProduction_' + temp_atlas_production + '_noarch.tar.gz'
        logger.info('Using Production cache from: %s', prod_url)
        environment['ATLAS_PRODUCTION_ARCHIVE'] = prod_url

    if app.atlas_production and (app.atlas_project == 'AtlasPoint1'
                                 or app.atlas_release.find('12.') <= 0):
        environment['ATLAS_PRODUCTION'] = app.atlas_production

    if app.user_area.name:
        environment['USER_AREA'] = os.path.basename(app.user_area.name)

    #if app.group_area.name: environment['GROUP_AREA']=os.path.basename(app.group_area.name)
    if app.group_area.name:
        if str(app.group_area.name).find('http') >= 0:
            environment['GROUP_AREA_REMOTE'] = str(app.group_area.name)
        else:
            environment['GROUP_AREA'] = os.path.basename(
                app.group_area.name)

    if app.max_events:
        if (app.max_events != -999) and (app.max_events > -2):
            environment['ATHENA_MAX_EVENTS'] = str(app.max_events)

    # Pick the requirements class matching the backend; default to LCG.
    if job.backend.requirements._name == 'AtlasLCGRequirements':
        requirements = AtlasLCGRequirements()
    elif job.backend.requirements._name == 'AtlasCREAMRequirements':
        requirements = AtlasCREAMRequirements()
    else:
        requirements = AtlasLCGRequirements()

    if 'ganga-stage-in-out-dq2.py' in [
            os.path.basename(file.name) for file in inputbox
    ]:
        environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
        environment['DQ2_URL_SERVER_SSL'] = configDQ2['DQ2_URL_SERVER_SSL']

    if job.inputdata and (job.inputdata._name in
                          ['DQ2Dataset', 'EventPicking']):
        if job.inputdata.dataset:
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                'DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            if job.inputdata.failover:
                environment['DATASETFAILOVER'] = 1
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputdata.check_md5sum:
                environment['GANGA_CHECKMD5SUM'] = 1
        else:
            raise ApplicationConfigurationError(
                'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
            )

        # Raise submission exception
        if (not job.backend.CE and
                not (job.backend.requirements._name in [
                    'AtlasLCGRequirements', 'AtlasCREAMRequirements'
                ] and job.backend.requirements.sites) and
                not (job.splitter and job.splitter._name == 'DQ2JobSplitter')
                and
                not (job.splitter and job.splitter._name == 'TNTJobSplitter')
                and not (job.splitter
                         and job.splitter._name == 'AnaTaskSplitterJob')
                and not (job.splitter
                         and job.splitter._name == 'ATLASTier3Splitter')):
            raise ApplicationConfigurationError(
                'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )

        if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
            raise ApplicationConfigurationError(
                'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )
        #if job.inputdata.number_of_files and (job.splitter and job.splitter._name == 'DQ2JobSplitter'):
        #    allLoc = job.inputdata.get_locations(complete=0)
        #    completeLoc = job.inputdata.get_locations(complete=1)
        #    incompleteLoc = []
        #    for loc in allLoc:
        #        if loc not in completeLoc:
        #            incompleteLoc.append(loc)
        #    if incompleteLoc:
        #        raise ApplicationConfigurationError(None,'Job submission failed ! Dataset is incomplete ! Usage of j.inputdata.number_of_files and DQ2JobSplitter is not allowed for incomplete datasets !')

    # prepare job requirements
    requirementsSoftware = getLCGReleaseTag(app)

    releaseBlacklist = job.backend.requirements.list_release_blacklist()
    if requirementsSoftware and requirementsSoftware[0] in releaseBlacklist:
        # Blacklisted releases are warned about but still accepted.
        logger.error(
            'The athena release %s you are using is not recommended for distributed analysis !',
            requirementsSoftware[0])
        logger.error(
            'For details, please have a look at https://twiki.cern.ch/twiki/bin/view/Atlas/DAGangaFAQ#Athena_Versions_Issues or ask for help and advice on the distributed analysis help list !'
        )
        requirements.software = requirementsSoftware
    else:
        requirements.software = requirementsSoftware

    # Set athena architecture: 32 or 64 bit
    environment['ATLAS_ARCH'] = '32'
    if requirementsSoftware and requirementsSoftware[0].find(
            'x86_64') >= 0:
        environment['ATLAS_ARCH'] = '64'

    # add software requirement of dq2clients
    if job.inputdata and job.inputdata._name in [
            'DQ2Dataset', 'EventPicking'
    ] and job.inputdata.type in [
            'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
    ] or app.atlas_dbrelease or configDQ2['USE_ACCESS_INFO']:
        try:
            # override the default one if the dq2client_version is presented
            # in the job backend's requirements object
            dq2client_version = job.backend.requirements.dq2client_version
        except AttributeError:
            # NOTE(review): if this AttributeError fires, dq2client_version
            # is never bound and the check below raises UnboundLocalError -
            # confirm the attribute always exists on the requirements object.
            pass

        if dq2client_version:
            #requirements.software += ['VO-atlas-dq2clients-%s' % dq2client_version]
            environment['DQ2_CLIENT_VERSION'] = dq2client_version

    if app.atlas_dbrelease:
        if not app._name == "AthenaTask" and not (
                job.splitter and
            (job.splitter._name == 'DQ2JobSplitter'
             or job.splitter._name == 'ATLASTier3Splitter')):
            raise ApplicationConfigurationError(
                'Job submission failed ! Please use DQ2JobSplitter if you are using j.application.atlas_dbrelease !'
            )
        # atlas_dbrelease is expected as 'DATASET:FILE'; malformed values
        # are logged, not fatal.
        try:
            environment['ATLAS_DBRELEASE'] = app.atlas_dbrelease.split(
                ':')[0]
            environment['ATLAS_DBFILE'] = app.atlas_dbrelease.split(':')[1]
        except:
            logger.warning(
                'Problems with the atlas_dbrelease configuration')

    # Fill AtlasLCGRequirements access mode
    if configDQ2['USE_ACCESS_INFO']:
        logger.warning(
            "config['DQ2']['USE_ACCESS_INFO']=True - You are using the improved worker node input access method - make sure you are using at least athena version 15.0.0 or the latest FileStager tag !"
        )
        import pickle, StringIO
        #if job.backend.requirements.sites:
        info = job.backend.requirements.list_access_info()
        fileHandle = StringIO.StringIO()
        pickle.dump(info, fileHandle)
        # NOTE(review): seek(-1) before read() looks suspicious for
        # recovering the full pickle payload (seek(0) would be expected) -
        # verify the intended behaviour.
        fileHandle.seek(-1)
        lines = fileHandle.read()
        inputbox.append(FileBuffer('access_info.pickle', lines))
        _append_files(inputbox, 'access_info.py')
        if not 'make_filestager_joption.py' in [
                os.path.basename(file.name) for file in inputbox
        ]:
            _append_files(inputbox, 'make_filestager_joption.py',
                          'dm_util.py', 'fs-copy.py')

    # jobscript
    exe = os.path.join(__directory__, 'run-athena-lcg.sh')

    # output sandbox
    outputbox = [
        'output_guids', 'output_location', 'output_data', 'stats.pickle'
    ]

    ## retrieve the FileStager log
    if configDQ2['USE_ACCESS_INFO'] or (
            job.inputdata and (job.inputdata._name in
                               ['DQ2Dataset', 'EventPicking'])
            and job.inputdata.type in ['FILE_STAGER']):
        outputbox += ['FileStager.out', 'FileStager.err']

    if job.outputsandbox:
        outputbox += job.outputsandbox

    # Switch for DEBUG print-out in logfiles
    if app.useNoDebugLogs:
        environment['GANGA_LOG_DEBUG'] = '0'
    else:
        environment['GANGA_LOG_DEBUG'] = '1'

    return LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [],
                        requirements)
def master_prepare(self, app, appconfig):
    """Prepare the master job for the TagPrepare LCG handler.

    Builds the input sandbox (athena utility + tag-info helper scripts,
    plus DQ2/FileStager helpers when needed), the worker-node environment
    dictionary and the LCG software requirements, and wraps them into an
    LCGJobConfig around the run-tagprepare-lcg.sh job script.

    Args:
        app: the application object; its parent is the Job (or subjob).
        appconfig: application configuration (not used here; part of the
            RTHandler interface).

    Returns:
        LCGJobConfig for the master job.

    Raises:
        ApplicationConfigurationError: if the requested sites span more
            than one cloud, if the DQ2 dataset name is empty, if neither
            a suitable splitter nor a CE/site constraint is given, or if
            obsolete inputdata options are used.
    """
    job = app._getParent()  # Returns job or subjob object
    logger.debug('TagPrepareLCGRTHandler master_prepare called: %s', job.id)

    self.username = gridProxy.identity(safe=True)

    # Check if all sites are in the same cloud
    if job.backend.requirements.sites:
        firstCloud = whichCloud(job.backend.requirements.sites[0])
        for site in job.backend.requirements.sites:
            if whichCloud(site) != firstCloud:
                printout = 'Job submission failed ! Site specified with j.backend.requirements.sites=%s are not in the same cloud !' % (
                    job.backend.requirements.sites)
                raise ApplicationConfigurationError(None, printout)

    # prepare input sandbox
    inputbox = [File(os.path.join(__athdirectory__, 'athena-utility.sh')),
                File(os.path.join(__directory__, 'get_tag_info.py'))]

    # CN: added TNTJobSplitter clause
    if job.inputdata and job.inputdata._name == 'DQ2Dataset':
        _append_files(
            inputbox,
            os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'),
            os.path.join(__athdirectory__, 'dq2_get'),
            os.path.join(__athdirectory__, 'dq2info.tar.gz'))

    # insert more scripts to inputsandbox for FileStager
    if job.inputdata and job.inputdata._name == 'DQ2Dataset' \
            and job.inputdata.type in ['FILE_STAGER']:
        _append_files(inputbox, 'make_filestager_joption.py', 'dm_util.py',
                      'fs-copy.py')

    def _sandbox_names():
        # Basenames already staged in the sandbox; recomputed before each
        # check because _append_files mutates inputbox.
        return [os.path.basename(f.name) for f in inputbox]

    # add libDCache.so and libRFIO.so to fix broken access in athena 12.0.x
    if 'ganga-stage-in-out-dq2.py' not in _sandbox_names():
        _append_files(
            inputbox,
            os.path.join(__athdirectory__, 'ganga-stage-in-out-dq2.py'))
    if 'dq2tracerreport.py' not in _sandbox_names():
        _append_files(inputbox,
                      os.path.join(__athdirectory__, 'dq2tracerreport.py'))
    if 'db_dq2localid.py' not in _sandbox_names():
        _append_files(inputbox,
                      os.path.join(__athdirectory__, 'db_dq2localid.py'))
    if 'getstats.py' not in _sandbox_names():
        _append_files(inputbox,
                      os.path.join(__athdirectory__, 'getstats.py'))
    _append_files(inputbox, os.path.join(__athdirectory__, 'libdcap.so'))

    if job.inputsandbox:
        inputbox += job.inputsandbox

    # prepare environment
    environment = {
        'MAXNUMREFS': str(app.max_num_refs),
        'STREAM_REF': app.stream_ref,
        'ATLAS_RELEASE': app.atlas_release,
        'ATHENA_OPTIONS': '',
        'ATHENA_USERSETUPFILE': '',
        'ATLAS_PROJECT': '',
        'ATLAS_EXETYPE': 'ATHENA',
        'GANGA_VERSION': configSystem['GANGA_VERSION']
    }
    environment['DCACHE_RA_BUFFER'] = config['DCACHE_RA_BUFFER']

    requirements = AtlasLCGRequirements()

    if job.inputdata and job.inputdata._name == 'DQ2Dataset':
        if job.inputdata.dataset:
            datasetname = job.inputdata.dataset
            environment['DATASETNAME'] = ':'.join(datasetname)
            environment['DATASETLOCATION'] = ':'.join(
                job.inputdata.get_locations())
            environment['DQ2_URL_SERVER'] = configDQ2['DQ2_URL_SERVER']
            environment['DQ2_URL_SERVER_SSL'] = configDQ2[
                'DQ2_URL_SERVER_SSL']
            environment['DATASETTYPE'] = job.inputdata.type
            if job.inputdata.failover:
                environment['DATASETFAILOVER'] = 1
            environment['DATASETDATATYPE'] = job.inputdata.datatype
            if job.inputdata.accessprotocol:
                environment[
                    'DQ2_LOCAL_PROTOCOL'] = job.inputdata.accessprotocol
            if job.inputdata.check_md5sum:
                environment['GANGA_CHECKMD5SUM'] = 1
        else:
            raise ApplicationConfigurationError(
                None,
                'j.inputdata.dataset is empty - DQ2 dataset name needs to be specified.'
            )

        # Raise submission exception: without a suitable splitter the job
        # needs an explicit CE or site list to be schedulable.
        if (not job.backend.CE and
                not (job.backend.requirements._name == 'AtlasLCGRequirements'
                     and job.backend.requirements.sites) and
                not (job.splitter
                     and job.splitter._name == 'DQ2JobSplitter') and
                not (job.splitter
                     and job.splitter._name == 'TNTJobSplitter') and
                not (job.splitter
                     and job.splitter._name == 'AnaTaskSplitterJob')):
            raise ApplicationConfigurationError(
                None,
                'Job submission failed ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )

        if job.inputdata.match_ce_all or job.inputdata.min_num_files > 0:
            raise ApplicationConfigurationError(
                None,
                'Job submission failed ! Usage of j.inputdata.match_ce_all or min_num_files is obsolete ! Please use DQ2JobSplitter or specify j.backend.requirements.sites or j.backend.requirements.CE !'
            )

    # prepare job requirements; fall back to the slc4 default for any
    # unsupported cmtconfig value
    cmtconfig = app.atlas_cmtconfig
    if cmtconfig not in ['i686-slc4-gcc34-opt', 'i686-slc5-gcc43-opt']:
        cmtconfig = 'i686-slc4-gcc34-opt'

    requirements.software = [
        'VO-atlas-offline-%s-%s' % (app.atlas_release, cmtconfig)
    ]

    # add software requirement of dq2clients
    if job.inputdata and job.inputdata.type in [
            'DQ2_DOWNLOAD', 'TNT_DOWNLOAD', 'DQ2_COPY', 'FILE_STAGER'
    ]:
        # FIX: dq2client_version must be pre-initialized - previously the
        # name was only bound inside the try, so an AttributeError left it
        # unbound and the 'if dq2client_version' check raised NameError.
        dq2client_version = None
        try:
            # override the default one if the dq2client_version is presented
            # in the job backend's requirements object
            dq2client_version = job.backend.requirements.dq2client_version
        except AttributeError:
            pass
        if dq2client_version:
            requirements.software += [
                'VO-atlas-dq2clients-%s' % dq2client_version
            ]
            environment['DQ2_CLIENT_VERSION'] = dq2client_version

    # jobscript
    exe = os.path.join(__directory__, 'run-tagprepare-lcg.sh')

    # output sandbox
    outputbox = ['taginfo.pkl']
    if job.outputsandbox:
        outputbox += job.outputsandbox

    return LCGJobConfig(File(exe), inputbox, [], outputbox, environment, [],
                        requirements)