def getFileList(procData, DefaultNFilesPerJob):
    """Build the input-file entries for one process description.

    Samples are read from the 'miniAOD' and 'dset' labels of ``procData``.
    'dset' entries containing "/MINIAOD" are treated as miniAOD samples,
    the remaining ones as non-miniAOD samples.

    For each miniAOD sample the replica sites are fetched with ``DASQuery``.
    The sample is considered local when the line mentioning ``localTier``
    carries the largest replica fraction of all parsed lines. File names
    come from DAS or DBS (selected by ``DatasetFileDB``) or, for samples
    that are neither local nor "/MINIAOD" datasets, from
    ``storeTools.fillFromStore``. They are filtered with
    ``storeTools.keepOnlyFilesFromGoodRun`` and grouped into jobs.

    For each non-miniAOD sample one entry per segment is produced from
    ``opt.indir`` and ``origdtag``.

    Args:
        procData: process description, only accessed through ``getByLabel``.
        DefaultNFilesPerJob: group size used when the 'split' label is
            not positive.

    Returns:
        A list of strings. MiniAOD entries are concatenations of
        double-quoted file names, each followed by a comma and a literal
        backslash-n; non-miniAOD entries are single double-quoted paths.

    Side effects:
        Resets and updates the globals ``isLocalSample`` (status of the last
        miniAOD sample processed) and ``nonLocalSamples`` (extended with
        every miniAOD sample not found on ``localTier``). Prints progress
        messages and runs external commands through ``commands``.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False

    def _replica_fraction(line):
        # Third whitespace-separated field with '%' and '"' stripped, as a
        # float; None when the field is missing or not numeric.
        try:
            return float(line.split()[2].replace('%', '').replace('"', ''))
        except (IndexError, ValueError):
            return None

    FileList = []
    nonMiniAODSamples = []
    # Copy so the 'dset' entries appended below never end up in the list
    # object handed back by getByLabel.
    miniAODSamples = list(getByLabel(procData, 'miniAOD', []))
    for s in getByLabel(procData, 'dset', []):
        if "/MINIAOD" in s:
            miniAODSamples.append(s)
        else:
            nonMiniAODSamples.append(s)

    for sample in miniAODSamples:
        instance = ""
        dbsURL = getByLabel(procData, 'dbsURL', '')
        if len(dbsURL) > 0:
            instance = "instance=prod/" + dbsURL
        listSites = DASQuery('site dataset=' + sample + ' ' + instance +
                             ' | grep site.name,site.replica_fraction')

        IsOnLocalTier = False
        MaxFraction = 0
        FractionOnLocal = -1
        if localTier != "":
            for site in listSites.split('\n'):
                fraction = _replica_fraction(site)
                if fraction is None:
                    # Unparsable line: it can neither raise the maximum nor
                    # describe the local replica.
                    continue
                MaxFraction = max(MaxFraction, fraction)
                if localTier in site:
                    FractionOnLocal = fraction

        if FractionOnLocal == MaxFraction:
            IsOnLocalTier = True
            print("Sample is found to be on the local grid tier %s (%f%%) for %s"
                  % (localTier, FractionOnLocal, sample))

        isLocalSample = IsOnLocalTier
        if localTier != "" and not IsOnLocalTier:
            nonLocalSamples += [sample]

        if IsOnLocalTier or "/MINIAOD" in sample:
            files = []
            if DatasetFileDB == "DAS":
                files = DASQuery('file dataset=' + sample + ' ' + instance).split()
            elif DatasetFileDB == "DBS":
                curlCommand = "curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
                dbsPath = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
                sedTheList = ' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
                files = commands.getstatusoutput(
                    initialCommand + curlCommand + '"' + dbsPath +
                    '/files?dataset=' + sample + '"' + sedTheList)[1].split()

            # Make sure that we only consider root files.
            files = [x for x in files if ".root" in x]

            if IsOnLocalTier:
                # Local replica: pick the storage prefix from the host name.
                if hostname.find("iihe.ac.be") != -1:
                    prefix = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4"
                elif hostname.find("ucl.ac.be") != -1:
                    prefix = "/storage/data/cms"
                else:
                    prefix = "root://eoscms//eos/cms"
            else:
                # Global redirector, works worldwide. Regional alternatives:
                # root://xrootd-cms.infn.it/ (EU side),
                # root://cmsxrootd.fnal.gov/ (US side).
                prefix = "root://cms-xrd-global.cern.ch/"
            files = [prefix + x for x in files]
        else:
            print("Processing private local sample: " + sample)
            files = storeTools.fillFromStore(sample, 0, -1, True)

        files = storeTools.keepOnlyFilesFromGoodRun(
            files, os.path.expandvars(getByLabel(procData, 'lumiMask', '')))

        split = getByLabel(procData, 'split', -1)
        if split > 0:
            NFilesPerJob = max(1, len(files) // split)
        else:
            NFilesPerJob = DefaultNFilesPerJob
            # Make sure the number of jobs isn't too big.
            # NOTE(review): the cap is assumed to apply only to the default
            # grouping (an explicit 'split' is honoured as-is) - confirm.
            if (len(files) // NFilesPerJob) > 100:
                NFilesPerJob = len(files) // 100

        for g in range(0, len(files), NFilesPerJob):
            FileList.append(
                ''.join('"' + f + '",\\n' for f in files[g:g + NFilesPerJob]))

    for sample in nonMiniAODSamples:
        # Read 'split' here: the value assigned in the miniAOD loop does not
        # exist when there are no miniAOD samples.
        split = getByLabel(procData, 'split', -1)
        for segment in range(0, split):
            # The path depends on the segment only, not on the sample name.
            eventsFile = opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
            print("Processing a non EDM/miniAOD sample in : " + eventsFile)
            if eventsFile.find('/store/') == 0:
                eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
            FileList.append('"' + eventsFile + '"')

    return FileList
def getFileList(procData, DefaultNFilesPerJob):
    """Build the input-file entries for one process description.

    The dataset name is read from the 'dset' label and the private sample
    location from the 'miniAOD' label of ``procData``. The dataset is
    treated as miniAOD when its name contains "/MINIAOD" or "amagitte".

    MiniAOD path (miniAOD dataset, or non-empty 'miniAOD' label): the
    replica sites are listed with ``das_client.py``. The sample is local
    when a line mentions ``localTier`` together with "100.00%". File names
    come from DAS or DBS (selected by ``DatasetFileDB``) or, for a private
    sample, from ``storeTools.fillFromStore``. They are filtered with
    ``storeTools.keepOnlyFilesFromGoodRun`` and grouped into jobs.

    Otherwise one entry per segment is produced from ``opt.indir`` and
    ``origdtag``.

    Args:
        procData: process description, only accessed through ``getByLabel``.
        DefaultNFilesPerJob: group size used when the 'split' label is
            not positive.

    Returns:
        A list of strings. MiniAOD entries are concatenations of
        double-quoted file names, each followed by a comma and a literal
        backslash-n; the other entries are single double-quoted paths.

    Side effects:
        Resets and updates the global ``isLocalSample`` and extends the
        global ``nonLocalSamples`` with the dataset name when it is not
        found on ``localTier``. Prints progress messages and runs external
        commands through ``commands``.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False
    FileList = []

    # Look the labels up once instead of repeating the same queries.
    dset = getByLabel(procData, 'dset', '')
    miniAOD = getByLabel(procData, 'miniAOD', '')
    split = getByLabel(procData, 'split', -1)
    isMINIAODDataset = ("/MINIAOD" in dset) or ("amagitte" in dset)

    if not (isMINIAODDataset or len(miniAOD) > 0):
        # 'split' and 'segment' must be bound before they are used here;
        # the message is therefore printed per segment inside the loop.
        for segment in range(0, split):
            eventsFile = opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
            print("Processing a non EDM/miniAOD sample in : " + eventsFile)
            if eventsFile.find('/store/') == 0:
                eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
            FileList.append('"' + eventsFile + '"')
        return FileList

    instance = ""
    dbsURL = getByLabel(procData, 'dbsURL', '')
    if len(dbsURL) > 0:
        instance = "instance=prod/" + dbsURL
    listSites = commands.getstatusoutput(
        'das_client.py --query="site dataset=' + dset + ' ' + instance +
        ' | grep site.name,site.dataset_fraction " --limit=0')[1]

    IsOnLocalTier = False
    if localTier != "":
        for site in listSites.split('\n'):
            if localTier in site and '100.00%' in site:
                IsOnLocalTier = True
                print("Sample is found to be on the local grid tier (%s): %s"
                      % (localTier, site))
                break
    isLocalSample = IsOnLocalTier

    if localTier != "" and not IsOnLocalTier:
        nonLocalSamples += [dset]

    if IsOnLocalTier or isMINIAODDataset:
        files = []
        if DatasetFileDB == "DAS":
            files = commands.getstatusoutput(
                'das_client.py --query="file dataset=' + dset + ' ' +
                instance + '" --limit=0')[1].split()
        elif DatasetFileDB == "DBS":
            curlCommand = "curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
            dbsPath = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
            sedTheList = ' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
            files = commands.getstatusoutput(
                initialCommand + curlCommand + '"' + dbsPath +
                '/files?dataset=' + dset + '"' + sedTheList)[1].split()

        # Make sure that we only consider root files.
        files = [x for x in files if ".root" in x]

        if IsOnLocalTier:
            # Local replica: pick the storage prefix from the host name.
            if hostname.find("iihe.ac.be") != -1:
                prefix = "dcap://maite.iihe.ac.be:/pnfs/iihe/cms/ph/sc4"
            elif hostname.find("ucl.ac.be") != -1:
                prefix = "/storage/data/cms"
            else:
                prefix = "root://eoscms//eos/cms"
        else:
            # Global redirector, works worldwide. Regional alternatives:
            # root://xrootd-cms.infn.it/ (EU side),
            # root://cmsxrootd.fnal.gov/ (US side).
            prefix = "root://cms-xrd-global.cern.ch/"
        files = [prefix + x for x in files]
    else:
        # Reaching this point implies a non-empty 'miniAOD' label: the
        # dataset is not miniAOD, so the guard above passed on the label.
        print("Processing private local sample: " + miniAOD)
        files = storeTools.fillFromStore(miniAOD, 0, -1, True)

    files = storeTools.keepOnlyFilesFromGoodRun(
        files, getByLabel(procData, 'lumiMask', ''))

    if split > 0:
        NFilesPerJob = max(1, len(files) // split)
    else:
        NFilesPerJob = DefaultNFilesPerJob

    for g in range(0, len(files), NFilesPerJob):
        FileList.append(
            ''.join('"' + f + '",\\n' for f in files[g:g + NFilesPerJob]))

    return FileList
def getFileList(procData, DefaultNFilesPerJob):
    """Build the input-file entries for one process description.

    Samples are read from the 'miniAOD' and 'dset' labels of ``procData``.
    'dset' entries containing "/MINIAOD" are treated as miniAOD samples,
    the remaining ones as non-miniAOD samples.

    For each miniAOD sample the replica sites are fetched with ``DASQuery``.
    The sample is considered local when the line mentioning ``localTier``
    carries the largest replica fraction of all parsed lines. File names
    come from DAS or DBS (selected by ``DatasetFileDB``) or, for samples
    that are neither local nor "/MINIAOD" datasets, from
    ``storeTools.fillFromStore``. They are filtered with
    ``storeTools.keepOnlyFilesFromGoodRun`` and grouped into jobs.

    For each non-miniAOD sample one entry per segment is produced from
    ``opt.indir`` and ``origdtag``.

    Args:
        procData: process description, only accessed through ``getByLabel``.
        DefaultNFilesPerJob: group size used when the 'split' label is
            not positive.

    Returns:
        A list of strings. MiniAOD entries are concatenations of
        double-quoted file names, each followed by a comma and a literal
        backslash-n; non-miniAOD entries are single double-quoted paths.

    Side effects:
        Resets and updates the globals ``isLocalSample`` (status of the last
        miniAOD sample processed) and ``nonLocalSamples`` (extended with
        every miniAOD sample not found on ``localTier``). Prints progress
        messages and runs external commands through ``commands``.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False

    def _site_fraction(line):
        # Third whitespace-separated field with '%' and '"' stripped, as a
        # float; None when the field is missing or not numeric.
        fields = line.split()
        if len(fields) < 3:
            return None
        try:
            return float(fields[2].replace('%', '').replace('"', ''))
        except ValueError:
            return None

    FileList = []
    # Work on a copy: the 'dset' entries added below must not be written
    # into the list object handed back by getByLabel.
    miniAODSamples = list(getByLabel(procData, 'miniAOD', []))
    nonMiniAODSamples = []
    for s in getByLabel(procData, 'dset', []):
        if "/MINIAOD" in s:
            miniAODSamples.append(s)
        else:
            nonMiniAODSamples.append(s)

    for sample in miniAODSamples:
        dbsURL = getByLabel(procData, 'dbsURL', '')
        instance = "instance=prod/" + dbsURL if len(dbsURL) > 0 else ""
        listSites = DASQuery('site dataset=' + sample + ' ' + instance +
                             ' | grep site.name,site.replica_fraction')

        MaxFraction = 0
        FractionOnLocal = -1
        # Without a configured local tier no line is inspected at all.
        siteLines = listSites.split('\n') if localTier != "" else []
        for site in siteLines:
            fraction = _site_fraction(site)
            if fraction is None:
                # A line without a numeric fraction is ignored, even if it
                # happens to mention the local tier.
                continue
            if fraction > MaxFraction:
                MaxFraction = fraction
            if localTier in site:
                FractionOnLocal = fraction

        IsOnLocalTier = (FractionOnLocal == MaxFraction)
        if IsOnLocalTier:
            print("Sample is found to be on the local grid tier %s (%f%%) for %s"
                  % (localTier, FractionOnLocal, sample))

        isLocalSample = IsOnLocalTier
        if localTier != "" and not IsOnLocalTier:
            nonLocalSamples += [sample]

        if IsOnLocalTier or "/MINIAOD" in sample:
            if DatasetFileDB == "DAS":
                names = DASQuery('file dataset=' + sample + ' ' + instance).split()
            elif DatasetFileDB == "DBS":
                curlCommand = "curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
                dbsPath = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
                sedTheList = ' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
                names = commands.getstatusoutput(
                    initialCommand + curlCommand + '"' + dbsPath +
                    '/files?dataset=' + sample + '"' + sedTheList)[1].split()
            else:
                names = []

            if not IsOnLocalTier:
                # Global redirector, works worldwide. Regional alternatives:
                # root://xrootd-cms.infn.it/ (EU side),
                # root://cmsxrootd.fnal.gov/ (US side).
                prefix = "root://cms-xrd-global.cern.ch/"
            elif hostname.find("iihe.ac.be") != -1:
                prefix = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4"
            elif hostname.find("ucl.ac.be") != -1:
                prefix = "/storage/data/cms"
            else:
                prefix = "root://eoscms//eos/cms"

            # Make sure that we only consider root files.
            files = [prefix + x for x in names if ".root" in x]
        else:
            print("Processing private local sample: " + sample)
            files = storeTools.fillFromStore(sample, 0, -1, True)

        files = storeTools.keepOnlyFilesFromGoodRun(
            files, os.path.expandvars(getByLabel(procData, 'lumiMask', '')))

        split = getByLabel(procData, 'split', -1)
        if split > 0:
            NFilesPerJob = max(1, len(files) // split)
        else:
            NFilesPerJob = DefaultNFilesPerJob
            # Make sure the number of jobs isn't too big.
            # NOTE(review): the cap is assumed to apply only to the default
            # grouping (an explicit 'split' is honoured as-is) - confirm.
            if (len(files) // NFilesPerJob) > 100:
                NFilesPerJob = len(files) // 100

        for start in range(0, len(files), NFilesPerJob):
            group = files[start:start + NFilesPerJob]
            FileList.append(''.join('"' + f + '",\\n' for f in group))

    for sample in nonMiniAODSamples:
        split = getByLabel(procData, 'split', -1)
        for segment in range(0, split):
            # The path depends on the segment only, not on the sample name.
            eventsFile = opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
            print("Processing a non EDM/miniAOD sample in : " + eventsFile)
            if eventsFile.find('/store/') == 0:
                eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
            FileList.append('"' + eventsFile + '"')

    return FileList