def getFileList(procData,DefaultNFilesPerJob):
    """Build the list of input-file groups for one dataset entry (legacy version).

    Reads the sample descriptor ``procData`` via ``getByLabel`` and returns
    ``FileList``: one string per job, each holding the quoted file names
    assigned to that job.  Published miniAOD datasets are resolved through
    das_client or the DBS REST API; anything else is treated as a privately
    produced set of ROOT files.

    Side effects: sets the module globals ``isLocalSample`` and appends to
    ``nonLocalSamples``.

    NOTE(review): in the final ``else`` branch, ``split`` and ``segment`` are
    referenced before being defined in this scope -- presumably they leak in
    from module level; verify before relying on that path.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False
    FileList = [];
    miniAODSamples = getByLabel(procData,'miniAOD','')
    # A dataset counts as miniAOD if its DBS path says so (or for the special
    # 'amagitte' private publication).
    isMINIAODDataset = ("/MINIAOD" in getByLabel(procData,'dset','')) or ("amagitte" in getByLabel(procData,'dset',''))
    if(isMINIAODDataset or len(getByLabel(procData,'miniAOD',''))>0):
        # Optional non-global DBS instance (e.g. for user-published datasets).
        instance = ""
        if(len(getByLabel(procData,'dbsURL',''))>0):
            instance = "instance=prod/"+ getByLabel(procData,'dbsURL','')
        # Ask DAS which sites host the dataset and with which fraction.
        listSites = commands.getstatusoutput('das_client.py --query="site dataset='+getByLabel(procData,'dset','') + ' ' + instance + ' | grep site.name,site.dataset_fraction " --limit=0')[1]
        IsOnLocalTier=False
        for site in listSites.split('\n'):
            # Only accept the local tier when 100% of the dataset is there.
            if(localTier != "" and localTier in site and '100.00%' in site):
                IsOnLocalTier=True
                print ("Sample is found to be on the local grid tier (%s): %s") %(localTier, site)
                break
        isLocalSample = IsOnLocalTier
        if(localTier != "" and not IsOnLocalTier):
            nonLocalSamples += [getByLabel(procData,'dset','')]
        list = []
        if(IsOnLocalTier or isMINIAODDataset):
            list = []
            if(DatasetFileDB=="DAS"):
                # List the logical file names (LFNs) through das_client.
                list = commands.getstatusoutput('das_client.py --query="file dataset='+getByLabel(procData,'dset','') + ' ' + instance + '" --limit=0')[1].split()
            elif(DatasetFileDB=="DBS"):
                # Query the DBS REST API directly using the grid proxy and
                # extract the logical file names with sed.
                curlCommand="curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
                dbsPath="https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
                sedTheList=' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
                list = commands.getstatusoutput(initialCommand + curlCommand+'"'+dbsPath+'/files?dataset='+getByLabel(procData,'dset','')+'"'+sedTheList)[1].split()
            list = [x for x in list if ".root" in x] #make sure that we only consider root files
            # Prefix each LFN with the access protocol appropriate for the
            # host we are running on.
            for i in range(0,len(list)):
                if IsOnLocalTier:
                    if (hostname.find("iihe.ac.be")!=-1):
                        list[i] = "dcap://maite.iihe.ac.be:/pnfs/iihe/cms/ph/sc4"+list[i]
                    elif(hostname.find("ucl.ac.be")!=-1):
                        list[i] = "/storage/data/cms"+list[i]
                    else:
                        list[i] = "root://eoscms//eos/cms"+list[i]
                else:
                    list[i] = "root://cms-xrd-global.cern.ch/"+list[i] #works worldwide
                    #list[i] = "root://xrootd-cms.infn.it/"+list[i] #optimal for EU side
                    #list[i] = "root://cmsxrootd.fnal.gov/"+list[i] #optimal for US side
        elif(len(getByLabel(procData,'miniAOD',''))>0):
            print "Processing private local sample: " + getByLabel(procData,'miniAOD','')
            list = storeTools.fillFromStore(getByLabel(procData,'miniAOD',''),0,-1,True);
        else:
            print "Processing an unknown type of sample (assuming it's a private local sample): " + getByLabel(procData,'miniAOD','')
            list = storeTools.fillFromStore(getByLabel(procData,'miniAOD',''),0,-1,True);
        # Drop files that contain no run selected by the luminosity mask.
        list = storeTools.keepOnlyFilesFromGoodRun(list, getByLabel(procData,'lumiMask',''))
        # Group the files: honour an explicit 'split' count if given,
        # otherwise use the default group size.
        split=getByLabel(procData,'split',-1)
        if(split>0):
            NFilesPerJob = max(1,len(list)/split)
        else:
            NFilesPerJob = DefaultNFilesPerJob
        for g in range(0, len(list), NFilesPerJob):
            groupList = ''
            for f in list[g:g+NFilesPerJob]:
                groupList += '"'+f+'",\\n';
            FileList.append(groupList)
    else:
        # Non-EDM sample: one pre-produced ROOT file per segment.
        # NOTE(review): 'segment' and 'split' are not defined in this branch
        # before use -- confirm they exist at module scope.
        print "Processing a non EDM/miniAOD sample in : " + opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
        for segment in range(0,split) :
            eventsFile=opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
            if(eventsFile.find('/store/')==0) :
                # Resolve the LFN into a physical file name.
                eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
            FileList.append('"'+eventsFile+'"')
    return FileList
# Fragment of the per-dataset merge loop (loop header not in view): resolve
# the dataset's input directory, optionally clean duplicate files, then
# partition the stored files into 'split' merge segments.
if (opt.onlytag != 'all'):
    # Tag veto: skip datasets whose dtag does not match the requested tag.
    itag = d['dtag']
    if (itag.find(opt.onlytag) < 0): continue
inputdir = opt.inDir + "/" + d['dtag']
print str(d['dtag']) + " --> " + inputdir
if (opt.duplicates):
    dirToClean = inputdir
    # NOTE(review): str.find returns -1 (truthy) when absent and 0 (falsy)
    # when the prefix starts the string -- this condition looks inverted
    # relative to its apparent intent; confirm before changing.
    if (dirToClean.find('/storage/data/cms/store/')):
        dirToClean = dirToClean.replace(
            '/storage/data/cms/store/',
            '/storage_rw/data/cms/store/')  #Hack for Louvain T2
    removeDuplicates(dirToClean)
filenames = fillFromStore(inputdir, 0, -1, False)
nfiles = len(filenames)
filenames = addPrefixSuffixToFileList(" '", filenames, "',")
split = getByLabel(d, 'split', 1)
# Distribute nfiles as evenly as possible over 'split' segments: each segment
# gets NFilesToMerge files, plus one extra while remainder files are left.
NFilesToMerge = nfiles // split
NFilesToMergeRemains = nfiles % split
startFile = 0
endFile = 0
for segment in range(0, split):
    startFile = endFile
    endFile = endFile + NFilesToMerge
    if (NFilesToMergeRemains > 0):
        endFile += 1
        NFilesToMergeRemains -= 1
def getFileList(procData, DefaultNFilesPerJob):
    """Build the list of input-file groups for one dataset entry.

    Partitions the declared 'dset' entries into miniAOD datasets (resolved
    through DAS or the DBS REST API) and everything else (pre-produced local
    ROOT files), and returns ``FileList``: one string per job, each holding
    the quoted file names assigned to that job.

    Side effects: sets the module globals ``isLocalSample`` and appends to
    ``nonLocalSamples``.

    NOTE(review): in the trailing nonMiniAODSamples loop, ``segment`` is used
    in the print before the loop defines it, and ``split`` carries over from
    the last miniAOD iteration -- verify that path before relying on it.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False
    FileList = []
    nonMiniAODSamples = []
    miniAODSamples = getByLabel(procData, 'miniAOD', [])
    dsetSamples = getByLabel(procData, 'dset', [])
    # Partition the declared datasets by type.
    for s in dsetSamples:
        if ("/MINIAOD" in s):
            miniAODSamples += [s]
        else:
            nonMiniAODSamples += [s]
    for sample in miniAODSamples:
        # Optional non-global DBS instance (e.g. for user-published datasets).
        instance = ""
        if (len(getByLabel(procData, 'dbsURL', '')) > 0):
            instance = "instance=prod/" + getByLabel(procData, 'dbsURL', '')
        # Ask DAS which sites replicate the dataset and at what fraction.
        listSites = DASQuery('site dataset=' + sample + ' ' + instance +
                             ' | grep site.name,site.replica_fraction')
        IsOnLocalTier = False
        MaxFraction = 0
        FractionOnLocal = -1
        for site in listSites.split('\n'):
            if (localTier == ""): continue
            try:
                # MaxFraction = max(MaxFraction, float(site.split()[1].replace('%','')) )
                MaxFraction = max(
                    MaxFraction,
                    float(site.split()[2].replace('%', '').replace('"', '')))
            except:
                # Unparseable DAS line: keep the current maximum.
                MaxFraction = max(MaxFraction, 0.0)
            if (localTier in site):
                #FractionOnLocal = float(site.split()[1].replace('%',''));
                FractionOnLocal = float(site.split()[2].replace('%', '').replace(
                    '"', ''))
        # The local tier is used only if no other site holds a larger fraction.
        if (FractionOnLocal == MaxFraction):
            IsOnLocalTier = True
            print(
                "Sample is found to be on the local grid tier %s (%f%%) for %s"
            ) % (localTier, FractionOnLocal, sample)
        isLocalSample = IsOnLocalTier
        if (localTier != "" and not IsOnLocalTier):
            nonLocalSamples += [sample]
        list = []
        if (IsOnLocalTier or "/MINIAOD" in sample):
            list = []
            if (DatasetFileDB == "DAS"):
                # List the logical file names (LFNs) through DAS.
                list = DASQuery('file dataset=' + sample + ' ' + instance).split()
                #print list
                # print "\n"
            elif (DatasetFileDB == "DBS"):
                # Query the DBS REST API directly using the grid proxy and
                # extract the logical file names with sed.
                curlCommand = "curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
                dbsPath = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
                sedTheList = ' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
                list = commands.getstatusoutput(
                    initialCommand + curlCommand + '"' + dbsPath +
                    '/files?dataset=' + sample + '"' + sedTheList)[1].split()
            list = [x for x in list if ".root" in x]  #make sure that we only consider root files
            # Prefix each LFN with the access protocol appropriate for the
            # host we are running on.
            for i in range(0, len(list)):
                if IsOnLocalTier:
                    if (hostname.find("iihe.ac.be") != -1):
                        list[i] = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4" + list[i]
                    elif (hostname.find("ucl.ac.be") != -1):
                        list[i] = "/storage/data/cms" + list[i]
                    else:
                        list[i] = "root://eoscms//eos/cms" + list[i]
                        #print list[i]
                else:
                    list[i] = "root://cms-xrd-global.cern.ch/" + list[i]  #works worldwide
                    #list[i] = "root://xrootd-cms.infn.it/"+list[i] #optimal for EU side
                    #list[i] = "root://cmsxrootd.fnal.gov/"+list[i] #optimal for US side
        else:
            print "Processing private local sample: " + sample
            list = storeTools.fillFromStore(sample, 0, -1, True)
        # Drop files containing no run selected by the (env-expanded) lumi mask.
        list = storeTools.keepOnlyFilesFromGoodRun(
            list, os.path.expandvars(getByLabel(procData, 'lumiMask', '')))
        # print len(list)
        # Group the files: honour an explicit 'split' count if given,
        # otherwise use the default group size.
        split = getByLabel(procData, 'split', -1)
        if (split > 0):
            NFilesPerJob = max(1, len(list) / split)
        else:
            NFilesPerJob = DefaultNFilesPerJob
        if ((len(list) / NFilesPerJob) > 100):
            NFilesPerJob = len(list) / 100  #make sure the number of jobs isn't too big
        for g in range(0, len(list), NFilesPerJob):
            # print g
            groupList = ''
            for f in list[g:g + NFilesPerJob]:
                groupList += '"' + f + '",\\n'
                #print f
            FileList.append(groupList)
    for sample in nonMiniAODSamples:
        # Non-EDM sample: one pre-produced ROOT file per segment.
        # NOTE(review): 'segment' is referenced here before the loop below
        # defines it -- confirm it exists at module scope.
        print "Processing a non EDM/miniAOD sample in : " + opt.indir + '/' + origdtag + '_' + str(
            segment) + '.root'
        for segment in range(0, split):
            eventsFile = opt.indir + '/' + origdtag + '_' + str(
                segment) + '.root'
            if (eventsFile.find('/store/') == 0):
                # Resolve the LFN into a physical file name.
                eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
            FileList.append('"' + eventsFile + '"')
    return FileList
# Per-dataset merge loop (body truncated in this view): for each dataset d,
# resolve its input directory, optionally remove duplicate files, then split
# the stored files into 'split' merge segments of NFilesToMerge files each
# (+1 file per segment while remainder files are left over).
for d in data :
    #tag veto
    if(opt.onlytag!='all') :
        itag=d['dtag']
        if(itag.find(opt.onlytag)<0) : continue
    inputdir = opt.inDir+"/"+d['dtag'];
    print str(d['dtag'])+" --> "+inputdir
    if(opt.duplicates):
        dirToClean = inputdir
        # NOTE(review): str.find returns -1 (truthy) when absent and 0 (falsy)
        # when the prefix starts the string -- this condition looks inverted
        # relative to its apparent intent; confirm before changing.
        if(dirToClean.find('/storage/data/cms/store/')):
            dirToClean = dirToClean.replace('/storage/data/cms/store/', '/storage_rw/data/cms/store/') #Hack for Louvain T2
        removeDuplicates(dirToClean);
    filenames=fillFromStore(inputdir,0,-1,False)
    nfiles=len(filenames)
    filenames=addPrefixSuffixToFileList(" '", filenames, "',")
    split=getByLabel(d,'split',1)
    # Distribute nfiles as evenly as possible over 'split' segments.
    NFilesToMerge = nfiles//split
    NFilesToMergeRemains = nfiles%split
    startFile = 0
    endFile = 0
    for segment in range(0,split) :
        startFile = endFile
        endFile = endFile + NFilesToMerge
        if(NFilesToMergeRemains>0):
            endFile+=1
            NFilesToMergeRemains-=1
# Per-dataset merge loop, natural-sort variant (body truncated in this view):
# identical to the plain merge loop except the file list is sorted in natural
# (human) order via LaunchOnCondor.natural_sort before segmenting.
for d in data :
    #tag veto
    if(opt.onlytag!='all') :
        itag=d['dtag']
        if(itag.find(opt.onlytag)<0) : continue
    inputdir = opt.inDir+"/"+d['dtag'];
    print str(d['dtag'])+" --> "+inputdir
    if(opt.duplicates):
        dirToClean = inputdir
        # NOTE(review): str.find returns -1 (truthy) when absent and 0 (falsy)
        # when the prefix starts the string -- this condition looks inverted
        # relative to its apparent intent; confirm before changing.
        if(dirToClean.find('/storage/data/cms/store/')):
            dirToClean = dirToClean.replace('/storage/data/cms/store/', '/storage_rw/data/cms/store/') #Hack for Louvain T2
        removeDuplicates(dirToClean);
    # Natural sort keeps file_2 before file_10 when segmenting.
    filenames=LaunchOnCondor.natural_sort(fillFromStore(inputdir,0,-1,False))
    nfiles=len(filenames)
    filenames=addPrefixSuffixToFileList(" '", filenames, "',")
    split=getByLabel(d,'split',1)
    # Distribute nfiles as evenly as possible over 'split' segments.
    NFilesToMerge = nfiles//split
    NFilesToMergeRemains = nfiles%split
    startFile = 0
    endFile = 0
    for segment in range(0,split) :
        startFile = endFile
        endFile = endFile + NFilesToMerge
        if(NFilesToMergeRemains>0):
            endFile+=1
            NFilesToMergeRemains-=1
# Fragment of the per-dataset submission loop (loop header not in view): for
# every input directory attached to dataset d, count its files and derive the
# number of jobs and an inter-submission sleep time from opt.fperjob.
alldirs = []
try:
    alldirs = d[opt.dirtag]
except:
    # Dataset carries no directory list for this tag: skip it.
    continue
#tag veto
if (opt.onlytag != 'all'):
    itag = d['dtag']
    if (itag.find(opt.onlytag) < 0): continue
idir = 0
for dir in alldirs:
    idir = idir + 1
    filenames = fillFromStore(dir, 0, -1, False)
    nfiles = len(filenames)
    njobs = 1
    sleep = 0
    if (opt.fperjob > 0):
        # Ceil-divide the files over jobs of opt.fperjob files each.
        njobs = nfiles // opt.fperjob
        if (nfiles % opt.fperjob > 0): njobs = njobs + 1
        sleep = 2 * opt.fperjob / 6
    #special case for event generation
    if dir == "none":
        opt.fperjob = d['npersplit']
        njobs = d['split']
    #substitute some job parameters by json file parameters
def getFileList(procData,DefaultNFilesPerJob):
    """Build the list of input-file groups for one dataset entry (compact style).

    Partitions the declared 'dset' entries into miniAOD datasets (resolved
    through DAS or the DBS REST API) and everything else (pre-produced local
    ROOT files), and returns ``FileList``: one string per job, each holding
    the quoted file names assigned to that job.

    Side effects: sets the module globals ``isLocalSample`` and appends to
    ``nonLocalSamples``.
    """
    global nonLocalSamples
    global isLocalSample
    isLocalSample = False
    FileList = [];
    nonMiniAODSamples = []
    miniAODSamples = getByLabel(procData,'miniAOD',[])
    dsetSamples = getByLabel(procData,'dset',[])
    # Partition the declared datasets by type.
    for s in dsetSamples:
        if("/MINIAOD" in s): miniAODSamples+=[s]
        else: nonMiniAODSamples+=[s]
    for sample in miniAODSamples:
        # Optional non-global DBS instance (e.g. for user-published datasets).
        instance = ""
        if(len(getByLabel(procData,'dbsURL',''))>0):
            instance = "instance=prod/"+ getByLabel(procData,'dbsURL','')
        # Ask DAS which sites replicate the dataset and at what fraction.
        listSites = DASQuery('site dataset='+sample + ' ' + instance + ' | grep site.name,site.replica_fraction')
        IsOnLocalTier=False
        MaxFraction=0;
        FractionOnLocal=-1;
        for site in listSites.split('\n'):
            if(localTier==""):continue;
            try:
                MaxFraction = max(MaxFraction, float(site.split()[2].replace('%','').replace('"','')) )
            except:
                # Unparseable DAS line: keep the current maximum.
                MaxFraction = max(MaxFraction, 0.0);
            if(localTier in site):
                FractionOnLocal = float(site.split()[2].replace('%','').replace('"',''));
        # The local tier is used only if no other site holds a larger fraction.
        if(FractionOnLocal == MaxFraction):
            IsOnLocalTier=True
            print ("Sample is found to be on the local grid tier %s (%f%%) for %s") %(localTier, FractionOnLocal, sample)
        isLocalSample = IsOnLocalTier
        if(localTier != "" and not IsOnLocalTier): nonLocalSamples += [sample]
        list = []
        if(IsOnLocalTier or "/MINIAOD" in sample):
            list = []
            if(DatasetFileDB=="DAS"):
                # List the logical file names (LFNs) through DAS.
                list = DASQuery('file dataset='+sample + ' ' + instance).split()
            elif(DatasetFileDB=="DBS"):
                # Query the DBS REST API directly using the grid proxy and
                # extract the logical file names with sed.
                curlCommand="curl -ks --key $X509_USER_PROXY --cert $X509_USER_PROXY -X GET "
                dbsPath="https://cmsweb.cern.ch/dbs/prod/global/DBSReader"
                sedTheList=' | sed \"s#logical_file_name#\\nlogical_file_name#g\" | sed \"s#logical_file_name\': \'##g\" | sed \"s#\'}, {u\'##g\" | sed \"s#\'}]##g\" | grep store '
                list = commands.getstatusoutput(initialCommand + curlCommand+'"'+dbsPath+'/files?dataset='+sample+'"'+sedTheList)[1].split()
            list = [x for x in list if ".root" in x] #make sure that we only consider root files
            # Prefix each LFN with the access protocol appropriate for the
            # host we are running on.
            for i in range(0,len(list)):
                if IsOnLocalTier:
                    if (hostname.find("iihe.ac.be")!=-1):
                        list[i] = "dcap://maite.iihe.ac.be/pnfs/iihe/cms/ph/sc4"+list[i]
                    elif(hostname.find("ucl.ac.be" )!=-1):
                        list[i] = "/storage/data/cms"+list[i]
                    else:
                        list[i] = "root://eoscms//eos/cms"+list[i]
                else:
                    list[i] = "root://cms-xrd-global.cern.ch/"+list[i] #works worldwide
                    #list[i] = "root://xrootd-cms.infn.it/"+list[i] #optimal for EU side
                    #list[i] = "root://cmsxrootd.fnal.gov/"+list[i] #optimal for US side
        else:
            print "Processing private local sample: " + sample
            list = storeTools.fillFromStore(sample,0,-1,True);
        # Drop files containing no run selected by the (env-expanded) lumi mask.
        list = storeTools.keepOnlyFilesFromGoodRun(list, os.path.expandvars(getByLabel(procData,'lumiMask','')))
        # Group the files: honour an explicit 'split' count if given,
        # otherwise use the default group size.
        split=getByLabel(procData,'split',-1)
        if(split>0):
            NFilesPerJob = max(1,len(list)/split)
        else:
            NFilesPerJob = DefaultNFilesPerJob
        if((len(list)/NFilesPerJob)>100):NFilesPerJob=len(list)/100; #make sure the number of jobs isn't too big
        for g in range(0, len(list), NFilesPerJob):
            groupList = ''
            for f in list[g:g+NFilesPerJob]:
                groupList += '"'+f+'",\\n';
            FileList.append(groupList)
    for sample in nonMiniAODSamples:
        # Non-EDM sample: one pre-produced ROOT file per segment.
        split=getByLabel(procData,'split',-1)
        for segment in range(0,split) :
            print "Processing a non EDM/miniAOD sample in : " + opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
            eventsFile=opt.indir + '/' + origdtag + '_' + str(segment) + '.root'
            if(eventsFile.find('/store/')==0) :
                # Resolve the LFN into a physical file name.
                eventsFile = commands.getstatusoutput('cmsPfn ' + eventsFile)[1]
            FileList.append('"'+eventsFile+'"')
    return FileList
# Fragment of the per-dataset merge loop, natural-sort variant (the enclosing
# loop and the start of the tag-veto check are not in view): resolve the
# dataset's input directory, optionally clean duplicates, natural-sort the
# stored files, then partition them into 'split' merge segments.
itag = d['dtag']
if (itag.find(opt.onlytag) < 0): continue
inputdir = opt.inDir + "/" + d['dtag']
print str(d['dtag']) + " --> " + inputdir
if (opt.duplicates):
    dirToClean = inputdir
    # NOTE(review): str.find returns -1 (truthy) when absent and 0 (falsy)
    # when the prefix starts the string -- this condition looks inverted
    # relative to its apparent intent; confirm before changing.
    if (dirToClean.find('/storage/data/cms/store/')):
        dirToClean = dirToClean.replace(
            '/storage/data/cms/store/',
            '/storage_rw/data/cms/store/')  #Hack for Louvain T2
    removeDuplicates(dirToClean)
# Natural sort keeps file_2 before file_10 when segmenting.
filenames = LaunchOnCondor.natural_sort(
    fillFromStore(inputdir, 0, -1, False))
nfiles = len(filenames)
filenames = addPrefixSuffixToFileList(" '", filenames, "',")
split = getByLabel(d, 'split', 1)
# Distribute nfiles as evenly as possible over 'split' segments.
NFilesToMerge = nfiles // split
NFilesToMergeRemains = nfiles % split
startFile = 0
endFile = 0
for segment in range(0, split):
    startFile = endFile
    endFile = endFile + NFilesToMerge
    if (NFilesToMergeRemains > 0):
        endFile += 1
        NFilesToMergeRemains -= 1
# Fragment of the per-dataset loop (enclosing loop not in view): fold the
# branching ratios into the MC cross-section, then build the raw input file
# list for miniAOD-type samples, choosing the access protocol by sample
# location (CERN EOS, private store, or worldwide AAA/xrootd).
if(xsec>0 and not isdata) :
    for ibr in br :
        xsec = xsec*ibr
split=getByLabel(d,'split',1)
FileList = [];
miniAODSamples = getByLabel(d,'miniAOD','')
if(("/MINIAOD" in getByLabel(d,'dset','')) or len(getByLabel(d,'miniAOD',''))>0):
    # Ask DAS which sites host the dataset.
    listSites = commands.getstatusoutput('das_client.py --query="site dataset='+getByLabel(d,'dset','') + '" --limit=0')[1]
    list = []
    if(localTier in listSites and "CERN" in localTier):
        # Dataset is on the CERN tier: read it through EOS directly.
        list = commands.getstatusoutput('das_client.py --query="file dataset='+getByLabel(d,'dset','') + '" --limit=0')[1].split()
        for i in range(0,len(list)):
            list[i] = "root://eoscms//eos/cms"+list[i]
    elif(len(getByLabel(d,'miniAOD',''))>0):
        # Privately produced miniAOD: list the files from the local store.
        list = storeTools.fillFromStore(getByLabel(d,'miniAOD',''),0,-1,True);
    elif("/MINIAODSIM" in getByLabel(d,'dset','')):
        # Remote dataset read over AAA: a valid grid proxy is required.
        if(not kInitDone):
            print "You are going to run on a sample over grid using the AAA protocol, it is therefore needed to initialize your grid certificate"
            os.system('mkdir -p ~/x509_user_proxy; voms-proxy-init -voms cms -valid 192:00 --out ~/x509_user_proxy/proxy')#all must be done in the same command to avoid environement problems. Note that the first sourcing is only needed in Louvain
            initialCommand = 'export X509_USER_PROXY=~/x509_user_proxy/proxy;voms-proxy-init --noregen;'
            kInitDone = True
        print("Use das_client.py to list files from : " + getByLabel(d,'dset','') )
        list = commands.getstatusoutput('das_client.py --query="file dataset='+getByLabel(d,'dset','') + '" --limit=0')[1].split()
        for i in range(0,len(list)):
            list[i] = "root://cms-xrd-global.cern.ch/"+list[i]
    else:
        list = storeTools.fillFromStore(getByLabel(d,'miniAOD',''),0,-1,True);
# Fragment of the per-dataset submission loop, compact style (loop header not
# in view): for every input directory attached to dataset d, count its files
# and derive the number of jobs and an inter-submission sleep time from
# opt.fperjob.
alldirs=[]
try :
    alldirs = d[opt.dirtag]
except:
    # Dataset carries no directory list for this tag: skip it.
    continue
#tag veto
if(opt.onlytag!='all') :
    itag=d['dtag']
    if(itag.find(opt.onlytag)<0) : continue
idir=0
for dir in alldirs:
    idir=idir+1
    filenames=fillFromStore(dir,0,-1,False)
    nfiles=len(filenames)
    njobs=1
    sleep=0;
    if(opt.fperjob>0) :
        # Ceil-divide the files over jobs of opt.fperjob files each.
        njobs=nfiles//opt.fperjob
        if(nfiles%opt.fperjob>0):njobs = njobs+1
        sleep=2*opt.fperjob/6
    #special case for event generation
    if dir=="none" :
        opt.fperjob=d['npersplit']
        njobs=d['split']
    #substitute some job parameters by json file parameters