def check_dataset_completion(originalDS, filteredDS, treeNameOrig=None, treeNameFilt=None,
                             histNameOrig=None, histNameFilt=None,
                             fileKeyOrig=None, fileKeyFilt=None):
    """Count events in an original EOS dataset and in its filtered counterpart.

    Both directories are walked with ``eosutil.walk_eos`` and every selected
    file is opened through the ``root://eoscms/`` prefix.

    Args:
        originalDS: EOS directory holding the original sample.
        filteredDS: EOS directory holding the filtered sample.
        treeNameOrig, treeNameFilt: name of the tree whose ``GetEntries()``
            is summed for the original / filtered files (optional).
        histNameOrig, histNameFilt: name of the histogram whose
            ``GetBinContent(1)`` is summed for the original / filtered files
            (optional).
        fileKeyOrig, fileKeyFilt: when given, only files whose name contains
            this substring are opened.

    Returns:
        ``(orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist)``,
        or ``None`` (after printing a message) when the original dataset
        yielded no events at all; the filtered dataset is not scanned then.

    Raises:
        AssertionError: if neither a tree nor a histogram name is given for
            the original or for the filtered sample.
    """
    assert treeNameOrig is not None or histNameOrig is not None, 'Must provide a histogram or tree name for original samples'
    assert treeNameFilt is not None or histNameFilt is not None, 'Must provide a histogram or tree name for filtered samples'
    # Giving both a tree and a histogram name is allowed; both counters are
    # then filled.

    def count_events(dataset, file_key, tree_name, hist_name):
        """Return (tree entries, histogram bin-1 content) summed over dataset."""
        n_tree = 0
        n_hist = 0
        for top, dirs, files, sizes in eosutil.walk_eos(dataset):
            for fname in files:
                if file_key is not None and not fname.count(file_key):
                    continue
                url = 'root://eoscms/' + top + '/' + fname
                ofile = ROOT.TFile.Open(url)
                # NOTE(review): `== None` is deliberate; a failed open is
                # presumably a PyROOT null proxy rather than the None
                # singleton -- confirm before switching to `is None`.
                if ofile == None:
                    print('Could not open file %s' % url)
                    continue
                try:
                    if ofile.IsZombie():
                        print('Could not open file %s' % url)
                        continue
                    if tree_name is not None:
                        n_tree += ofile.Get(tree_name).GetEntries()
                    if hist_name is not None:
                        n_hist += ofile.Get(hist_name).GetBinContent(1)
                finally:
                    # Always release the handle, whatever happened above.
                    ofile.Close()
        return n_tree, n_hist

    orig_nevt_tree, orig_nevt_hist = count_events(originalDS, fileKeyOrig, treeNameOrig, histNameOrig)

    if not orig_nevt_tree and not orig_nevt_hist:
        print('Did not get any original events. Check the path')
        return

    filt_nevt_tree, filt_nevt_hist = count_events(filteredDS, fileKeyFilt, treeNameFilt, histNameFilt)

    return orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist
def collect_input_files_eos(filesDir, filekey='.root'):
    """Return the full path of every file below ``filesDir`` matching ``filekey``.

    The directory is traversed with ``eosutil.walk_eos``; a file is kept when
    its name contains ``filekey`` as a substring.  Paths are returned as
    ``<directory>/<file name>`` in traversal order.
    """
    logging.info('Getting list of input files from eos in %s' %filesDir)

    return [
        top + '/' + fname
        for top, dirs, files, sizes in eosutil.walk_eos(filesDir)
        for fname in files
        if filekey in fname
    ]
def main():
    """Pickle the list of all data and MC ntuple files found on EOS.

    The data and MC directories are derived from ``options.baseDir`` and
    ``options.version``.  Each is traversed with ``walk_eos`` and the full
    file paths (data first, then MC) are written as a single list to
    ``files.pickle`` in the current directory.
    """
    mc_path = '%s/MC/%s/Ntuple' % (options.baseDir, options.version)
    data_path = '%s/Data/%s/Ntuple' % (options.baseDir, options.version)

    data_files = ['%s/%s' % (top, f)
                  for top, dirs, files, sizes in walk_eos(data_path)
                  for f in files]
    mc_files = ['%s/%s' % (top, f)
                for top, dirs, files, sizes in walk_eos(mc_path)
                for f in files]

    all_files = data_files + mc_files

    # Binary mode is what pickle expects, and the context manager closes the
    # file even if dump() raises.
    with open('files.pickle', 'wb') as outfile:
        pickle.dump(all_files, outfile)
def main():
    """Write the paths of all data and MC ntuple files to ``files.pickle``.

    ``options.baseDir`` and ``options.version`` select the ``Data`` and ``MC``
    ntuple directories.  Both are walked with ``walk_eos``; the resulting
    paths are concatenated (data before MC) and pickled as one list.
    """

    def list_files(path):
        """Return '<dir>/<name>' for every file walk_eos reports under path."""
        found = []
        for top, dirs, files, sizes in walk_eos(path):
            found.extend('%s/%s' % (top, f) for f in files)
        return found

    mc_path = '%s/MC/%s/Ntuple' % (options.baseDir, options.version)
    data_path = '%s/Data/%s/Ntuple' % (options.baseDir, options.version)

    # Data is listed before MC, matching the order of the pickled list.
    data_files = list_files(data_path)
    mc_files = list_files(mc_path)
    all_files = data_files + mc_files

    # Open in binary mode as pickle expects; `with` guarantees the file is
    # closed even when dump() fails.
    with open('files.pickle', 'wb') as outfile:
        pickle.dump(all_files, outfile)
def get_eos_tot_size(top_dir):
    """Compute the total size of ``top_dir`` and of each immediate subdirectory.

    Args:
        top_dir: directory to inspect.

    Returns:
        ``(top_size, subdir_sizes)`` where ``subdir_sizes`` maps each
        subdirectory name returned by ``eosutil.parse_eos_dir(top_dir)`` to
        the sum of all sizes ``eosutil.walk_eos`` reports beneath it, and
        ``top_size`` is the sum of those values plus the sizes that
        ``parse_eos_dir`` reports for ``top_dir`` itself.
    """
    # only iterate once here to get the directories
    dirs, files, sizes = eosutil.parse_eos_dir(top_dir)

    subdir_sizes = {}
    top_size = 0
    if dirs:
        for sdir in dirs:
            subdir_sizes[sdir] = sum(
                size
                for stop, sdirs, sfiles, ssizes in eosutil.walk_eos(top_dir + '/' + sdir)
                for size in ssizes
            )
        top_size = sum(subdir_sizes.values())

    # Add the entries sitting directly in top_dir on top of the subdirectories.
    top_size = sum(sizes, top_size)

    return top_size, subdir_sizes
def get_eos_tot_size(top_dir):
    """Return the total size of ``top_dir`` plus a per-subdirectory breakdown.

    Args:
        top_dir: directory to inspect.

    Returns:
        A tuple ``(top_size, subdir_sizes)``.  ``subdir_sizes`` is a dict
        keyed by the subdirectory names that ``eosutil.parse_eos_dir``
        returns for ``top_dir``; each value is the sum of every size that
        ``eosutil.walk_eos`` yields below that subdirectory.  ``top_size``
        adds the sizes ``parse_eos_dir`` reports for ``top_dir`` itself to
        the sum of all subdirectory totals.
    """
    subdir_sizes = {}

    # only iterate once here to get the directories
    dirs, files, sizes = eosutil.parse_eos_dir(top_dir)

    if dirs:
        for sdir in dirs:
            subtotal = 0
            for stop, sdirs, sfiles, ssizes in eosutil.walk_eos(top_dir + '/' + sdir):
                subtotal += sum(ssizes)
            subdir_sizes[sdir] = subtotal

    # An empty dict sums to 0, so no special case is needed without subdirs.
    top_size = sum(subdir_sizes.values())
    for size in sizes:
        top_size += size

    return top_size, subdir_sizes
def get_dataset_counts(dataset, fileKey, treeName=None, histName=None, vetoes=None):
    """Sum tree entries and first-bin histogram contents over a dataset.

    A ``dataset`` path containing ``/eos/`` is traversed with
    ``eosutil.walk_eos`` and its files are opened through the
    ``root://eoscms/`` prefix; any other path is traversed with ``os.walk``
    and opened directly.

    Args:
        dataset: directory holding the files of the dataset.
        fileKey: when not None, only files whose name contains this substring
            are opened.
        treeName: name of the tree whose ``GetEntries()`` is accumulated.
        histName: name of the histogram whose ``GetBinContent(1)`` is
            accumulated.
        vetoes: a substring or list of substrings; any file whose full path
            contains one of them is skipped.  ``None`` means no veto.

    Returns:
        ``(nevt_tree, nevt_hist)``.

    Files that cannot be opened, are zombies or were recovered are skipped.
    For local datasets, files below a directory whose path contains
    ``failed`` are skipped with a warning.  A missing tree or histogram is
    reported and contributes nothing.  Every opened file is closed again.
    """
    if vetoes is None:
        vetoes = []
    elif not isinstance(vetoes, list):
        vetoes = [vetoes]

    on_eos = bool(dataset.count('/eos/'))
    if on_eos:
        listing = ((top, files) for top, dirs, files, sizes in eosutil.walk_eos(dataset))
    else:
        listing = ((top, files) for top, dirs, files in os.walk(dataset))

    nevt_tree = 0
    nevt_hist = 0

    for top, files in listing:
        for fname in files:
            filepath = top + '/' + fname

            if any(filepath.count(v) for v in vetoes):
                continue

            if not on_eos and top.count('failed') > 0:
                print("WARNING!! Skip the file %s/%s as it (might) belongs to the failed files from crab" % (top, fname))
                continue

            if fileKey is not None and not fname.count(fileKey):
                continue

            ofile = ROOT.TFile.Open(('root://eoscms/' + filepath) if on_eos else filepath)

            # NOTE(review): `== None` is deliberate; a failed open is
            # presumably a PyROOT null proxy rather than the None singleton
            # -- confirm before switching to `is None`.
            if ofile == None:
                continue

            try:
                if ofile.IsZombie():
                    continue
                if ofile.TestBit(ROOT.TFile.kRecovered):
                    print('File was recovered, and data is probably not available')
                    continue

                if treeName is not None:
                    try:
                        otree = ofile.Get(treeName)
                        nevt_tree += otree.GetEntries()
                    except (ReferenceError, AttributeError):
                        # Either exception means the tree is not usable.
                        if on_eos:
                            print('Could not access file')
                        else:
                            print(' '.join(['Could not access file with treename ', str(treeName)]))

                if histName is not None:
                    # Lookup and read share one try block so a failed lookup
                    # can never fall through to a histogram left over from a
                    # previous file.
                    try:
                        ohist = ofile.Get(histName)
                        nevt_hist += ohist.GetBinContent(1)
                    except ReferenceError:
                        print('Could not access file')
                    except AttributeError:
                        if on_eos:
                            print('Could not access hist')
                        else:
                            print('Could not get hist from file %s' % (filepath))
            finally:
                # Close on every path, including the `continue`s above.
                ofile.Close()

    return (nevt_tree, nevt_hist)
def check_dataset_completion(originalDS, filteredDS, treeNameOrig=None, treeNameFilt=None,
                             histNameOrig=None, histNameFilt=None,
                             fileKeyOrig=None, fileKeyFilt=None):
    """Count events in an original dataset and in its filtered counterpart.

    A dataset path containing ``/eos/`` is traversed with
    ``eosutil.walk_eos`` and its files are opened through the
    ``root://eoscms/`` prefix; any other path is traversed with ``os.walk``
    and opened directly.

    Args:
        originalDS: directory holding the original sample.
        filteredDS: directory holding the filtered sample.
        treeNameOrig, treeNameFilt: name of the tree whose ``GetEntries()``
            is summed for the original / filtered files (optional).
        histNameOrig, histNameFilt: name of the histogram whose
            ``GetBinContent(1)`` is summed for the original / filtered files
            (optional).
        fileKeyOrig, fileKeyFilt: when given, only files whose name contains
            this substring are opened.

    Returns:
        ``(orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist)``.
        When the original dataset yields no events a message is printed and
        the tuple is returned immediately with the filtered counts left at 0.

    Raises:
        AssertionError: if neither a tree nor a histogram name is given for
            the original or for the filtered sample.

    Files that cannot be opened, are zombies or were recovered are skipped;
    a missing tree or histogram is reported and contributes nothing.  A
    failure on the tree does not prevent the histogram of the same file from
    being read.  Every opened file is closed again.
    """
    assert treeNameOrig is not None or histNameOrig is not None, 'Must provide a histogram or tree name for original samples'
    assert treeNameFilt is not None or histNameFilt is not None, 'Must provide a histogram or tree name for filtered samples'
    # Giving both a tree and a histogram name is allowed; both counters are
    # then filled.

    def count_events(dataset, file_key, tree_name, hist_name):
        """Return (tree entries, histogram bin-1 content) summed over dataset."""
        on_eos = bool(dataset.count('/eos/'))
        if on_eos:
            listing = ((top, files) for top, dirs, files, sizes in eosutil.walk_eos(dataset))
        else:
            listing = ((top, files) for top, dirs, files in os.walk(dataset))

        n_tree = 0
        n_hist = 0
        for top, files in listing:
            for fname in files:
                if file_key is not None and not fname.count(file_key):
                    continue

                filepath = top + '/' + fname
                ofile = ROOT.TFile.Open(('root://eoscms/' + filepath) if on_eos else filepath)

                # NOTE(review): `== None` is deliberate; a failed open is
                # presumably a PyROOT null proxy rather than the None
                # singleton -- confirm before switching to `is None`.
                if ofile == None:
                    continue

                try:
                    if ofile.IsZombie():
                        continue
                    if ofile.TestBit(ROOT.TFile.kRecovered):
                        print('File was recovered, and data is probably not available')
                        continue

                    if tree_name is not None:
                        try:
                            otree = ofile.Get(tree_name)
                            n_tree += otree.GetEntries()
                        except ReferenceError:
                            print('Could not access file')
                        except AttributeError:
                            print('Could not access tree')

                    if hist_name is not None:
                        # Lookup and read share one try block so a failed
                        # lookup can never fall through to a histogram left
                        # over from a previous file.
                        try:
                            ohist = ofile.Get(hist_name)
                            n_hist += ohist.GetBinContent(1)
                        except ReferenceError:
                            print('Could not access file')
                        except AttributeError:
                            if on_eos:
                                print('Could not access hist')
                            else:
                                print('Could not get hist from file %s' % (filepath))
                finally:
                    # Close on every path, including the `continue`s above.
                    ofile.Close()

        return n_tree, n_hist

    filt_nevt_tree = 0
    filt_nevt_hist = 0

    orig_nevt_tree, orig_nevt_hist = count_events(originalDS, fileKeyOrig, treeNameOrig, histNameOrig)

    if not orig_nevt_tree and not orig_nevt_hist:
        print('Did not get any original events. Check the path')
        return orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist

    filt_nevt_tree, filt_nevt_hist = count_events(filteredDS, fileKeyFilt, treeNameFilt, histNameFilt)

    return orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist
def get_dataset_counts(dataset, fileKey, treeName=None, histName=None, vetoes=None):
    """Sum tree entries and first-bin histogram contents over a dataset.

    A ``dataset`` path containing ``/eos/`` is traversed with
    ``eosutil.walk_eos`` and its files are opened through the
    ``root://eoscms/`` prefix; any other path is traversed with ``os.walk``
    and opened directly.

    Args:
        dataset: directory holding the files of the dataset.
        fileKey: when not None, only files whose name contains this substring
            are opened.
        treeName: name of the tree whose ``GetEntries()`` is accumulated.
        histName: name of the histogram whose ``GetBinContent(1)`` is
            accumulated.
        vetoes: a substring or list of substrings; any file whose full path
            contains one of them is skipped.  ``None`` means no veto.

    Returns:
        ``(nevt_tree, nevt_hist)``.

    Files that cannot be opened, are zombies or were recovered are skipped;
    a missing tree or histogram is reported and contributes nothing.  Every
    opened file is closed again.
    """
    if vetoes is None:
        vetoes = []
    elif not isinstance(vetoes, list):
        vetoes = [vetoes]

    def is_vetoed(path):
        """True when path contains at least one of the veto substrings."""
        for veto in vetoes:
            if path.count(veto):
                return True
        return False

    remote = bool(dataset.count('/eos/'))
    if remote:
        listing = ((top, files) for top, dirs, files, sizes in eosutil.walk_eos(dataset))
    else:
        listing = ((top, files) for top, dirs, files in os.walk(dataset))

    nevt_tree = 0
    nevt_hist = 0

    for top, files in listing:
        for fname in files:
            filepath = top + '/' + fname

            if is_vetoed(filepath):
                continue
            if fileKey is not None and not fname.count(fileKey):
                continue

            ofile = ROOT.TFile.Open(('root://eoscms/' + filepath) if remote else filepath)

            # NOTE(review): `== None` is deliberate; a failed open is
            # presumably a PyROOT null proxy rather than the None singleton
            # -- confirm before switching to `is None`.
            if ofile == None:
                continue

            try:
                if ofile.IsZombie():
                    continue
                if ofile.TestBit(ROOT.TFile.kRecovered):
                    print('File was recovered, and data is probably not available')
                    continue

                if treeName is not None:
                    try:
                        otree = ofile.Get(treeName)
                        nevt_tree += otree.GetEntries()
                    except (ReferenceError, AttributeError):
                        # Either exception means the tree is not usable.
                        if remote:
                            print('Could not access file')
                        else:
                            print(' '.join(['Could not access file with treename ', str(treeName)]))

                if histName is not None:
                    # Lookup and read share one try block so a failed lookup
                    # can never fall through to a histogram left over from a
                    # previous file.
                    try:
                        ohist = ofile.Get(histName)
                        nevt_hist += ohist.GetBinContent(1)
                    except ReferenceError:
                        print('Could not access file')
                    except AttributeError:
                        if remote:
                            print('Could not access hist')
                        else:
                            print('Could not get hist from file %s' % (filepath))
            finally:
                # Release the handle on every path, including the
                # `continue`s above.
                ofile.Close()

    return (nevt_tree, nevt_hist)
# Command-line options controlling where the original samples are looked up.
parser.add_argument( '--originalPath', dest='originalPath', default=None, help='Path to the directory contating orginal samples', required=True )
parser.add_argument( '--singleFiles', dest='singleFiles', default=False, action='store_true', help='if true, each sample is an individual root file' )
parser.add_argument( '--key', dest='key', default=None, help='Only check datasets matching key' )

options = parser.parse_args()

# Sample names found under the original path; filtered_samples starts empty
# here and is not filled in this section.
original_samples = []
filtered_samples = []

if options.originalPath.count('/eos/') :
    if options.singleFiles :
        for top, dirs, files, sizes in eosutil.walk_eos( options.originalPath ) :
            for file in files :
                # Remove the '.root' extension explicitly.  str.rstrip('.root')
                # strips any trailing run of the characters '.', 'r', 'o', 't'
                # and would turn e.g. 'ttbar.root' into 'ttba'.
                if file.endswith('.root') :
                    original_samples.append( file[:-len('.root')] )
                else :
                    original_samples.append( file )
            #only run once because the sample files should be in the given directory
            break
    else :
        for top, dirs, files, sizes in eosutil.walk_eos( options.originalPath ) :
            for dir in dirs :
                original_samples.append( dir )
            #only run once because the sample directories should be in the given directory
            break
else :
    # use os.walk locally
    for top, dirs, files in os.walk( options.originalPath ) :
        for dir in dirs :
            original_samples.append( dir )
action='store_true', help='if true, each sample is an individual root file') parser.add_argument('--key', dest='key', default=None, help='Only check datasets matching key') options = parser.parse_args() original_samples = [] filtered_samples = [] if options.originalPath.count('/eos/'): if options.singleFiles: for top, dirs, files, sizes in eosutil.walk_eos(options.originalPath): for file in files: original_samples.append(file.rstrip('.root')) #only run once because the sample files should be in the given directory break else: for top, dirs, files, sizes in eosutil.walk_eos(options.originalPath): for dir in dirs: original_samples.append(dir) #only run once because the sample directories should be in the given directory break else: # use os.walk locally for top, dirs, files in os.walk(options.originalPath): for dir in dirs: original_samples.append(dir)