def _count_eos_dataset( dataset, fileKey, treeName, histName ) :
    """Accumulate tree entries and histogram bin-1 contents over an eos dataset.

    Every file yielded by eosutil.walk_eos( dataset ) whose name contains
    fileKey (every file when fileKey is None) is opened through the
    'root://eoscms/' prefix.  If treeName is not None the entries of that tree
    are added to the tree total; if histName is not None the content of bin 1
    of that histogram is added to the histogram total.

    Files that cannot be opened are reported and skipped.

    Returns the tuple (tree_total, hist_total).
    """
    nevt_tree = 0
    nevt_hist = 0
    for top, dirs, files, sizes in eosutil.walk_eos( dataset ) :
        for fname in files :

            if fileKey is not None and fileKey not in fname : continue

            url = 'root://eoscms/' + top+'/'+fname
            ofile = ROOT.TFile.Open( url )
            # '== None' rather than 'is None' on purpose: a PyROOT null pointer
            # compares equal to None without being the None singleton.
            if ofile == None or ofile.IsZombie() :
                print('Could not open file %s' %url)
                continue

            # Always release the file, even when reading from it raises, so
            # that open handles do not pile up over a large dataset.
            try :
                if treeName is not None :
                    nevt_tree += ofile.Get(treeName).GetEntries()
                if histName is not None :
                    nevt_hist += ofile.Get(histName).GetBinContent(1)
            finally :
                ofile.Close()

    return nevt_tree, nevt_hist


def check_dataset_completion( originalDS, filteredDS, treeNameOrig=None, treeNameFilt=None, histNameOrig=None, histNameFilt=None, fileKeyOrig=None, fileKeyFilt=None ) :
    """Count events in an original and a filtered eos dataset.

    originalDS, filteredDS     : eos paths handed to eosutil.walk_eos
    treeNameOrig, treeNameFilt : tree whose entries are summed (optional)
    histNameOrig, histNameFilt : histogram whose bin-1 content is summed (optional)
    fileKeyOrig, fileKeyFilt   : only files whose name contains the key are read

    At least one of the tree / histogram names must be given for each dataset
    (AssertionError otherwise).  Giving both is allowed; both totals are then
    filled.

    Returns (orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist),
    or None (after printing a message) when no original events were found.
    """

    assert treeNameOrig is not None or histNameOrig is not None, 'Must provide a histogram or tree name for original samples'
    assert treeNameFilt is not None or histNameFilt is not None, 'Must provide a histogram or tree name for filtered samples'

    orig_nevt_tree, orig_nevt_hist = _count_eos_dataset( originalDS, fileKeyOrig, treeNameOrig, histNameOrig )

    # Nothing to compare against: do not bother scanning the filtered dataset.
    if not orig_nevt_tree and not orig_nevt_hist  :
        print('Did not get any original events.  Check the path')
        return

    filt_nevt_tree, filt_nevt_hist = _count_eos_dataset( filteredDS, fileKeyFilt, treeNameFilt, histNameFilt )

    return orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist
Example #2
0
def collect_input_files_eos( filesDir, filekey='.root' ) :
    """List the files below an eos directory whose name contains filekey.

    filesDir is walked with eosutil.walk_eos.  Each returned entry is the
    walked directory and the file name joined with '/'.
    """

    logging.info('Getting list of input files from eos in %s' %filesDir)

    return [ folder+'/'+fname
             for folder, subdirs, fnames, fsizes in eosutil.walk_eos(filesDir)
             for fname in fnames
             if filekey in fname ]
def main():
    """Pickle the list of all Data and MC ntuple files into 'files.pickle'.

    The directories '<baseDir>/Data/<version>/Ntuple' and
    '<baseDir>/MC/<version>/Ntuple' (baseDir and version are read from the
    module-level ``options``) are walked with walk_eos.  The resulting list
    holds the data files first, followed by the MC files.
    """
    mc_path = '%s/MC/%s/Ntuple' % (options.baseDir, options.version)
    data_path = '%s/Data/%s/Ntuple' % (options.baseDir, options.version)

    all_files = []
    # Data first, then MC: the order of the pickled list is preserved.
    for path in (data_path, mc_path):
        for top, dirs, files, sizes in walk_eos(path):
            all_files.extend('%s/%s' % (top, f) for f in files)

    # Binary mode is what pickle expects, and 'with' closes the file even if
    # the dump raises.
    with open('files.pickle', 'wb') as outfile:
        pickle.dump(all_files, outfile)
Example #4
0
def main() :
    """Collect every Data and MC ntuple file and pickle the list.

    Walks '<baseDir>/Data/<version>/Ntuple' and '<baseDir>/MC/<version>/Ntuple'
    with walk_eos (baseDir and version come from the module-level ``options``)
    and writes the combined list, data files first, to 'files.pickle'.
    """

    mc_path   = '%s/MC/%s/Ntuple' %( options.baseDir, options.version )
    data_path = '%s/Data/%s/Ntuple' %( options.baseDir, options.version )

    data_files = [ '%s/%s' %( top, f )
                   for top, dirs, files, sizes in walk_eos( data_path )
                   for f in files ]

    mc_files = [ '%s/%s' %( top, f )
                 for top, dirs, files, sizes in walk_eos( mc_path )
                 for f in files ]

    all_files = data_files + mc_files

    # Open in binary mode, as pickle expects, and let the context manager
    # close the file even when pickle.dump raises.
    with open('files.pickle', 'wb' ) as outfile :
        pickle.dump( all_files, outfile )
Example #5
0
def get_eos_tot_size(top_dir):
    """Return the total size of an eos directory and of each subdirectory.

    eosutil.parse_eos_dir(top_dir) supplies the immediate subdirectories and
    the sizes of the entries directly inside top_dir.  Each subdirectory is
    then walked with eosutil.walk_eos and all sizes found below it are summed.

    Returns (top_size, subdir_sizes) where subdir_sizes maps each immediate
    subdirectory name to its summed size, and top_size is the sum of all
    subdirectory sizes plus the sizes of the entries directly in top_dir.
    """
    dirs, _files, sizes = eosutil.parse_eos_dir(top_dir)

    subdir_sizes = {}
    # 'dirs or ()' keeps the original behaviour of skipping a falsy result.
    for sdir in dirs or ():
        subdir_sizes[sdir] = sum(
            sum(ssizes)
            for _stop, _sdirs, _sfiles, ssizes in eosutil.walk_eos(top_dir + '/' + sdir))

    # sum() replaces the builtin reduce(), which does not exist in Python 3.
    top_size = sum(subdir_sizes.values()) + sum(sizes)

    return top_size, subdir_sizes
Example #6
0
def get_eos_tot_size( top_dir ) :
    """Compute the size of an eos directory, broken down by subdirectory.

    The immediate subdirectories and the sizes of the entries directly in
    top_dir come from eosutil.parse_eos_dir.  For every subdirectory, the sizes
    reported by eosutil.walk_eos below it are added up.

    Returns (top_size, subdir_sizes): subdir_sizes maps each immediate
    subdirectory name to its summed size; top_size adds those sums to the
    sizes of the entries directly in top_dir.
    """

    dirs, _files, sizes = eosutil.parse_eos_dir( top_dir )

    subdir_sizes = {}
    if dirs :
        for sdir in dirs :
            sdir_total = 0
            for _stop, _sdirs, _sfiles, ssizes in eosutil.walk_eos( top_dir+'/'+sdir ) :
                sdir_total += sum( ssizes )
            subdir_sizes[sdir] = sdir_total

    # Plain sum() instead of the builtin reduce(), which Python 3 removed.
    top_size = sum( subdir_sizes.values() ) + sum( sizes )

    return top_size, subdir_sizes
Example #7
0
def _count_events_in_file( filepath, treeName, histName ) :
    """Return (tree entries, histogram bin-1 content) read from one ROOT file.

    filepath is passed verbatim to ROOT.TFile.Open.  A count is 0 when the
    matching name is None, when the file is unusable (failed open, zombie or
    recovered) or when the object cannot be read from the file.  An opened
    file is closed before returning on every path.
    """
    ofile = ROOT.TFile.Open( filepath )
    # '== None' rather than 'is None' on purpose: a PyROOT null pointer
    # compares equal to None without being the None singleton.
    if ofile == None :
        return 0, 0

    nevt_tree = 0
    nevt_hist = 0
    try :
        if ofile.IsZombie() :
            return 0, 0
        if ofile.TestBit(ROOT.TFile.kRecovered) :
            print('File was recovered, and data is probably not available')
            return 0, 0

        # Each lookup is guarded on its own, so a missing tree does not hide
        # the histogram (or vice versa) and no object from a previously read
        # file can be counted by accident.
        if treeName is not None :
            try :
                nevt_tree = ofile.Get(treeName).GetEntries()
            except (ReferenceError, AttributeError) :
                print('Could not get tree %s from file %s' %(treeName, filepath))

        if histName is not None :
            try :
                nevt_hist = ofile.Get(histName).GetBinContent(1)
            except (ReferenceError, AttributeError) :
                print('Could not get hist %s from file %s' %(histName, filepath))
    finally :
        ofile.Close()

    return nevt_tree, nevt_hist


def get_dataset_counts( dataset, fileKey, treeName=None, histName=None, vetoes=None ) :
    """Sum tree entries and histogram bin-1 contents over the files of a dataset.

    dataset  : directory to scan; walked with eosutil.walk_eos (and opened via
               'root://eoscms/') when it contains '/eos/', with os.walk otherwise
    fileKey  : only files whose name contains this string are read (None = all)
    treeName : tree whose entries are summed (None = skip)
    histName : histogram whose bin-1 content is summed (None = skip)
    vetoes   : string or list of strings; a file is skipped when its full path
               contains any of them (None or empty = no veto)

    For local datasets, files below a directory whose path contains 'failed'
    are skipped with a warning.

    Returns the tuple (nevt_tree, nevt_hist).
    """

    if vetoes is None :
        vetoes = []
    elif not isinstance( vetoes, list ) :
        vetoes = [vetoes]

    on_eos = dataset.count( '/eos/' ) > 0
    if on_eos :
        url_prefix = 'root://eoscms/'
        listing = ( (top, files) for top, dirs, files, sizes in eosutil.walk_eos( dataset ) )
    else :
        url_prefix = ''
        listing = ( (top, files) for top, dirs, files in os.walk( dataset ) )

    nevt_tree = 0
    nevt_hist = 0
    for top, files in listing :
        for fname in files :

            filepath = top + '/' + fname
            if any( veto in filepath for veto in vetoes ) :
                continue

            # the crab 'failed' directory check only applies to local datasets
            if not on_eos and top.count('failed') > 0 :
                print("WARNING!! Skip the file %s/%s as it (might) belongs to the failed files from crab"%(top, fname))
                continue

            if fileKey is not None and fileKey not in fname : continue

            ntree, nhist = _count_events_in_file( url_prefix + filepath, treeName, histName )
            nevt_tree += ntree
            nevt_hist += nhist

    return (nevt_tree, nevt_hist )
def _read_event_counts( url, treeName, histName ) :
    """Return (tree entries, histogram bin-1 content) for a single ROOT file.

    url is passed verbatim to ROOT.TFile.Open.  A count is 0 when the matching
    name is None, when the file is unusable (failed open, zombie or recovered)
    or when the object cannot be read.  An opened file is always closed.
    """
    ofile = ROOT.TFile.Open( url )
    # '== None' rather than 'is None' on purpose: a PyROOT null pointer
    # compares equal to None without being the None singleton.
    if ofile == None :
        return 0, 0

    nevt_tree = 0
    nevt_hist = 0
    try :
        if ofile.IsZombie() :
            return 0, 0
        if ofile.TestBit(ROOT.TFile.kRecovered) :
            print('File was recovered, and data is probably not available')
            return 0, 0

        # Tree and histogram are read independently: a problem with one must
        # not drop the other, and nothing from an earlier file may be reused.
        if treeName is not None :
            try :
                nevt_tree = ofile.Get(treeName).GetEntries()
            except (ReferenceError, AttributeError) :
                print('Could not get tree %s from file %s' %(treeName, url))

        if histName is not None :
            try :
                nevt_hist = ofile.Get(histName).GetBinContent(1)
            except (ReferenceError, AttributeError) :
                print('Could not get hist %s from file %s' %(histName, url))
    finally :
        ofile.Close()

    return nevt_tree, nevt_hist


def _sum_dataset_events( dataset, fileKey, treeName, histName ) :
    """Sum the per-file counts of _read_event_counts over a whole dataset.

    A dataset containing '/eos/' is walked with eosutil.walk_eos and its files
    are opened through 'root://eoscms/'; any other dataset is walked with
    os.walk.  Only files whose name contains fileKey are read (all files when
    fileKey is None).

    Returns the tuple (tree_total, hist_total).
    """
    if dataset.count( '/eos/' ) :
        url_prefix = 'root://eoscms/'
        listing = ( (top, files) for top, dirs, files, sizes in eosutil.walk_eos( dataset ) )
    else :
        url_prefix = ''
        listing = ( (top, files) for top, dirs, files in os.walk( dataset ) )

    tree_total = 0
    hist_total = 0
    for top, files in listing :
        for fname in files :

            if fileKey is not None and fileKey not in fname : continue

            ntree, nhist = _read_event_counts( url_prefix + top+'/'+fname, treeName, histName )
            tree_total += ntree
            hist_total += nhist

    return tree_total, hist_total


def check_dataset_completion( originalDS, filteredDS, treeNameOrig=None, treeNameFilt=None, histNameOrig=None, histNameFilt=None, fileKeyOrig=None, fileKeyFilt=None ) :
    """Count events in an original and a filtered dataset (eos or local).

    originalDS, filteredDS     : dataset directories; a path containing '/eos/'
                                 is read from eos, anything else locally
    treeNameOrig, treeNameFilt : tree whose entries are summed (optional)
    histNameOrig, histNameFilt : histogram whose bin-1 content is summed (optional)
    fileKeyOrig, fileKeyFilt   : only files whose name contains the key are read

    At least one of the tree / histogram names must be given for each dataset
    (AssertionError otherwise).  Giving both is allowed; both totals are then
    filled.

    Returns (orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist).
    When no original events are found a message is printed and the filtered
    dataset is not scanned, so its two counts are returned as 0.
    """

    assert treeNameOrig is not None or histNameOrig is not None, 'Must provide a histogram or tree name for original samples'
    assert treeNameFilt is not None or histNameFilt is not None, 'Must provide a histogram or tree name for filtered samples'

    filt_nevt_tree = 0
    filt_nevt_hist = 0

    orig_nevt_tree, orig_nevt_hist = _sum_dataset_events( originalDS, fileKeyOrig, treeNameOrig, histNameOrig )

    if not orig_nevt_tree and not orig_nevt_hist  :
        print('Did not get any original events.  Check the path')
        return orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist

    filt_nevt_tree, filt_nevt_hist = _sum_dataset_events( filteredDS, fileKeyFilt, treeNameFilt, histNameFilt )

    return orig_nevt_tree, orig_nevt_hist, filt_nevt_tree, filt_nevt_hist
Example #9
0
def _tally_root_file( url, treeName, histName ) :
    """Read one ROOT file and return (tree entries, histogram bin-1 content).

    url is passed verbatim to ROOT.TFile.Open.  A count is 0 when the matching
    name is None, when the file is unusable (failed open, zombie or recovered)
    or when the object cannot be read.  An opened file is always closed, so
    handles do not accumulate while a large dataset is scanned.
    """
    ofile = ROOT.TFile.Open( url )
    # '== None' rather than 'is None' on purpose: a PyROOT null pointer
    # compares equal to None without being the None singleton.
    if ofile == None :
        return 0, 0

    ntree = 0
    nhist = 0
    try :
        if ofile.IsZombie() :
            return 0, 0
        if ofile.TestBit(ROOT.TFile.kRecovered) :
            print('File was recovered, and data is probably not available')
            return 0, 0

        # Independent lookups: a missing tree must not prevent the histogram
        # from being read, and a failed lookup must never fall back on an
        # object left over from a previously read file.
        if treeName is not None :
            try :
                ntree = ofile.Get(treeName).GetEntries()
            except (ReferenceError, AttributeError) :
                print('Could not get tree %s from file %s' %(treeName, url))

        if histName is not None :
            try :
                nhist = ofile.Get(histName).GetBinContent(1)
            except (ReferenceError, AttributeError) :
                print('Could not get hist %s from file %s' %(histName, url))
    finally :
        ofile.Close()

    return ntree, nhist


def get_dataset_counts( dataset, fileKey, treeName=None, histName=None, vetoes=None ) :
    """Sum tree entries and histogram bin-1 contents over the files of a dataset.

    dataset  : directory to scan; walked with eosutil.walk_eos (and opened via
               'root://eoscms/') when it contains '/eos/', with os.walk otherwise
    fileKey  : only files whose name contains this string are read (None = all)
    treeName : tree whose entries are summed (None = skip)
    histName : histogram whose bin-1 content is summed (None = skip)
    vetoes   : string or list of strings; a file is skipped when its full path
               contains any of them (None or empty = no veto)

    Returns the tuple (nevt_tree, nevt_hist).
    """

    if vetoes is None :
        vetoes = []
    elif not isinstance( vetoes, list ) :
        vetoes = [vetoes]

    if dataset.count( '/eos/' ) :
        url_prefix = 'root://eoscms/'
        walker = eosutil.walk_eos( dataset )
    else :
        url_prefix = ''
        walker = os.walk( dataset )

    nevt_tree = 0
    nevt_hist = 0
    # Both walkers yield (top, dirs, files, ...); only the first and third
    # items are needed here.
    for entry in walker :
        top = entry[0]
        for fname in entry[2] :

            filepath = top + '/' + fname

            vetoed = False
            for veto in vetoes :
                if veto in filepath :
                    vetoed = True
                    break
            if vetoed :
                continue

            if fileKey is not None and fileKey not in fname : continue

            ntree, nhist = _tally_root_file( url_prefix + filepath, treeName, histName )
            nevt_tree += ntree
            nevt_hist += nhist

    return (nevt_tree, nevt_hist )
Example #10
0
# Directory holding the original samples (mandatory).
parser.add_argument( '--originalPath', dest='originalPath', default=None, help='Path to the directory contating orginal samples', required=True )

# When set, every sample is a single root file instead of a directory.
parser.add_argument( '--singleFiles', dest='singleFiles', default=False, action='store_true', help='if true, each sample is an individual root file' )

# Optional selection of the datasets to check.
parser.add_argument( '--key', dest='key', default=None, help='Only check datasets matching key' )


options = parser.parse_args()

original_samples = []
filtered_samples = []

# Build the list of sample names found directly inside originalPath.
if options.originalPath.count('/eos/') :
    if options.singleFiles :
        for top, dirs, files, sizes in eosutil.walk_eos( options.originalPath ) :
            for file in files :
                # Remove the '.root' suffix explicitly.  str.rstrip('.root')
                # would strip any trailing run of the characters '.', 'r', 'o'
                # and 't', turning e.g. 'ttbar.root' into 'ttba'.
                if file.endswith('.root') :
                    original_samples.append( file[:-len('.root')] )
                else :
                    original_samples.append( file )
            #only run once because the sample files should be in the given directory
            break
    else :
        for top, dirs, files, sizes in eosutil.walk_eos( options.originalPath ) :
            for dir in dirs :
                original_samples.append( dir )
            #only run once because the sample directories should be in the given directory
            break
else :
    # use os.walk locally
    # NOTE(review): unlike the eos branches, no 'break' is visible after this
    # loop and --singleFiles is not consulted here - confirm this is intended.
    for top, dirs, files in os.walk( options.originalPath ) :
        for dir in dirs :
            original_samples.append( dir )
Example #11
0
                    action='store_true',
                    help='if true, each sample is an individual root file')

# Optional selection of the datasets to check.
parser.add_argument('--key',
                    dest='key',
                    default=None,
                    help='Only check datasets matching key')

options = parser.parse_args()

original_samples = []
filtered_samples = []

# Build the list of sample names found directly inside originalPath.
if options.originalPath.count('/eos/'):
    if options.singleFiles:
        for top, dirs, files, sizes in eosutil.walk_eos(options.originalPath):
            for file in files:
                # Remove the '.root' suffix explicitly.  str.rstrip('.root')
                # would strip any trailing run of the characters '.', 'r', 'o'
                # and 't', turning e.g. 'ttbar.root' into 'ttba'.
                if file.endswith('.root'):
                    original_samples.append(file[:-len('.root')])
                else:
                    original_samples.append(file)
            #only run once because the sample files should be in the given directory
            break
    else:
        for top, dirs, files, sizes in eosutil.walk_eos(options.originalPath):
            for dir in dirs:
                original_samples.append(dir)
            #only run once because the sample directories should be in the given directory
            break
else:
    # use os.walk locally
    # NOTE(review): unlike the eos branches, no 'break' is visible after this
    # loop and --singleFiles is not consulted here - confirm this is intended.
    for top, dirs, files in os.walk(options.originalPath):
        for dir in dirs:
            original_samples.append(dir)