Esempio n. 1
0
def hadd(output_fn, input_fns):
    """Merge ROOT files with the external ``hadd`` program, quietly.

    This is a simple wrapper around hadd that suppresses the stdout
    from hadd, only reporting a summary line of how many files were
    merged. hadd's combined stdout/stderr is saved to
    ``output_fn + '.haddlog'`` ('.2' is appended until the name is
    unused). Output to eos is supported: when output_fn contains
    '/store/' the log is first written to a local temporary file and
    then copied with eos.cp.

    Args:
        output_fn: path of the merged file to create.
        input_fns: sequence of paths of the files to merge.

    Returns:
        True if hadd exited with status 0 and the highest "Source file"
        number found in its output equals len(input_fns). Otherwise an
        error line is printed to stdout and False is returned.
    """

    l = len(input_fns)
    start = datetime.now()
    print('hadding %i files to %s at %s' % (l, output_fn, start))
    args = ['hadd', output_fn] + list(input_fns)

    # stderr is folded into stdout so the log holds everything in order;
    # communicate() therefore hands back None for stderr.
    p = subprocess.Popen(args=args,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    stdout, stderr = p.communicate()
    assert stderr is None

    log_fn = output_fn + '.haddlog'
    is_eos = '/store/' in output_fn  # ugh
    # never overwrite a log left behind by an earlier attempt
    while eos.exists(log_fn) if is_eos else os.path.exists(log_fn):
        log_fn += '.2'

    if is_eos:
        fd, tmp_fn = tempfile.mkstemp()
        try:
            # close (and so flush) the temp file before it is copied
            with os.fdopen(fd, 'wt') as tmp_f:
                tmp_f.write(stdout)
            # if the haddlog already exists the new one will silently go
            # into the ether...
            eos.cp(tmp_fn, log_fn)
        finally:
            # do not leave the temp file behind if the copy blows up
            os.remove(tmp_fn)
    else:
        with open(log_fn, 'wt') as log_f:
            log_f.write(stdout)

    if p.returncode != 0:
        print(colors.error('PROBLEM hadding %s' % output_fn))
        return False

    # Lines containing 'Source file' carry the running file number as the
    # last space-separated token before the first colon.
    file_nums = [int(line.split(':')[0].split(' ')[-1])
                 for line in stdout.split('\n')
                 if 'Source file' in line]
    # no such lines at all counts as zero files merged rather than a crash
    max_file_num = max(file_nums) if file_nums else 0
    print('-> %i files merged in %s' % (max_file_num, datetime.now() - start))
    if max_file_num != l:
        print(colors.error('PROBLEM hadding %s' % output_fn))
        return False

    return True
Esempio n. 2
0
def hadd(output_fn, input_fns):
    """Merge ROOT files with the external ``hadd`` program, quietly.

    This is a simple wrapper around hadd that suppresses the stdout
    from hadd, only reporting a summary line of how many files were
    merged. hadd's combined stdout/stderr is saved to
    ``output_fn + '.haddlog'`` ('.2' is appended until the name is
    unused). Output to eos is supported: when output_fn contains
    '/store/' the log is first written to a local temporary file and
    then copied with eos.cp.

    Args:
        output_fn: path of the merged file to create.
        input_fns: sequence of paths of the files to merge.

    Returns:
        True if hadd exited with status 0 and the highest "Source file"
        number found in its output equals len(input_fns). Otherwise an
        error line is printed to stdout and False is returned.
    """

    l = len(input_fns)
    start = datetime.now()
    print('hadding %i files to %s at %s' % (l, output_fn, start))
    args = ['hadd', output_fn] + list(input_fns)

    # stderr is folded into stdout so the log holds everything in order;
    # communicate() therefore hands back None for stderr.
    p = subprocess.Popen(args=args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    stdout, stderr = p.communicate()
    assert stderr is None

    log_fn = output_fn + '.haddlog'
    is_eos = '/store/' in output_fn # ugh
    # never overwrite a log left behind by an earlier attempt
    while eos.exists(log_fn) if is_eos else os.path.exists(log_fn):
        log_fn += '.2'

    if is_eos:
        fd, tmp_fn = tempfile.mkstemp()
        try:
            # close (and so flush) the temp file before it is copied
            with os.fdopen(fd, 'wt') as tmp_f:
                tmp_f.write(stdout)
            eos.cp(tmp_fn, log_fn) # if the haddlog already exists the new one will silently go into the ether...
        finally:
            # do not leave the temp file behind if the copy blows up
            os.remove(tmp_fn)
    else:
        with open(log_fn, 'wt') as log_f:
            log_f.write(stdout)

    if p.returncode != 0:
        print(colors.boldred('PROBLEM hadding %s' % output_fn))
        return False

    # Lines containing 'Source file' carry the running file number as the
    # last space-separated token before the first colon.
    file_nums = [int(line.split(':')[0].split(' ')[-1]) for line in stdout.split('\n') if 'Source file' in line]
    # no such lines at all counts as zero files merged rather than a crash
    max_file_num = max(file_nums) if file_nums else 0
    print('-> %i files merged in %s' % (max_file_num, datetime.now() - start))
    if max_file_num != l:
        print(colors.boldred('PROBLEM hadding %s' % output_fn))
        return False

    return True
# For each sample, inspect a random subset of its files on eos and look up
# each file's size on disk and the event count / size of its Events tree.
# NOTE(review): this snippet appears to be cut off -- infos, total_size and
# total_size_in_events are initialised here but never updated in the
# visible code.
infos = []

for sample in samples:
    if verbosity >= 1:
        print sample
    fns = SampleFiles.get_fns(sample, dataset)
    # keep only nfiles randomly chosen files
    # (presumably the stdlib random module, whose sample() raises
    # ValueError when nfiles > len(fns) -- TODO confirm)
    fns = random.sample(fns, nfiles)

    # per-sample accumulators
    total_size = 0
    total_size_in_events = 0

    for fn in fns:
        assert fn.endswith('.root')
        if verbosity >= 2:
            print fn
        if not eos.exists(fn):
            raise IOError('does not exist on eos: %r' % fn)

        # skip files whose basename does not match the requested pattern
        bn = os.path.basename(fn)
        if not fnmatch(bn, pattern):
            continue

        size = eos.size(fn)
        info = EdmFileInfo(eos.canon(fn))
        nevents = info.Events.nevents
        size_in_events = info.Events.size()

        if verbosity >= 3:
            # float() forces true division under Python 2
            print '  size %.0f in %i events %.0f frac %.4f' % (
                size, nevents, size_in_events, float(size_in_events) / size)
Esempio n. 4
0
import sys, os
from pprint import pprint
from JMTucker.Tools import eos, SampleFiles
from JMTucker.Tools.ROOTTools import ROOT, detree

# Command-line driver: print the file(s) of a sample whose Events tree
# contains a given (run, lumi, event), and exit with an error message
# unless exactly one match was found.
if len(sys.argv) < 6:
    sys.exit('usage: %s dataset sample run lumi event\n  where dataset and sample are as registered in SampleFiles. sample can be "*" to mean all samples having the dataset.' % sys.argv[0])

dataset = sys.argv[1]
sample = sys.argv[2]
# the (run, lumi, event) triple to look for
rle = int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5])

fns = SampleFiles.get_fns(sample, dataset)
nfound = 0

for fn in fns:
    assert fn.endswith('.root')
    if not eos.exists(fn):
        raise IOError('does not exist on eos: %r' % fn)

    f = ROOT.TFile.Open(eos.canon(fn))
    # TFile.Open signals failure with a null or zombie object rather than
    # an exception, so check before using it.
    if not f or f.IsZombie():
        raise IOError('could not open %r' % fn)
    try:
        t = f.Get('Events')
        for x in sorted(detree(t, 'EventAuxiliary.id().run():EventAuxiliary.luminosityBlock():EventAuxiliary.id().event()', xform=int)):
            if x == rle:
                print(fn)
                nfound += 1
    finally:
        # close each file once scanned so open files do not pile up over
        # the whole file list
        f.Close()

if nfound != 1:
    sys.exit('%i found' % nfound)

Esempio n. 5
0
# For each sample, walk its files on eos, parse every file name and path,
# and count how many files sit under each path captured by path_re.
# NOTE(review): this snippet appears to be cut off -- vh_fns, dn and jobnum
# are assigned but not used in the visible code.
for sample in samples:
    if verbosity >= 1:
        print sample
    fns = SampleFiles.get_fns(sample, dataset)
    vh_fns = []

    # since the files can be spread out over multiple datedirs due to
    # resubmissions, put the file in the date dir that has most of the files
    new_vh_paths = Counter()

    for fn in fns:
        assert fn.endswith('.root')
        if verbosity >= 2:
            print fn
        if not eos.exists(fn):
            raise IOError('does not exist on eos: %r' % fn)

        dn, bn = os.path.split(fn)
        mo = bn_re.search(bn)
        if not mo:
            raise ValueError('could not parse fn %r' % fn)

        # bn_re is expected to capture exactly two groups; only the second
        # (the job number) is kept
        _, jobnum = mo.groups()
        jobnum = int(jobnum)

        # tally one more file for the path captured by group 1 of path_re
        # (presumably the date dir -- TODO confirm against path_re)
        path_mo = path_re.search(fn)
        if not path_mo:
            raise ValueError('could not parse path for %r' % fn)
        new_vh_paths[path_mo.group(1)] += 1