Example #1
0
def crab_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None):
    if working_dir.endswith('/'):
        working_dir = working_dir[:-1]
    if new_name is None:
        new_name = '_'.join(os.path.basename(working_dir).split('_')[2:])
    if not new_name.endswith('.root'):
        new_name += '.root'
    if new_dir is not None:
        new_name = os.path.join(new_dir, new_name)

    expected = crab_get_njobs(working_dir)
    print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected)

    res = crab_command('out', '--xrootd', dir=working_dir)

    if 'No files to retrieve.' in res:
        files = []
    else:
        files = [x.strip() for x in res.split('\n') if x.strip() and '.root' in x]
        if pattern:
            if '/' not in pattern:
                pattern = '*/' + pattern
            files = fnmatch.filter(files, pattern)

    jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files]
    jobs.sort()
    expected = range(1, expected+1)

    if jobs != expected:
        print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), crabify_list(jobs))
        missing = sorted(set(expected) - set(jobs))
        print '\033[36;7m    %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing))

    l = len(files)
    if l == 0:
        msg = 'crab_hadd: no files found in %s' % working_dir
        if raise_on_empty:
            raise CRABToolsException(msg)
        else:
            print '\033[36;7m', msg, '\033[m'
    elif l == 1:
        print working_dir, ': just one file found, copying'
        cmd = 'xrdcp -s %s %s' % (files[0], new_name)
        os.system(cmd)
        os.chmod(new_name, 0644)
    else:
        hadd(new_name, files, chunk_size)

    return new_name
Example #2
0
def cs_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None):
    if working_dir.endswith('/'):
        working_dir = working_dir[:-1]
    if new_name is None:
        new_name = '_'.join(os.path.basename(working_dir).split('_')[1:])
    if not new_name.endswith('.root'):
        new_name += '.root'
    if new_dir is not None:
        new_name = os.path.join(new_dir, new_name)

    expected = cs_njobs(working_dir)
    print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected)

    files = glob(os.path.join(working_dir, '*.root'))

    if pattern:
        if '/' not in pattern:
            pattern = '*/' + pattern
        files = fnmatch.filter(files, pattern)

    jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files]
    jobs.sort()
    expected = range(expected)

    if jobs != expected:
        print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), jobs)
        missing = sorted(set(expected) - set(jobs))
        print '\033[36;7m    %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing))

    l = len(files)
    if l == 0:
        msg = 'cs_hadd: no files found in %s' % working_dir
        if raise_on_empty:
            raise CSHelpersException(msg)
        else:
            print '\033[36;7m', msg, '\033[m'
    elif l == 1:
        print working_dir, ': just one file found, copying'
        cmd = 'cp %s %s' % (files[0], new_name)
        os.system(cmd)
        os.chmod(new_name, 0644)
    else:
        hadd(new_name, files, chunk_size)

    return new_name
def cs_hadd(working_dir,
            new_name=None,
            new_dir=None,
            raise_on_empty=False,
            chunk_size=900,
            pattern=None,
            range_filter=None):
    working_dir, new_name, new_dir = cs_hadd_args(working_dir, new_name,
                                                  new_dir)
    expected, files = cs_hadd_files(working_dir, range_filter=range_filter)
    result = HaddBatchResult('condor', working_dir, new_name, new_dir,
                             expected, files)
    print '%s: expecting %i files if all jobs succeeded' % (working_dir,
                                                            expected)

    if pattern:
        if '/' not in pattern:
            pattern = '*/' + pattern
        files = fnmatch.filter(files, pattern)

    jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files]
    jobs.sort()
    expected = range(expected)

    if jobs != expected:
        print '\033[36;7m %i files found %s not what expected \033[m' % (
            len(jobs), jobs)
        missing = sorted(set(expected) - set(jobs))
        print '\033[36;7m    %i missing: %r \033[m' % (len(missing), ' '.join(
            str(j) for j in missing))

    l = len(files)
    if l == 0:
        result.success = False
        msg = 'cs_hadd: no files found in %s' % working_dir
        if raise_on_empty:
            raise CSHelpersException(msg)
        else:
            print '\033[36;7m', msg, '\033[m'
    elif l == 1:
        print working_dir, ': just one file found, copying'
        if files[0].startswith('root://'):
            cmd = 'xrdcp -s %s %s' % (files[0], new_name)
        else:
            cmd = 'cp %s %s' % (files[0], new_name)
        result.success = os.system(cmd) == 0
        if result.success and not new_name.startswith('root://'):
            os.chmod(new_name, 0644)
    else:
        result.success = hadd(new_name, files)

    return result
def cs_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None, range_filter=None):
    working_dir, new_name, new_dir = cs_hadd_args(working_dir, new_name, new_dir)
    expected, files = cs_hadd_files(working_dir, range_filter=range_filter)
    print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected)

    if pattern:
        if '/' not in pattern:
            pattern = '*/' + pattern
        files = fnmatch.filter(files, pattern)

    jobs = [int(f.split('_')[-1].split('.root')[0]) for f in files]
    jobs.sort()
    expected = range(expected)

    if jobs != expected:
        print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), jobs)
        missing = sorted(set(expected) - set(jobs))
        print '\033[36;7m    %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing))

    l = len(files)
    if l == 0:
        msg = 'cs_hadd: no files found in %s' % working_dir
        if raise_on_empty:
            raise CSHelpersException(msg)
        else:
            print '\033[36;7m', msg, '\033[m'
    elif l == 1:
        print working_dir, ': just one file found, copying'
        if files[0].startswith('root://'):
            cmd = 'xrdcp -s %s %s' % (files[0], new_name)
        else:
            cmd = 'cp %s %s' % (files[0], new_name)
        os.system(cmd)
        os.chmod(new_name, 0644)
    else:
        hadd(new_name, files)

    return new_name
Example #5
0
def doit(path, out_fn):
    x = fromtree(os.path.join(path, 'observed.root'))
    if len(x) != 1:
        print 'using observed_byhand for this!'
        x = fromtree(os.path.join(path, 'observed_byhand.root'))
    obs = x[0]

    exp_fn = os.path.join(path, 'expected.root')
    if not os.path.isfile(exp_fn):
        exp_fns = glob(exp_fn.replace('.root', '_*.root'))
        if len(exp_fns) != njobs:
            raise ValueError('only found %i files, expected %i' %
                             (len(exp_fns), njobs))
        if not hadd(exp_fn, exp_fns):
            raise ValueError('problem hadding %s from %s files' %
                             (exp_fn, len(exp_fns)))

    exp = fromtree(exp_fn)
    if len(exp) != njobs * ntoysperjob:
        raise ValueError(
            'unexpected number of points in %s' %
            exp_fn)  # this can't be given the asserts in fromtree right now

    stats(out_fn, obs, exp)
Example #6
0
def crab_hadd(working_dir,
              new_name=None,
              new_dir=None,
              raise_on_empty=False,
              chunk_size=900,
              pattern=None,
              lpc_shortcut=False,
              range_filter=None):
    working_dir, new_name, new_dir = crab_hadd_args(working_dir, new_name,
                                                    new_dir)
    expected, files = crab_hadd_files(working_dir,
                                      lpc_shortcut,
                                      range_filter=range_filter)
    result = HaddBatchResult('crab', working_dir, new_name, new_dir, expected,
                             files)
    print '%s: expecting %i files if all jobs succeeded' % (working_dir,
                                                            expected)

    if pattern:
        if '/' not in pattern:
            pattern = '*/' + pattern
        files = fnmatch.filter(files, pattern)

    automatic_splitting = False
    pprinted = False
    jobs = []
    for f in files:
        jobnum = f.split('_')[-1].split('.root')[0]
        if crab_global_options.support_automatic_splitting and '-' in jobnum:
            automatic_splitting = True
            if not pprinted:
                pprint(files)
                pprinted = True
            it, jobnum = jobnum.split('-')
            it, jobnum = int(it), int(jobnum)
            assert it >= 1  # probe jobs "0-*" should not show up
            jobnum = it * 10000 + jobnum
        else:
            jobnum = int(jobnum)
        jobs.append(jobnum)
    jobs.sort()
    expected = range(1, expected + 1)

    if jobs != expected:
        print '\033[36;7m %i files found %s not what expected \033[m' % (
            len(jobs), crabify_list(jobs))
        missing = sorted(set(expected) - set(jobs))
        print '\033[36;7m    %i missing: %r \033[m' % (len(missing), ' '.join(
            str(j) for j in missing))

    l = len(files)
    if l == 0:
        result.success = False
        msg = 'crab_hadd: no files found in %s' % working_dir
        if raise_on_empty:
            raise CRABToolsException(msg)
        else:
            print '\033[36;7m', msg, '\033[m'
    elif l == 1:
        print working_dir, ': just one file found, copying'
        cmd = 'xrdcp -s %s %s' % (files[0], new_name)
        result.success = os.system(cmd) == 0
        if result.success and not new_name.startswith('root://'):
            os.chmod(new_name, 0644)
    else:
        result.success = hadd(new_name, files)

    if automatic_splitting:
        n = norm_from_file(new_name)
        sn, s = fn_to_sample(Samples, new_name)
        if not s:
            print colors.yellow(
                "\tnorm_from_file returns %r, couldn't get sample %s" %
                (n, sn))
        else:
            no1, no2 = s.datasets['main'].nevents_orig, s.datasets[
                'miniaod'].nevents_orig
            if n == no1 or n == no2:
                print '\tnorm_from_file returns nevents_orig = %i' % n
            else:
                print colors.yellow(
                    '\tnorm_from_file returns %r while %s.nevents_orig is %i (main) %i (miniaod'
                    % (n, sn, no1, no2))

    return result
Example #7
0
parser.add_argument('positional', nargs='*')

parser.add_argument('-l',
                    '--list',
                    help='File containing list of filenames for input.')

options = parser.parse_args()

output_fn = None
input_files = []

if options.list:
    # With --list the single positional argument is the output file name and
    # the inputs are read from the list file, one per line.
    if len(options.positional) != 1:
        raise ValueError(
            'when doing --list, only one positional argument allowed (the output filename)'
        )
    output_fn = options.positional[0]
    # open() inside a with-block closes the list file as soon as it has been
    # read, instead of leaving an unclosed file() object behind.
    with open(options.list) as list_file:
        for line in list_file:
            line = line.strip()
            if line:  # skip blank lines
                input_files.append(line)
else:
    # Without --list: first positional is the output, the rest are inputs,
    # so at least two are required.
    if len(options.positional) < 2:
        parser.print_usage()
        sys.exit(1)
    output_fn = options.positional[0]
    input_files = options.positional[1:]

hadd(output_fn, input_files)
Example #8
0
%(prog)s output.root file1.root file2.root [file3.root ... fileN.root]
   -or-
%(prog)s -l input_file_list.txt output.root''')

parser.add_argument('positional', nargs='*')

parser.add_argument('-l', '--list',
                    help='File containing list of filenames for input.')

options = parser.parse_args()

output_fn = None
input_files = []

if options.list:
    # With --list the single positional argument is the output file name and
    # the inputs are read from the list file, one per line.
    if len(options.positional) != 1:
        raise ValueError('when doing --list, only one positional argument allowed (the output filename)')
    output_fn = options.positional[0]
    # open() inside a with-block closes the list file as soon as it has been
    # read, instead of leaving an unclosed file() object behind.
    with open(options.list) as list_file:
        for line in list_file:
            line = line.strip()
            if line:  # skip blank lines
                input_files.append(line)
else:
    # Without --list: first positional is the output, the rest are inputs,
    # so at least two are required.
    if len(options.positional) < 2:
        parser.print_usage()
        sys.exit(1)
    output_fn = options.positional[0]
    input_files = options.positional[1:]

hadd(output_fn, input_files)
Example #9
0
        new_vh_paths[path_mo.group(1)] += 1

        vh_fn = os.path.join(dn, 'vertex_histos_%s.root' % jobnum)
        if verbosity >= 3:
            print dn, bn, jobnum, vh_fn
        if not eos.exists(vh_fn):
            # Both values must be passed as one tuple: '%' binds tighter than
            # ',', so without the parentheses the format step itself raises
            # TypeError and the intended IOError is never produced.
            raise IOError('no %r for %r' % (vh_fn, fn))
        vh_fns.append(eos.canon(vh_fn))

    new_vh_path = new_vh_paths.most_common(1)[0][0]
    new_vh_fn = eos.canon(os.path.join(new_vh_path, 'vertex_histos.root'))
    if verbosity >= 3:
        print new_vh_fn
    if eos.exists(new_vh_fn):
        raise ValueError('exists already: %r' % new_vh_fn)

    hadds.append((new_vh_fn, vh_fns))

if rm:
    print 'hadd and rm these:'
else:
    print 'hadd these:'
pprint(hadds)
if rm and raw_input('ok? ').strip().lower() != 'y':
    sys.exit('giving up')

for new_fn, fns in hadds:
    if hadd(new_fn, fns) and rm:
        for fn in fns:
            eos.rm(fn)
Example #10
0
#!/usr/bin/env python

import sys
from JMTucker.Tools.hadd import hadd
# First command-line argument is the output file; all remaining ones are inputs.
output_fn = sys.argv[1]
hadd(output_fn, sys.argv[2:])
Example #11
0
def crab_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900):
    """Merge the ROOT output files of one CRAB working directory.

    working_dir    -- CRAB working directory; one trailing '/' is dropped.
    new_name       -- output file name. If None it is the basename of
                      working_dir with every 'crab_' substring removed.
                      '.root' is appended when missing.
    new_dir        -- if not None, new_name is joined onto this directory.
    raise_on_empty -- raise CrabError instead of printing a message when no
                      files are found.
    chunk_size     -- passed on to hadd() when several files are merged.

    Returns the output file name (including new_dir when given).
    """
    if working_dir.endswith('/'):
        working_dir = working_dir[:-1]
    if new_name is None:
        new_name = os.path.basename(working_dir).replace('crab_','')
    if not new_name.endswith('.root'):
        new_name += '.root'
    if new_dir is not None:
        new_name = os.path.join(new_dir, new_name)

    expected = crab_get_njobs(working_dir)
    print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected)

    # Work out from the [USER] section of the CRAB config where the output
    # was stored; a missing option leaves the corresponding flag False.
    on_resilient = False
    on_store = False
    cfg = crab_cfg_parser(working_dir)
    try:
        storage_path = cfg.get('USER', 'storage_path')
        on_resilient = 'resilient' in storage_path
    except NoOptionError:
        pass
    try:
        storage_element = cfg.get('USER', 'storage_element')
        on_store = storage_element == 'T3_US_FNALLPC'
    except NoOptionError:
        pass

    files = []
    
    # For remote storage the file names are rebuilt as dcap URLs from the
    # PFNs that crab_analysis_file_pfn() returns for the res/crab_fjr*xml
    # files; otherwise the ROOT files are taken from the local res/ directory.
    if on_resilient:
        pfns = [crab_analysis_file_pfn(path) for path in glob.glob(os.path.join(working_dir, 'res/crab_fjr*xml'))]
        files = ['dcap://cmsdca3.fnal.gov:24145/pnfs/fnal.gov/usr/cms/WAX/resilient/' + pfn.split('/resilient/')[1] for pfn in pfns] # JMTBAD
    elif on_store:
        pfns = [crab_analysis_file_pfn(path) for path in glob.glob(os.path.join(working_dir, 'res/crab_fjr*xml'))]
        files = ['dcap://cmsdca3.fnal.gov:24145/pnfs/fnal.gov/usr/cms/WAX/11/' + pfn.split('/11/')[1] for pfn in pfns] # JMTBAD
    else:    
        files = glob.glob(os.path.join(working_dir, 'res/*root'))

    # job_nums[job][num] -> list of files, where job and num are the third-
    # and second-to-last '_'-separated fields of the file name (num is the
    # "resub num" mentioned in the messages below).
    job_nums = defaultdict(lambda: defaultdict(list))
    for f in files:
        f_ = f.split('_')
        job, num = int(f_[-3]), int(f_[-2])
        job_nums[job][num].append(f)

    sexpected = set(xrange(1,expected+1))
    sjobs = set(job_nums)
    # NOTE(review): duplicate files are only dropped inside this branch, i.e.
    # when the set of job numbers differs from the expected set. If every job
    # is present but one has several files, nothing is dropped and all of them
    # reach hadd() -- confirm whether that is intended.
    if sjobs != sexpected:
        print '\033[36;7m files found %r not what expected \033[m' % sorted(sjobs)

        missing = sorted(sexpected - sjobs)
        print '\033[36;7m     missing: %r \033[m' % missing

        # Pairs of (file to drop, file kept instead).
        to_drop = []
        for job, nums_and_fs in job_nums.iteritems():
            # Step 1: within one (job, num), keep only the newest file by
            # mtime. fs is shortened in place, so afterwards every list in
            # nums_and_fs holds exactly one file.
            for num, fs in nums_and_fs.iteritems():
                if len(fs) > 1:
                    print '\033[36;7m     for job %i, more than one file with resub num %i *** will keep latest by mtime *** \033[m' % (job, num)
                    fs.sort(key=lambda f: os.stat(f).st_mtime)
                    good_f = fs[-1]
                    while len(fs) > 1:
                        bad_f = fs.pop(0)
                        to_drop.append((bad_f, good_f))
            # Step 2: across several nums for one job, keep the file with the
            # highest num and drop the others.
            if len(nums_and_fs) > 1:
                good_num = max(nums_and_fs)
                assert len(nums_and_fs[good_num]) == 1
                good_f = nums_and_fs[good_num][0]
                for num, fs in nums_and_fs.iteritems():
                    if num != good_num:
                        assert len(fs) == 1
                        to_drop.append((fs[0], good_f))

        for f, good_f in to_drop:
            print '\033[36;7m     dropping %s in favor of %s \033[m' % (f, good_f)
            files.remove(f)

    
    l = len(files)
    if l == 0:
        msg = 'crab_hadd: no files found in %s' % working_dir
        if raise_on_empty:
            raise CrabError(msg)
        else:
            print '\033[36;7m', msg, '\033[m'
    elif l == 1:
        print working_dir, ': just one file found, copying'
        # 'dccp' is used for dcap URLs, plain 'cp' otherwise.
        cmd = '%scp %s %s' % ('dc' if 'dcap' in files[0] else '', files[0], new_name)
        os.system(cmd)
        os.chmod(new_name, 0644)
    else:
        hadd(new_name, files, chunk_size)
        
    return new_name
def crab_hadd(working_dir, new_name=None, new_dir=None, raise_on_empty=False, chunk_size=900, pattern=None, lpc_shortcut=False, range_filter=None):
    working_dir, new_name, new_dir = crab_hadd_args(working_dir, new_name, new_dir)
    expected, files = crab_hadd_files(working_dir, lpc_shortcut, range_filter=range_filter)
    print '%s: expecting %i files if all jobs succeeded' % (working_dir, expected)

    if pattern:
        if '/' not in pattern:
            pattern = '*/' + pattern
        files = fnmatch.filter(files, pattern)

    automatic_splitting = False
    pprinted = False
    jobs = []
    for f in files:
        jobnum = f.split('_')[-1].split('.root')[0]
        if crab_global_options.support_automatic_splitting and '-' in jobnum:
            automatic_splitting = True
            if not pprinted:
                pprint(files)
                pprinted = True
            it, jobnum = jobnum.split('-')
            it, jobnum = int(it), int(jobnum)
            assert it >= 1 # probe jobs "0-*" should not show up
            jobnum = it*10000 + jobnum
        else:
            jobnum = int(jobnum)
        jobs.append(jobnum)
    jobs.sort()
    expected = range(1, expected+1)

    if jobs != expected:
        print '\033[36;7m %i files found %s not what expected \033[m' % (len(jobs), crabify_list(jobs))
        missing = sorted(set(expected) - set(jobs))
        print '\033[36;7m    %i missing: %r \033[m' % (len(missing), ' '.join(str(j) for j in missing))

    l = len(files)
    if l == 0:
        msg = 'crab_hadd: no files found in %s' % working_dir
        if raise_on_empty:
            raise CRABToolsException(msg)
        else:
            print '\033[36;7m', msg, '\033[m'
    elif l == 1:
        print working_dir, ': just one file found, copying'
        cmd = 'xrdcp -s %s %s' % (files[0], new_name)
        os.system(cmd)
        os.chmod(new_name, 0644)
    else:
        hadd(new_name, files)

    if automatic_splitting:
        n = norm_from_file(new_name)
        sn, s = fn_to_sample(Samples, new_name)
        if not s:
            print colors.yellow("\tnorm_from_file returns %r, couldn't get sample %s" % (n, sn))
        else:
            no1, no2 = s.datasets['main'].nevents_orig, s.datasets['miniaod'].nevents_orig
            if n == no1 or n == no2:
                print '\tnorm_from_file returns nevents_orig = %i' % n
            else:
                print colors.yellow('\tnorm_from_file returns %r while %s.nevents_orig is %i (main) %i (miniaod' % (n, sn, no1, no2))

    return new_name