Example #1
0
def get_input_from_argv(process):
    from JMTucker.Tools.general import typed_from_argv

    # Look for just a list of files in argv first.
    def leave_alone(x):
        return x.startswith('/store') or x.startswith('root://')
    files = [x for x in sys.argv if leave_alone(x) and x.endswith('.root')]
    files += ['file:%s' % x for x in sys.argv if not leave_alone(x) and os.path.isfile(x) and x.endswith('.root')]

    if not files:
        # else, files from txt file
        list_fn = os.path.expanduser(typed_from_argv(str, name='list'))
        if list_fn is None:
            raise ValueError('no files to run on')
        files = [x.strip() for x in open(list_fn).read().split() if x.strip()]
    
    print 'Files to run over:', len(files)
    pprint(files)
    process.source.fileNames = files

    process.out.fileName = 'merged.root'
    out = typed_from_argv(str, name='out')
    if out is not None:
        process.out.fileName = os.path.expanduser(out)
    print 'Merging to', process.out.fileName.value()

    max_events = typed_from_argv(int, name='max_events')
    if max_events is not None:
        print 'Max events =', max_events
        process.maxEvents.input = max_events
    skip_events = typed_from_argv(int, name='skip_events')
    if skip_events is not None:
        print 'Skip events =', skip_events
        process.source.skipEvents = cms.untracked.uint32(skip_events)
Example #2
0
def main(samples_registry):
    """If 'merge' appears on the command line, merge the samples given there."""
    import sys
    if 'merge' not in sys.argv:
        return
    samples = samples_registry.from_argv(from_root_fns=True)
    # Output filename: the first .root argument that isn't an existing file.
    candidates = [fn for fn in sys.argv if fn.endswith('.root') and not os.path.isfile(fn)]
    output_fn = candidates[0] if candidates else 'merge.root'
    normalization = typed_from_argv(float, default_value=1.)
    normalization_path = typed_from_argv(str, default_value='', name='norm_path')
    merge(samples, output=output_fn, norm_to=normalization, norm_path=normalization_path)
Example #3
0
import sys, os
from array import array
from JMTucker.Tools.general import typed_from_argv
from JMTucker.Tools.ROOTTools import *
from JMTucker.Tools import Samples
from JMTucker.Tools.Samples import *

# Script configuration knobs.
version = '2017p8v4'
zoom = False #(0.98,1.005)
save_more = True
data_only = False
use_qcd = False
num_dir, den_dir = 'num', 'den'
#num_dir, den_dir = 'numjet6pt75', 'denjet6pt75'

# Select a (data-period suffix, integrated luminosity) row by the first int
# found on the command line (default 0).
which = typed_from_argv(int, 0)
# Rows: index 0 ('p8') is 2017+2018 combined (101037 = 41525 + 59512);
# indices 1-6 are 2017 ('' = full year, then eras B-F); indices 7-11 are
# 2018 ('' = full year, then eras A-D). Luminosities presumably in /pb --
# TODO confirm against the rest of the analysis.
data_period, int_lumi = [
    ('p8',101037.),
    ('',   41525.),
    ('B',   4794.),
    ('C',   9631.),
    ('D',   4248.),
    ('E',   9314.),
    ('F',  13538.),
    ('',   59512.),
    ('A',  14002.),
    ('B',   7091.),
    ('C',   6937.),
    ('D',  31482.),
    ][which]
# Rows 0-6 count as 2017 (the combined row included); rows 7-11 as 2018.
year = 2017 if which < 7 else 2018
Example #4
0
import sys, os
from array import array
from JMTucker.Tools.general import typed_from_argv
from JMTucker.Tools.ROOTTools import *
from JMTucker.Tools import Samples
from JMTucker.Tools.Samples import *

# Script configuration knobs.
version = '2017p8v4'
zoom = False  #(0.98,1.005)
save_more = True
data_only = False
use_qcd = False
num_dir, den_dir = 'num', 'den'
#num_dir, den_dir = 'numjet6pt75', 'denjet6pt75'

# Pick one (data-period suffix, integrated luminosity) row, selected by the
# first int found on the command line (default row 0).
which = typed_from_argv(int, 0)
_period_lumi = [
    ('p8', 101037.),  # 2017+2018 combined
    ('', 41525.),     # 2017, full year
    ('B', 4794.),
    ('C', 9631.),
    ('D', 4248.),
    ('E', 9314.),
    ('F', 13538.),
    ('', 59512.),     # 2018, full year
    ('A', 14002.),
    ('B', 7091.),
    ('C', 6937.),
    ('D', 31482.),
]
data_period, int_lumi = _period_lumi[which]
year = 2017 if which < 7 else 2018
Example #5
0
#!/usr/bin/env python

import sys
from JMTucker.Tools.general import typed_from_argv
from JMTucker.Tools.MiniAOD_cfg import cms, pat_tuple_process
from JMTucker.Tools.CMSSWTools import *

# 3 magic lines, don't touch
is_mc = True
njets = 2
nbjets = 0

# Optionally override (njets, nbjets) with exactly two ints from argv.
ints = typed_from_argv(int, default_value=[], return_multiple=True)
if len(ints) > 0:
    if len(ints) != 2:
        raise RuntimeError("if you put any ints there must be two")
    njets, nbjets = ints


# Build the PAT-tuple process (no dataset name passed here).
# NOTE(review): pat_tuple_process internals not visible in this file --
# confirm what None as the first argument means.
process = pat_tuple_process(None, is_mc=is_mc)

# Output file for TFileService, and fixed seeds for the random-number service.
tfileservice(process, "movedtree.root")
random_service(process, {"mfvVertices": 12179, "mfvMovedTracks": 13068})

if is_mc:
    process.mcStat.histos = True

# Don't embed tracks in the pat leptons -- presumably to shrink the event
# content; verify against the MiniAOD_cfg defaults.
process.patMuons.embedTrack = False
process.patElectrons.embedTrack = False

# Drop the default output module; only the TFileService output is kept.
del process.outp
Example #6
0
    # Expected command line: batch_name dataset sample(s) [nfiles ...] [out.root]
    if len(sys.argv) < 4:
        print sys.argv
        usage()

    batch_name = sys.argv[1]
    # Exactly one dataset name must appear among the arguments.
    datasets = Samples.registry.datasets_from_argv()
    if len(datasets) != 1:
        raise ValueError('expect exactly one dataset in argv')
    dataset = datasets[0]

    # Keep only the requested samples that actually have this dataset.
    samples = [s for s in Samples.registry.from_argv() if s.has_dataset(dataset)]

    # Output filename: the first .root argument after the batch name, else default.
    output_fn = [x for x in sys.argv[2:] if x.endswith('.root')]
    output_fn = output_fn[0] if output_fn else 'phadd.root'

    # Per-sample job counts from argv; pad with 1s when fewer counts than
    # samples were given, error out when more.
    nfileses = typed_from_argv(int, return_multiple=True)
    d = len(samples) - len(nfileses)
    if d < 0:
        raise ValueError('too many nfiles %s for # samples %i' % (nfileses, len(samples)))
    elif d > 0:
        nfileses.extend([1]*d)

    # Split each sample's input files across nfiles jobs (ceiling division so
    # every file is covered).
    for sample, nfiles in zip(samples, nfileses):
        sample.set_curr_dataset(dataset)
        sample.split_by = 'files'
        sample.files_per = int_ceil(len(sample.filenames), nfiles)
        sample.njobs = nfiles
        print 'sample %s dataset %s nfiles %i -> %i' % (sample.name, dataset, len(sample.filenames), nfiles)

    phadd(batch_name, dataset, samples, output_fn)
Example #7
0
def main(samples_registry):
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors

    if 'merge' in argv:
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)

    elif 'printmissing' in argv:
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s

    elif 'ds' in argv:
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        print sample.datasets[dataset].dataset

    elif 'file' in argv:
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        for x in sample.filenames[:typed_from_argv(int, 5)]:
            print x

    elif 'nevents' in argv:
        samples = samples_registry.from_argv(raise_if_none=True)
        if len(samples) != 1:
            raise ValueError('must have exactly one sample in argv')
        sample = samples[0]
        dataset = argv[argv.index(sample.name)+1]
        if not sample.has_dataset(dataset):
            raise KeyError('no dataset %s in %s' % (dataset, sample))
        sample.set_curr_dataset(dataset)
        print DBS.numevents_in_dataset(sample.dataset)

    elif 'site' in argv:
        samples = samples_registry.from_argv(raise_if_none=True)
        dataset = samples_registry.datasets_from_argv()
        if len(dataset) > 1:
            raise ValueError('only zero/one dataset allowed')
        dataset = dataset[0] if len(dataset) == 1 else 'main'
        mlen = max(len(s.name) for s in samples)
        for sample in samples:
            sample.set_curr_dataset(dataset)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print sample.name, 'PROBLEM'
                continue
            print sample.name.ljust(mlen+5),
            sites.sort(key=lambda x: x['name'])
            for site in sites:
                if DBS.site_is_tape(site):
                    continue
                is_complete = DBS.complete_at_site(site)
                print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)),

    elif 'samplefiles' in argv:
        # rm a; touch a; for ds in '' miniaod; do for x in qcd ttbar leptonic; do ( samples samplefiles ${x}_samples_2017 $ds >> a ) ; done; done
        # rm a; touch a; for ds in '' miniaod; do for year in 2017 2018; do for x in data auxiliary_data ; do ( samples samplefiles ${x}_samples_${year} $ds >> a ) ; done; done; done
        samples = samples_registry.from_argv(raise_if_none=True)
        dataset = 'main'
        for arg in argv[1:]:
            if arg == 'miniaod' or arg.startswith('ntuple'):
                dataset = arg
                break
        print 'getting files for dataset %s:' % dataset, ', '.join(s.name for s in samples)
        import SampleFiles as sf
        for s in samples:
            d = {}
            if not s.has_dataset(dataset):
                print colors.yellow('no dataset %s for %s' % (dataset, s.name))
                continue
            s.set_curr_dataset(dataset)
            if sf.has(s.name, dataset):
                raise KeyError('SampleFiles already has an entry for %s' % s.name)
            else:
                fns = s.filenames
                print 'DBS has %i files for %s' % (len(fns), s.name)
                d[(s.name, dataset)] = (len(fns), fns)
            print "('%s:%s', '%s')," % (s.name, dataset, sf._enc(d))
Example #8
0
def main(samples_registry):
    from glob import glob
    from sys import argv
    from pprint import pprint
    from JMTucker.Tools import colors
    from JMTucker.Tools.general import chunks, typed_from_argv

    samples = samples_registry.from_argv()
    datasets = samples_registry.datasets_from_argv()
    def prnt(*x):
        print ' '.join(str(y) for y in x)
    def runem(cb):
        for dataset in datasets:
            for sample in samples:
                if not sample.has_dataset(dataset):
                    print colors.yellow('no dataset %s for %s' % (dataset, sample.name))
                    continue
                sample.set_curr_dataset(dataset)
                cb(dataset, sample)

    if 'merge' in argv:
        samples = samples_registry.from_argv(from_root_fns=True, raise_if_none=True)
        out_fn = [x for x in argv if x.endswith('.root') and not os.path.isfile(x)]
        out_fn = out_fn[0] if out_fn else 'merge.root'
        norm_to = typed_from_argv(float, default_value=1.)
        norm_path = typed_from_argv(str, default_value='', name='norm_path')
        merge(samples, output=out_fn, norm_to=norm_to, norm_path=norm_path)

    elif 'printmissing' in argv:
        samples = [s.name for s in samples_registry.from_argv(raise_if_none=True)]
        samples.sort()
        look_for_root_files = 'no_root' not in sys.argv
        no_batch_dir, no_root_file = [], []
        for s in samples:
            if not os.path.isdir('condor_' + s) and not glob('crab_*_' + s):
                no_batch_dir.append(s)
            if not os.path.isfile('%s.root' % s):
                no_root_file.append(s)
        if no_batch_dir:
            print colors.yellow('no batch dir for these:')
            for s in no_batch_dir:
                print s
        if look_for_root_files and no_root_file:
            print colors.yellow('no root file for these:')
            for s in no_root_file:
                print s

    elif 'name' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset))

    elif 'ds' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, sample.dataset))

    elif 'file' in argv:
        runem(lambda dataset, sample: [prnt(sample.name, dataset, x) for x in sample.filenames[:typed_from_argv(int, 5)]])

    elif 'nevents' in argv:
        runem(lambda dataset, sample: prnt(sample.name, dataset, DBS.numevents_in_dataset(sample.dataset)))

    elif 'files_for_events' in argv:
        rles = typed_from_argv(int, return_multiple=True)
        if len(rles) % 3 != 0:
            raise ValueError('expect list of ints in argv with length divisible by 3 [run1 lumi1 event1 ...]')
        rles = list(chunks(rles,3))
        runem(lambda dataset, sample: prnt(sample.name, dataset, ' '.join(DBS.files_for_events(rles, sample.dataset))))

    elif 'site' in argv:
        mlen = max(len(s.name) for s in samples)
        def cb(dataset, sample):
            ljname = sample.name.ljust(mlen+3)
            try:
                sites = DBS.sites_for_dataset(sample.dataset, json=True)
            except RuntimeError:
                print colors.boldred(ljname + ' DBS problem')
            else:
                print ljname,
                sites.sort(key=lambda x: x['name'])
                for site in sites:
                    if DBS.site_is_tape(site):
                        continue
                    is_complete = DBS.complete_at_site(site)
                    print (colors.green if is_complete else colors.yellow)(DBS.site_completions_string(site)), ' ',
                print
        runem(cb)

    elif 'samplefiles' in argv:
        import SampleFiles as sf
        def cb(dataset, sample):
            if sf.has(sample.name, dataset):
                raise KeyError('SampleFiles already has an entry for %s' % sample.name)
            fns = sample.filenames
            print 'DBS has %i files for %s' % (len(fns), sample.name)
            d = {(sample.name, dataset): (len(fns), fns)}
            print "('%s:%s', '%s')," % (sample.name, dataset, sf._enc(d))
        runem(cb)

    elif 'sfhas' in argv:
        neg = 'neg' in argv
        import SampleFiles as sf
        for dataset in datasets:
            for sample in samples:
                if sf.has(sample.name, dataset) != neg:
                    print sample.name
Example #9
0
#!/usr/bin/env python

from JMTucker.Tools.ROOTTools import *
from JMTucker.Tools.Sample import norm_from_file
from JMTucker.Tools.Year import year
from JMTucker.Tools.general import typed_from_argv, bool_from_argv
from JMTucker.Tools import Samples
import JMTucker.MFVNeutralino.AnalysisConstants as ac

# Optional overrides taken from the command line. NOTE(review): the
# *_from_argv helpers presumably consume matching arguments from sys.argv,
# in which case call order can matter -- keep this order; verify in
# JMTucker.Tools.general.
year = typed_from_argv(int, year, name='year')
yearcheck = not bool_from_argv('noyearcheck')
nosig = bool_from_argv('nosig')
nodata = bool_from_argv('nodata')
nobkg = bool_from_argv('nobkg')
onlysig = bool_from_argv('onlysig')
onlydata = bool_from_argv('onlydata')
onlybkg = bool_from_argv('onlybkg')
sumbkg = not bool_from_argv('nosumbkg')
sumall = bool_from_argv('sumall')
sort = not bool_from_argv('nosort')

# Optional selection cuts; None (the default) means no cut.
genmatch = bool_from_argv('genmatch')
minnjets = typed_from_argv(int, name='minnjets')
maxnjets = typed_from_argv(int, name='maxnjets')
minht = typed_from_argv(float, name='minht')
maxht = typed_from_argv(float, name='maxht')
mindbv = typed_from_argv(float, name='mindbv')
maxdbv = typed_from_argv(float, name='maxdbv')

# which indexes the tuple of minitree directory names below (-1 = last
# entry, the plain 'mfvMiniTree').
which = typed_from_argv(int, -1)
ntks = ('mfvMiniTreeNtk3', 'mfvMiniTreeNtk4', 'mfvMiniTree')
#!/usr/bin/env python

import os, sys
from collections import defaultdict
from pprint import pprint
from JMTucker.Tools.DBS import files_in_dataset
from JMTucker.Tools.general import typed_from_argv

instance = typed_from_argv(int)
if instance is None:
    instance = 'global'

datasets = [x for x in sys.argv[1:] if x.count('/') == 3]

for dataset in datasets:
    d = defaultdict(list)
    files = files_in_dataset(dataset, instance)
    print dataset, 'has', len(files), 'files'
    for f in files:
        num = os.path.basename(f).split('_')[1]
        d[num].append(f)
    for k,v in d.iteritems():
        if len(v) > 1:
            print 'duplicate(s) for %s:' % k
            pprint(v)