def test_archive():
    # try some of the different __init__ configurations
    archive = dir_archive(cached=False)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, fast=True)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, compression=3)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, memmode='r+')
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, serialized=False)
    check_basic(archive)
    #check_numpy(archive) #FIXME: see issue #53
    rmtree('memo')
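
The check_basic and check_numpy helpers above come from klepto's test suite and are not shown on this page. A minimal sketch of what they plausibly assert (the exact checks are an assumption, not the library's actual tests):

def check_basic(archive):
    # hypothetical: round-trip a few plain-Python values
    archive['int'] = 1
    archive['str'] = 'hello'
    archive['list'] = [1, 2, 3]
    assert archive['int'] == 1
    assert archive['str'] == 'hello'
    assert archive['list'] == [1, 2, 3]

def check_numpy(archive):
    # hypothetical: round-trip a numpy array
    import numpy as np
    x = np.arange(5)
    archive['array'] = x
    assert all(archive['array'] == x)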
Example #2
def draw_skeleton(path_GT,path_pred,path_visual,name,MPII):
# =============================================================================
#     Draw skeleton based on model predictions
# =============================================================================
    
    prediction=klepto.dir_archive(path_pred,cached=False)
    prediction.load()
    
    archive=klepto.dir_archive(path_GT,cached=False)
    archive.load()
    
    img=archive[name]['img'].astype('uint8')
    heatmap=prediction[name]
    
    # define connections between joints for each dataset
    if MPII:
        lines = [(0,1),(1,2),(2,6),(6,3),(3,4),(4,5),(6,7),(7,8),(8,9),(10,11),(11,12),(12,7),(7,13),(13,14),(14,15)]
    else:
        lines = [(0,1),(1,2),(3,4),(4,5),(6,7),(7,8),(8,9),(9,10),(10,11),(2,8),(3,9),(12,13)]
    coords = dict(enumerate(list(rescale_joint_coords(heatmap))))
    for p0, p1 in lines:
        if coords[p0] == (0, 0) or coords[p1] == (0, 0):
            continue  # skip connections involving undetected joints
        cv2.line(img, coords[p0], coords[p1],
                 (rand(0, 255), rand(0, 255), rand(0, 255)),
                 thickness=2, lineType=8)
    plt.imshow(img)
    plt.imsave(path_visual+'Skeleton.png',img)
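
rescale_joint_coords is not defined on this page. Judging from how its result is used above (one (x, y) point per joint, with (0, 0) marking an undetected joint), a hypothetical sketch could map each heatmap channel's argmax back to image coordinates; the shape convention and threshold below are assumptions:

import numpy as np

def rescale_joint_coords(heatmap, img_size=(256, 256), threshold=0.1):
    # heatmap assumed shaped (1, h_heat, w_heat, njoints)
    hm = heatmap[0]
    h, w, njoints = hm.shape
    coords = []
    for j in range(njoints):
        channel = hm[:, :, j]
        y, x = np.unravel_index(np.argmax(channel), channel.shape)
        if channel[y, x] < threshold:
            coords.append((0, 0))  # joint not detected
        else:
            # rescale from heatmap resolution to image resolution
            coords.append((int(x * img_size[1] / w), int(y * img_size[0] / h)))
    return coords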
Example #3
def test_archive():
    # try some of the different __init__ configurations
    archive = dir_archive(cached=False)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, fast=True)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, compression=3)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, memmode='r+')
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, serialized=False)
    check_basic(archive)
    check_numpy(archive)
    rmtree('memo')
Example #4
def prediction(path_GT,path_pred,mymodel):
# =============================================================================
#     Compute Prediction of image
# =============================================================================
    
    prediction=klepto.dir_archive(path_pred,{},cached=False)

    archive= klepto.dir_archive(path_GT,cached=False)
    archive.load()

    for name in archive.keys():
        img=archive[name]['img'].reshape(1,w_pic,h_pic,3)
        predict_heat=mymodel.predict(img/255)
        prediction[name]=predict_heat
Example #5
def PCK(path_GT,path_pred,njoints):
# =============================================================================
#     Compute the PCK metric
# =============================================================================
    prediction_set = klepto.dir_archive(path_pred,cached=False)
    prediction_set.load()

    gt_maps = klepto.dir_archive(path_GT,cached=False)
    gt_maps.load()

    accuracy=[0]*njoints
    for name in prediction_set.keys():
        accuracy=accuracy_pred(prediction_set[name], gt_maps[name]['joints'],accuracy)
    return np.array(accuracy)/len(prediction_set)
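
accuracy_pred is not shown either. Since 'joints' is stored as per-joint heatmaps in these examples (see the data generator in Example #7 below), a hedged sketch of a PCK-style check could compare the argmax of each predicted channel against the ground truth; the shapes and the distance threshold are assumptions:

import numpy as np

def accuracy_pred(pred_heatmap, gt_heatmap, accuracy, threshold=0.5):
    # pred_heatmap assumed shaped (1, h_heat, w_heat, njoints),
    # gt_heatmap assumed shaped (njoints, h_heat, w_heat)
    for j in range(len(accuracy)):
        gt = gt_heatmap[j]
        pred = pred_heatmap[0][:, :, j]
        gy, gx = np.unravel_index(np.argmax(gt), gt.shape)
        py, px = np.unravel_index(np.argmax(pred), pred.shape)
        # count the joint as correct if the peaks are close (normalized distance)
        if np.hypot(px - gx, py - gy) / gt.shape[0] < threshold:
            accuracy[j] += 1
    return accuracy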
Example #6
def test_foo():
    # start fresh
    rmtree('foo', ignore_errors=True)

    d = dir_archive('foo', cached=False)
    key = '1234TESTMETESTMETESTME1234'
    d._mkdir(key)
    #XXX: repeat mkdir does nothing, should it clear?  I think not.
    _dir = d._mkdir(key)
    assert d._getdir(key) == _dir
    d._rmdir(key)

    # with _pickle
    x = [1,2,3,4,5]
    d._fast = True
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with dill
    d._fast = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with import
    d._serialized = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)
    d._serialized = True

    try: 
        import numpy as np
        y = np.array(x)

        # with _pickle
        d._fast = True
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with dill
        d._fast = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with import
        d._serialized = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)
        d._serialized = True

    except ImportError:
        pass

    # clean up
    rmtree('foo')
Example #7
def train_data_generator(path,
                         batch_size,
                         inres=(h_pic, w_pic),
                         outres=(h_heat, w_heat)):
    # =============================================================================
    #     Create data generator
    # =============================================================================
    archive_train = klepto.dir_archive(path, cached=False)
    archive_train.load()
    all_images = np.array(list(archive_train.keys()))
    size = len(all_images)

    while True:

        # shuffle the image order for this epoch
        names = np.random.permutation(list(archive_train.keys()))
        num_of_batches = size // batch_size

        for im in range(num_of_batches):
            gt_stack = np.zeros(shape=(batch_size, outres[0], outres[1],
                                       nOutput))
            img_stack = np.zeros(shape=(batch_size, inres[0], inres[1], 3))

            selected_photo_names = names[im * batch_size:(im + 1) * batch_size]

            for j in range(len(selected_photo_names)):

                gt_stack[j, :, :, :] = np.transpose(
                    np.array(archive_train[selected_photo_names[j]]['joints']),
                    (1, 2, 0))
                img_stack[j, :, :, :] = archive_train[
                    selected_photo_names[j]]['img'] / 255.

            yield (img_stack, gt_stack)
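
A typical way to consume this generator with a Keras-style model; mymodel (as in Example #4), the archive path, and the dataset size are illustrative assumptions:

batch_size = 16
size_of_training_set = 1000   # illustrative; matches len(archive) inside the generator
gen = train_data_generator('path/to/train_archive', batch_size)
mymodel.fit(gen, steps_per_epoch=size_of_training_set // batch_size, epochs=10)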
Example #8
def main():
    args = parse_commandline()

    use_klepto = True
    if use_klepto:
        acc = dir_archive(args.inpath,
                          serialized=True,
                          compression=0,
                          memsize=1e3)
        acc.load('recoil')
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
    else:
        acc = acc_from_dir(args.inpath)

    outdir = pjoin('./output/', os.path.basename(args.inpath))

    if args.channel == 'monojet':
        from legacy_monojet import legacy_limit_input_monojet
        legacy_limit_input_monojet(acc, outdir=outdir)
    elif args.channel == 'monov':
        from legacy_monov import legacy_limit_input_monov
        legacy_limit_input_monov(acc, outdir=outdir)
    elif args.channel == 'vbfhinv':
        from legacy_vbf import legacy_limit_input_vbf
        legacy_limit_input_vbf(acc, outdir=outdir)
Example #9
def main():
    args = parse_commandline()

    use_klepto = True
    if use_klepto:
        acc = dir_archive(args.inpath, serialized=True, compression=0, memsize=1e3)
        acc.load('recoil')
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
    else:
        acc = acc_from_dir(args.inpath)

    args.outdir = pjoin('./output/',list(filter(lambda x:x,args.inpath.split('/')))[-1])
    for channel in args.channel.split(','):
        print(channel)
        if channel == 'monojet':
            from legacy_monojet import legacy_limit_input_monojet
            legacy_limit_input_monojet(acc, args)
        elif channel == 'monov':
            from legacy_monov import legacy_limit_input_monov
            legacy_limit_input_monov(acc, args)
        elif channel == 'vbfhinv':
            from legacy_vbf import legacy_limit_input_vbf
            legacy_limit_input_vbf(acc, outdir=args.outdir, unblind=args.unblind)
Example #10
def main():
    inpath = sys.argv[1]
    #acc = acc_from_dir("./input/2019-10-07_das_lhevpt_dressed_v1")
    
    acc = dir_archive(
                      inpath,
                      serialized=True,
                      compression=0,
                      memsize=1e3
                      )
    acc.load('sumw')
    acc.load('sumw2')


    outputrootfile = uproot.recreate('2017_gen_v_pt_qcd_sf.root')
    sf_1d(acc, tag='wjet', regex='W.*',outputrootfile=outputrootfile)
    sf_1d(acc, tag='dy', regex='.*DY.*',outputrootfile=outputrootfile)
    # # outputrootfile = uproot.recreate(f'test.root')
    sf_2d(acc, tag='wjet', regex='W.*',pt_type='dress',outputrootfile=outputrootfile)
    sf_2d(acc, tag='dy', regex='.*DY.*',pt_type='dress',outputrootfile=outputrootfile)

    sf_1d(acc, tag='gjets', regex=r'G\d?Jet.*', outputrootfile=outputrootfile)
    # outputrootfile = uproot.recreate('test.root')

    sf_2d(acc, tag='gjets', regex=r'G\d?Jet.*', pt_type='stat1', outputrootfile=outputrootfile)
Example #11
def test_foo():
    # start fresh
    rmtree('foo', ignore_errors=True)

    d = dir_archive('foo', cached=False)
    key = '1234TESTMETESTMETESTME1234'
    d._mkdir(key)
    #XXX: repeat mkdir does nothing, should it clear?  I think not.
    _dir = d._mkdir(key)
    assert d._getdir(key) == _dir
    d._rmdir(key)

    # with _pickle
    x = [1, 2, 3, 4, 5]
    d._fast = True
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with dill
    d._fast = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with import
    d._serialized = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)
    d._serialized = True

    try:
        import numpy as np
        y = np.array(x)

        # with _pickle
        d._fast = True
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with dill
        d._fast = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with import
        d._serialized = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)
        d._serialized = True

    except ImportError:
        pass

    # clean up
    rmtree('foo')
Example #12
def load_big_data(fpath, fname):
    """
    https://stackoverflow.com/questions/17513036/pickle-dump-huge-file-without-memory-error
    """
    arch = dir_archive(fpath + fname, cached=False, serialized=True)
    arch.load(fname)
    return arch[fname]
Example #13
def save_big_data(fpath, fname, data):
    """
    https://stackoverflow.com/questions/17513036/pickle-dump-huge-file-without-memory-error
    """
    arch = dir_archive(fpath + fname, cached=False, serialized=True)
    arch[fname] = data
    # dump from memory cache to the on-disk archive
    arch.dump()
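
A round trip with the two helpers above (the path and key are illustrative):

data = list(range(10**6))
save_big_data('/tmp/cache/', 'bigdata', data)
restored = load_big_data('/tmp/cache/', 'bigdata')
assert restored == data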
Example #14
    def __init__(self, path):

        # store information
        self.path = path
        self.arch = archives.dir_archive(self.path)

        # load new data in archive file
        self.arch.load()
Example #15
def klepto_load(inpath):
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    return acc
Example #16
def main():
    inpath_vbf = rebsmear_path(
        'submission/vbfhinv/merged_2021-06-11_vbfhinv_ULv8_05Feb21_rebsmear_CR'
    )
    inpath_rs = rebsmear_path(
        'submission/merged_2021-06-11_rebsmear_privatePS')

    acc_vbf = dir_archive(inpath_vbf)
    acc_vbf.load('sumw')
    acc_vbf.load('sumw_pileup')
    acc_vbf.load('nevents')

    h_qcd = extract_yields_in_cr(acc_vbf, distribution='mjj')

    # Rebalance and smear output
    acc_rs = dir_archive(inpath_rs)

    plot_rebsmear_prediction(acc_rs, h_qcd)
Example #17
def klepto_load(loc):
    '''
    for loading the dumped dictionaries
    :return: loaded dictionary
    '''

    dic = dir_archive(loc, {}, serialized=True)
    dic.load()
    print('dictionary loaded')
    return dic
Example #18
def read_archive(filename, axis=None): #NOTE: could return iterators
    """read 'parameters' and 'cost' from klepto.dir_archive

    Inputs:
      filename: str path to location of klepto.archives.dir_archive
      axis: int, the desired index of the tuple-valued dataset [0,N]
    """
    from klepto.archives import dir_archive
    arch = dir_archive(filename, cached=True)
    return for_monitor(arch, axis=axis)
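
Note that this example uses cached=True, unlike most snippets on this page: with a memory cache in front of the archive, reads and writes hit the in-memory dict and are synced explicitly with load()/dump(), whereas cached=False proxies every access straight to disk. A small sketch of the difference (archive names are illustrative):

from klepto.archives import dir_archive

# cached=False: every read/write goes directly to the files on disk
direct = dir_archive('demo_direct', cached=False)
direct['x'] = 1           # written to disk immediately

# cached=True: operate on an in-memory dict, sync explicitly
cached = dir_archive('demo_cached', cached=True)
cached['x'] = 1           # only in memory so far
cached.dump()             # push the cache to the on-disk archive
cached.load()             # pull the on-disk archive into the cache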
Example #19
def klepto_dump(merged_dict, loc):
    '''
    to dump the merged dictionary file
    :param merged_dict: the final merged dictionary obtained
    :return: None
    '''

    demo = dir_archive(loc, merged_dict, serialized=True)
    demo.dump()
    del demo
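
Paired with klepto_load from Example #17, this gives a simple dict persistence round trip (the path is illustrative):

merged = {'a': 1, 'b': [2, 3]}
klepto_dump(merged, '/tmp/merged_dict')
restored = klepto_load('/tmp/merged_dict')
assert restored['a'] == 1 and restored['b'] == [2, 3]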
Example #20
def met_trigger_eff(distribution):
    if distribution == 'mjj':
        tag = '120pfht_mu_mjj'
    elif distribution == 'recoil':
        tag = '120pfht_mu_recoil'
        # NOTE: indir is only set in this branch; distribution == 'mjj' would hit a NameError below
        indir = '/afs/cern.ch/user/a/aakpinar/bucoffea/bucoffea/submission/2019-11-13_vbf_trigger_recoil'

    acc = dir_archive(indir, serialized=True, compression=0, memsize=1e3)

    # Pre-load necessary information
    acc.load('recoil')
    acc.load('sumw')
    acc.load('sumw2')

    for year in [2017, 2018]:
        for jeteta_config in [
                'two_central_jets', 'two_forward_jets',
                'one_jet_forward_one_jet_central'
        ]:
            # Single muon CR
            region_tag = '1m'
            for dataset in ['WJetsToLNu_HT_MLM', 'SingleMuon']:
                plot_recoil(acc,
                            region_tag=region_tag,
                            distribution=distribution,
                            axis_name=distribution,
                            dataset=dataset,
                            year=year,
                            tag=tag,
                            jeteta_config=jeteta_config,
                            output_format='pdf')
            # Double muon CR
            region_tag = '2m'
            for dataset in ['VDYJetsToLL_M-50_HT_MLM', 'SingleMuon']:
                plot_recoil(acc,
                            region_tag=region_tag,
                            distribution=distribution,
                            axis_name=distribution,
                            dataset=dataset,
                            year=year,
                            tag=tag,
                            jeteta_config=jeteta_config,
                            output_format='pdf')

    for jeteta_config in [
            'two_central_jets', 'two_forward_jets',
            'one_jet_forward_one_jet_central'
    ]:
        data_mc_comparison_plot(tag,
                                distribution=distribution,
                                jeteta_config=jeteta_config,
                                output_format='pdf')

    plot_scalefactors(tag, distribution=distribution)
Example #21
def main():
    inpath = sys.argv[1]

    acc = dir_archive(inpath, serialized=True, memsize=1e3, compression=0)

    acc.load('sumw')
    acc.load('sumw2')

    plot_ht_dist(acc, regex='WJetsToLNu.*(2017|2018)', tag='wjets')
    plot_ht_dist(acc, regex='DYJets.*(2017|2018)', tag='dy')
    plot_ht_dist(acc, regex='GJets_HT.*(2017)', tag='gjets_17')
    plot_ht_dist(acc, regex='GJets_DR-0p4.*(2017)', tag='gjets_dr_17')
Example #22
def main():
    inpath = sys.argv[1]

    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)

    acc.load('sumw')
    acc.load('sumw2')

    # Create the output ROOT file to save the
    # PDF uncertainties as a function of v-pt
    outputrootpath = './output/theory_variations/rootfiles'
    if not os.path.exists(outputrootpath):
        os.makedirs(outputrootpath)

    outputrootfile_z_over_w = uproot.recreate(
        pjoin(outputrootpath, 'zoverw_pdf_unc.root'))
    outputrootfile_g_over_z = uproot.recreate(
        pjoin(outputrootpath, 'goverz_pdf_unc.root'))

    w_nom, w_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='WNJetsToLNu.*', tag='wjet')
    dy_nom, dy_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='DYNJetsToLL.*', tag='dy')
    gjets_nom, gjets_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='G1Jet.*', tag='gjets')

    data_for_ratio = {
        'z_over_w': {
            'noms': (dy_nom, w_nom),
            'uncs': (dy_unc, w_unc),
            'rootfile': outputrootfile_z_over_w
        },
        'g_over_z': {
            'noms': (gjets_nom, dy_nom),
            'uncs': (gjets_unc, dy_unc),
            'rootfile': outputrootfile_g_over_z
        },
    }

    for tag, entry in data_for_ratio.items():
        noms = entry['noms']
        uncs = entry['uncs']
        plot_ratio(noms=noms,
                   uncs=uncs,
                   tag=tag,
                   vpt_edges=vpt_edges,
                   vpt_centers=vpt_centers,
                   outputrootfile=entry['rootfile'])
Example #23
def _load_and_sum(args):
    """
    merge item from list of coffea files and dump it to file

    For each file, the saved item corresponding to the
    same key is read out. The sum of the individual
    items for the individual files is dumped.

    :param args: Tuple (key to use, file list, output name)
    :type args: tuple
    :return: 0
    :rtype: int
    """

    # Args is a tuple for easy multiprocessing
    key, files, outname = args

    # Load the individual items
    items = []
    for fn in files:
        try:
            items.append(load(fn)[key])
        except KeyError:
            continue
    
    # Recursive merging
    while len(items) > 1:
        x = items.pop(0)
        y = items.pop(0)
        s = x + y
        items.append(s)
    
    assert len(items) == 1
    
    # dump the content using klepto
    arc = dir_archive(
                    outname,
                    serialized=True,
                    compression=0,
                    memsize=1e3,
                    )
    arc[key] = items[0]
    arc.dump(key)
    arc.clear()
    return 0
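
As the docstring notes, packing the arguments into a tuple makes this easy to parallelize; a sketch with illustrative keys, input files, and output name:

from multiprocessing import Pool

keys = ['sumw', 'sumw2', 'recoil']             # illustrative
files = ['out_1.coffea', 'out_2.coffea']       # illustrative
jobs = [(key, files, 'merged_output') for key in keys]
with Pool(processes=4) as pool:
    pool.map(_load_and_sum, jobs)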
Example #24
def main():
    inpath = "../../input/merged"
    year = 2017
    mc = re.compile(
        f'(VDY.*HT.*|QCD.*|W.*HT.*|ST_|TTJets-FXFX_|Diboson_|GJets.*HT.*|ZJetsToNuNu.*){year}'
    )
    signal = re.compile(f'WH.*{year}')
    distribution = "recoil"
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    acc.load(distribution)
    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('nevents')
    try:
        acc[distribution] = merge_extensions(
            acc[distribution], acc, reweight_pu=not ('nopu' in distribution))
        scale_xs_lumi(acc[distribution])
        acc[distribution] = merge_datasets(acc[distribution])
        S_over_B(acc,
                 distribution,
                 'sr_tight_v',
                 mc=mc,
                 signal=signal,
                 unc=0.05,
                 outname="SB_unc005.png",
                 cutlim=(250, 750))
        S_over_B(acc,
                 distribution,
                 'sr_tight_v',
                 mc=mc,
                 signal=signal,
                 unc=0.10,
                 outname="SB_unc010.png",
                 cutlim=(250, 750))
    except KeyError:
        print("key error ")
        return -2
Example #25
def main():
    inpath = sys.argv[1]
    acc = dir_archive(inpath)
    cfname = 'cutflow_sr_vbf'
    acc.load(cfname)

    cf = acc[cfname]

    outtag = re.findall('merged_.*', inpath)[0].replace('/', '')

    datasets = list(cf.keys())
    cuts = cf[datasets[0]].keys()

    combined_cf = Counter({cut: 0 for cut in cuts})

    for d in datasets:
        cutflow = Counter(cf[d])
        combined_cf += cutflow

    pcutflow = []
    for idx, (c, v) in enumerate(combined_cf.items()):
        if idx == 0:
            acceptance = 100
        else:
            acceptance = v / list(combined_cf.values())[idx - 1] * 100
        pcutflow.append([c, v, acceptance])

    outdir = f'./output/{outtag}'
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    outpath = pjoin(outdir, 'cutflow.txt')
    with open(outpath, 'w+') as f:
        f.write(outtag)
        f.write('\n')

        f.write(
            tabulate(pcutflow,
                     headers=['Cut', 'Number of Events', 'Acceptance (%)'],
                     floatfmt=[".0f", ".0f", ".3f"]))

    print(f'File saved: {outpath}')
Example #26
def main():
    args = parse_cli()
    # Path to the directory containing list of ROOT input files (R&S trees)
    inpath = args.inpath
    acc = dir_archive(inpath)

    acc.load('sumw')
    acc.load('sumw2')

    try:
        outtag = re.findall('merged_.*', inpath)[0]
    except IndexError:
        raise RuntimeError(f'Check the naming of input: {os.path.basename(inpath)}')

    outdir = f'./output/{outtag}'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    distributions = BINNINGS.keys()

    regions = [
        'inclusive',
        'sr_vbf',
        'cr_vbf_qcd'
    ]
    
    for region in regions:
        if not re.match(args.region, region):
            continue
        for distribution in distributions:
            if not re.match(args.distribution, distribution):
                continue
            
            make_plot(acc, 
                outdir=outdir, 
                distribution=distribution,
                region=region,
                dataset='JetHT',
                years=args.years
            )
Example #27
def main():

    overwrite = True

    # load the config and the cache
    cfg = loadConfig()

    # Inputs are defined in a dictionary
    # dataset : list of files
    fileset = {
        'tW_scattering': glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/tW_scattering__nanoAOD/merged/*.root"),
        "TTW":           glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20_ext1-v1/merged/*.root") \
                        + glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToQQ_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root"),
        #        "ttbar":        glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromT_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/*.root") # adding this is still surprisingly fast (20GB file!)
        "ttbar": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromTbar_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root")
    }

    # histograms
    histograms = [
        "MET_pt", "Jet_pt", "Jet_eta", "Jet_pt_fwd", "W_pt_notFromTop",
        "GenJet_pt_fwd", "Spectator_pt", "Spectator_eta"
    ]
    histograms += [
        "Top_pt", "Top_eta", "Antitop_pt", "Antitop_eta", "W_pt", "W_eta",
        "N_b", "N_jet", "dijet_mass", "dijet_mass_bestW", "dijet_mass_secondW",
        "digenjet_mass", "dijet_deltaR"
    ]

    # initialize cache
    cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']),
                                     cfg['caches']['simpleProcessor']),
                        serialized=True)
    if not overwrite:
        cache.load()

    if cfg == cache.get('cfg') and histograms == cache.get(
            'histograms') and fileset == cache.get('fileset') and cache.get(
                'simple_output'):
        output = cache.get('simple_output')

    else:
        # Run the processor
        output = processor.run_uproot_job(
            fileset,
            treename='Events',
            processor_instance=exampleProcessor(),
            executor=processor.futures_executor,
            executor_args={
                'workers': 1,
                'function_args': {
                    'flatten': False
                }
            },
            chunksize=500000,
        )
        cache['fileset'] = fileset
        cache['cfg'] = cfg
        cache['histograms'] = histograms
        cache['simple_output'] = output
        cache.dump()

    # Make a few plots
    outdir = "./tmp_plots"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for name in histograms:
        print(name)
        histogram = output[name]
        if name == 'MET_pt':
            # rebin
            new_met_bins = hist.Bin('pt', r'$E_T^{miss} \ (GeV)$', 20, 0, 200)
            histogram = histogram.rebin('pt', new_met_bins)
        if name == 'W_pt_notFromTop':
            # rebin
            new_pt_bins = hist.Bin('pt', r'$p_{T}(W) \ (GeV)$', 25, 0, 500)
            histogram = histogram.rebin('pt', new_pt_bins)

        ax = hist.plot1d(
            histogram, overlay="dataset", density=False, stack=True
        )  # stacked absolute yields (not density)
        ax.set_yscale('linear')  # can be log
        #ax.set_ylim(0,0.1)
        ax.figure.savefig(os.path.join(outdir, "{}.pdf".format(name)))
        ax.clear()

        ax = hist.plot1d(
            histogram, overlay="dataset", density=True, stack=False
        )  # make density plots because we don't care about x-sec differences
        ax.set_yscale('linear')  # can be log
        #ax.set_ylim(0,0.1)
        ax.figure.savefig(os.path.join(outdir, "{}_shape.pdf".format(name)))
        ax.clear()

    return output
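
The caching logic above (recompute only when the config, file set, or histogram list changed) distills to a reusable pattern; a minimal sketch with illustrative names, assuming a dict-like klepto cache:

def cached_run(cache, inputs, compute):
    # rerun `compute` only if any input differs from what was cached
    cache.load()
    if all(cache.get(k) == v for k, v in inputs.items()) and cache.get('output'):
        return cache.get('output')
    output = compute()
    cache.update(inputs)          # remember the inputs that produced this output
    cache['output'] = output
    cache.dump()
    return output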
Example #28
def plot(inpath):
    indir = os.path.abspath(inpath)

    # The processor output is stored in an
    # 'accumulator', which in our case is
    # just a dictionary holding all the histograms
    # Put all your *coffea files into 'indir' and
    # pass the directory as an argument here.
    # All input files in the directory will
    # automatically be found, merged and read.
    # The merging only happens the first time
    # you run over a specific set of inputs.
    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)
    # Get a settings dictionary that details
    # which plots to make for each region,
    # what the axis limits are, etc
    # Can add plots by extending the dictionary
    # Or modify axes ranges, etc
    settings = plot_settings()

    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region
        # Muon regions use MET,
        # electron+photon regions use EGamma
        # ( EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': None,
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for MC selection
        # Match datasets by regular expressions
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Want to compare LO and NLO,
        # so do same thing for NLO V samples
        # All non-V samples remain the same
        mc_nlo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|W.*FXFX.*).*{year}'),
        }

        regions = list(mc_lo.keys())
        # Remove signal region, no need in ratio plots
        regions.remove('sr_vbf')

        # Make control region ratio plots for both
        # LO and NLO. Can be skipped if you only
        # want data / MC agreement plots.
        outdir = f'./output/{os.path.basename(indir)}/ratios'

        # Load ingredients from cache
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
        cr_ratio_plot(acc,
                      year=year,
                      tag='losf',
                      outdir=outdir,
                      mc=mc_lo,
                      regions=regions,
                      distribution='mjj')
        cr_ratio_plot(acc,
                      year=year,
                      tag='nlo',
                      outdir=outdir,
                      mc=mc_nlo,
                      regions=regions,
                      distribution='mjj')

        # Data / MC plots are made here
        # Loop over all regions
        for region in mc_lo.keys():
            ratio = region != 'sr_vbf'
            # Make separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'
            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset.keys():
                # Load from cache
                if distribution not in merged:
                    acc.load(distribution)

                    if distribution not in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)
                try:
                    # The heavy lifting of making a plot is hidden
                    # in make_plot. We call it once using the LO MC
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=mc_lo[region],
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='losf',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)

                    # And then we also call it for the NLO MC
                    # The output files will be named according to the 'tag'
                    # argument, so we  will be able to tell them apart.
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=mc_nlo[region],
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='nlo',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)

                except KeyError:
                    continue
Example #29
def plot(args):
    indir = os.path.abspath(args.inpath)

    # The processor output is stored in an
    # 'accumulator', which in our case is
    # just a dictionary holding all the histograms
    # Put all your *coffea files into 'indir' and
    # pass the directory as an argument here.
    # All input files in the directory will
    # automatically be found, merged and read.
    # The merging only happens the first time
    # you run over a specific set of inputs.
    acc = dir_archive(args.inpath, serialized=True, compression=0, memsize=1e3)
    # Get a settings dictionary that details
    # which plots to make for each region,
    # what the axis limits are, etc
    # Can add plots by extending the dictionary
    # Or modify axes ranges, etc
    settings = plot_settings()

    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region
        # Muon regions use MET,
        # electron+photon regions use EGamma
        # ( EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': f'MET_{year}',
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for MC selection
        # Match datasets by regular expressions
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(
                f'(GJets_(DR-0p4|SM).*|QCD_data.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Load ingredients from cache
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        # Data / MC plots are made here
        # Loop over all regions
        for region in mc_lo.keys():
            if not re.match(args.region, region):
                continue
            # Plot ratio pads for all regions (now that we're unblinded)
            ratio = True
            # Make separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'
            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset.keys():
                if not re.match(args.distribution, distribution):
                    continue
                # Load from cache
                if distribution not in merged:
                    acc.load(distribution)

                    if distribution not in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)
                try:
                    # The heavy lifting of making a plot is hidden
                    # in make_plot. We call it once using the LO MC
                    imc = mc_lo[region]
                    if "cr_g" in region and distribution != "recoil":
                        imc = re.compile(
                            imc.pattern.replace('QCD_data', 'QCD.*HT'))
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=imc,
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='losf',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)
                except KeyError:
                    continue
Example #30
if __name__ == '__main__':

    from klepto.archives import dir_archive
    from Tools.samples import * # fileset_2018 #, fileset_2018_small
    from processor.default_accumulators import *

    overwrite = True
    small = False
    save = True

    # load the config and the cache
    cfg = loadConfig()
    
    cacheName = 'SS_analysis'
    if small: cacheName += '_small'
    cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']), cacheName), serialized=True)
    
    year = 2018
    
    fileset = {
        'topW_v3': fileset_2018['topW_v3'],
        'topW_EFT_cp8': fileset_2018['topW_EFT_cp8'],
        'topW_EFT_mix': fileset_2018['topW_EFT_mix'],
        'TTW': fileset_2018['TTW'],
        'TTZ': fileset_2018['TTZ'],
        'TTH': fileset_2018['TTH'],
        'diboson': fileset_2018['diboson'],
        'triboson': fileset_2018['triboson'],
        #'wpwp': fileset_2018['wpwp'],
        'TTTT': fileset_2018['TTTT'],
        'ttbar': fileset_2018['ttbar'],
Example #31
    sprayer = BuckshotSolver
    seeker = PowellDirectionalSolver
    npts = 25 # number of solvers
    retry = 1 # max consecutive iteration retries without a cache 'miss'
    tol = 8   # rounding precision
    mem = 1   # cache rounding precision

    #CUTE: 'configure' monitor and archive if they are desired
    if stepmon:
        stepmon = LoggingMonitor(1) # monitor for all runs
        itermon = LoggingMonitor(1, filename='inv.txt') #XXX: log.txt?
    else:
        stepmon = itermon = None
    if archive: #python2.5
        ar_name = '__%s_%sD_cache__' % (model.__self__.__class__.__name__,ndim)
        archive = dir_archive(ar_name, serialized=True, cached=False)
        ar_name = '__%s_%sD_invcache__' % (model.__self__.__class__.__name__,ndim)
        ivcache = dir_archive(ar_name, serialized=True, cached=False)
    else:
        archive = ivcache = None

    from mystic.search import Searcher #XXX: init w/ archive, then UseArchive?
    sampler = Searcher(npts, retry, tol, mem, _map, archive, sprayer, seeker)
    sampler.Verbose(disp)
    sampler.UseTrajectories(traj)

    ### doit ###
    maxpts = 1000. #10000.
    surface = Surface(model, sampler, maxpts=maxpts, dim=ndim)
    surface.UseMonitor(stepmon, itermon)
    surface.UseArchive(archive, ivcache)
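
Both caches end up as plain klepto directory archives on disk, so they can be reopened and inspected after the run (the archive name here is illustrative):

from klepto.archives import dir_archive

cache = dir_archive('__model_3D_cache__', serialized=True, cached=False)
print(len(cache), 'cached evaluations')
for key in list(cache.keys())[:5]:
    print(key, '->', cache[key])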
Example #32
def main():
    # set to True if you want to update the mistag ROOT file,
    # otherwise just make the plots
    if True:
        outfile = ROOT.TFile.Open(outfilename,'recreate')
    else:
        outfile = None

    # Prepare the acc
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
        )
    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('nevents')
    distribution = 'ak8_pt0'
    distribution_Vmatched = 'ak8_Vmatched_pt0'
    distribution_mass = 'ak8_mass0'
    acc.load(distribution)
    acc.load(distribution_Vmatched)
    acc.load(distribution_mass)
    
    # merge datasets and scale with lumi xs
    htmp = acc[distribution]
    htmp_Vmatched = acc[distribution_Vmatched]
    htmp_mass = acc[distribution_mass]
    htmp = merge_extensions(htmp, acc, reweight_pu=True)
    scale_xs_lumi(htmp)
    htmp = merge_datasets(htmp)
    acc[distribution]=htmp
    htmp_Vmatched = merge_extensions(htmp_Vmatched, acc, reweight_pu=True)
    scale_xs_lumi(htmp_Vmatched)
    htmp_Vmatched = merge_datasets(htmp_Vmatched)
    acc[distribution_Vmatched]=htmp_Vmatched
    htmp_mass = merge_extensions(htmp_mass, acc, reweight_pu=True)
    scale_xs_lumi(htmp_mass)
    htmp_mass = merge_datasets(htmp_mass)
    acc[distribution_mass]=htmp_mass

    acc[distribution].axis('dataset').sorting = 'integral'
    acc[distribution_Vmatched].axis('dataset').sorting = 'integral'
    acc[distribution_mass].axis('dataset').sorting = 'integral'
    
    # rebin the jet-pt axis if a new binning is given
    if newbin:
        htmp = htmp.rebin(htmp.axis('jetpt'),newbin)
        htmp_Vmatched = htmp_Vmatched.rebin(htmp_Vmatched.axis('jetpt'),newbin)
    edges = htmp.axis('jetpt').edges()
    centers = htmp.axis('jetpt').centers()
    halfwidth = [centers[i]-edges[i] for i in range(len(centers))]

    for lepton_flag in ['1m','2m','1e','2e','g']:
    #for lepton_flag in ['g']:
        for year in [2017,2018]:
            mc_map = {
                'cr_1m_v'      : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT).*{year}'),
                'cr_1e_v'      : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT).*{year}'),
                'cr_2m_v'      : re.compile(f'(Top.*FXFX|Diboson|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_2e_v'      : re.compile(f'(Top.*FXFX|Diboson|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_g_v'       : re.compile(f'(Diboson|QCD_HT|GJets_DR.*HT|VQQGamma_FXFX|WJetsToLNu.*HT).*{year}'),
                'cr_nobveto_v' : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
                'sr_v'         : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
            }
            mc_map_noV = {
                'cr_1m_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT).*{year}'),
                'cr_1e_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT).*{year}'),
                'cr_2m_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_2e_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_g_v'       : re.compile(f'(QCD_HT|GJets_DR.*HT|WJetsToLNu.*HT).*{year}'),
                'cr_nobveto_v' : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
                'sr_v'         : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
            }
            mc_map_realV = {
                'cr_1m_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_1e_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_2m_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_2e_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_g_v'       : re.compile(f'(Diboson|VQQGamma_FXFX).*{year}'),
                'cr_nobveto_v' : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'sr_v'         : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
            }
            # use NLO GJets for the measurement if needed
            if nlogjet:
                mc_map['cr_g_v']     = re.compile(f'(Diboson|QCD_HT|GJets_1j|VQQGamma_FXFX|WJetsToLNu.*HT).*{year}')
                mc_map_noV['cr_g_v'] = re.compile(f'(QCD_HT|GJets_1j|WJetsToLNu.*HT).*{year}')
            for wp in ['loose','tight','medium']:
                region_all = f'cr_{lepton_flag}_hasmass_inclusive_v'
                region_all_nomass = f'cr_{lepton_flag}_inclusive_v'
                region_pass= f'cr_{lepton_flag}_nomistag_{wp}_v'
                region_pass_nomass= f'cr_{lepton_flag}_nomistag_nomass_{wp}_v'
                mc_All = mc_map[f'cr_{lepton_flag}_v']
                mc_False = mc_map_noV[f'cr_{lepton_flag}_v']
                mc_Real = mc_map_realV[f'cr_{lepton_flag}_v']
                if lepton_flag in ['1e','2e','g']:
                    data = re.compile(f'EGamma_{year}')
                else:
                    data = re.compile(f'MET_{year}')
                    
                ### DEBUG ###
                # print(acc[distribution][mc_All].integrate("region",region_all).values())
                # print(acc[distribution][mc_All].integrate("region",region_pass).values())
                # print(acc[distribution_Vmatched][mc_All].integrate("region",region_pass).values())
                #############
                # make stack plots for the 'all' and 'pass' regions
                try:
                    acc["alskjxkjo"]  # deliberately raises KeyError so the stack plots below are skipped
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_all_nomass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_all_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_all, distribution=distribution_Vmatched, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=None, mc=mc_False, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCNoV")
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=data, mc=None, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="data")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_pass, distribution=distribution_Vmatched, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=None, mc=mc_False, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCNoV")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=data, mc=None, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="data")
                except ValueError:
                    print(f"Warning(ValueError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                except AssertionError:
                    print(f"Warning(AssertionError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                except KeyError:
                    print(f"Warning(KeyError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                try:
                    make_plot(acc, region=region_pass_nomass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                except Exception:
                    pass

                # extract the mistag rate for data and MC
                selector_region_all,selector_region_pass = region_all, region_pass
                if not massden:
                    selector_region_all = region_all_nomass
                if not massnum:
                    selector_region_pass = region_pass_nomass

                for sysvar in all_sysvar:
                    if sysvar=="nominal":
                        sysvar_tag = ""
                    else:
                        sysvar_tag = "_"+sysvar

                    # background subtraction from data: remove real Vs
                    h_data = htmp[data].integrate('dataset')
                    #h_mc_Real  = htmp[mc_Real].integrate('dataset')
                    h_mc_False = htmp[mc_False].integrate('dataset')

                    h_mc_Real  = htmp_Vmatched[mc_Real]
                    # vary within systematic uncertainties (approximate):
                    # normalization 10% for both, b-veto unc. 6% for top / 2% for diboson, V-tag unc. 10% for both
                    if sysvar=="sysUp": h_mc_Real.scale(1.15)
                    if sysvar=="sysDn": h_mc_Real.scale(0.85)
                    if sysvar=="topNormUp": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 1.10, "Top_FXFX_2018"      : 1.10} , axis="dataset" ) 
                    if sysvar=="topNormDn": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 0.90, "Top_FXFX_2018"      : 0.90} , axis="dataset" ) 
                    if sysvar=="vvNormUp":  h_mc_Real.scale  ( { "Diboson_2017"       : 1.10, "Diboson_2018"       : 1.10} , axis="dataset" ) 
                    if sysvar=="vvNormDn":  h_mc_Real.scale  ( { "Diboson_2017"       : 0.90, "Diboson_2018"       : 0.90} , axis="dataset" ) 
                    if sysvar=="vgNormUp":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 1.10, "VQQGamma_FXFX_2018" : 1.10} , axis="dataset" ) 
                    if sysvar=="vgNormDn":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 0.90, "VQQGamma_FXFX_2018" : 0.90} , axis="dataset" ) 
                    if sysvar=="topVTagUp": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 1.10, "Top_FXFX_2018"      : 1.10} , axis="dataset" ) 
                    if sysvar=="topVTagDn": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 0.90, "Top_FXFX_2018"      : 0.90} , axis="dataset" ) 
                    if sysvar=="vvVTagUp":  h_mc_Real.scale  ( { "Diboson_2017"       : 1.10, "Diboson_2018"       : 1.10} , axis="dataset" ) 
                    if sysvar=="vvVTagDn":  h_mc_Real.scale  ( { "Diboson_2017"       : 0.90, "Diboson_2018"       : 0.90} , axis="dataset" ) 
                    if sysvar=="vgVTagUp":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 1.10, "VQQGamma_FXFX_2018" : 1.10} , axis="dataset" ) 
                    if sysvar=="vgVTagDn":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 0.90, "VQQGamma_FXFX_2018" : 0.90} , axis="dataset" ) 
                    if sysvar=="topBVetoUp": h_mc_Real.scale ( { "Top_FXFX_2017"      : 1.06, "Top_FXFX_2018"      : 1.06} , axis="dataset" ) 
                    if sysvar=="topBVetoDn": h_mc_Real.scale ( { "Top_FXFX_2017"      : 0.94, "Top_FXFX_2018"      : 0.94} , axis="dataset" ) 
                    if sysvar=="vvBVetoUp":  h_mc_Real.scale ( { "Diboson_2017"       : 1.02, "Diboson_2018"       : 1.02} , axis="dataset" ) 
                    if sysvar=="vvBVetoDn":  h_mc_Real.scale ( { "Diboson_2017"       : 0.98, "Diboson_2018"       : 0.98} , axis="dataset" ) 
                    if sysvar=="vgBVetoUp":  h_mc_Real.scale ( { "VQQGamma_FXFX_2017" : 1.02, "VQQGamma_FXFX_2018" : 1.02} , axis="dataset" ) 
                    if sysvar=="vgBVetoDn":  h_mc_Real.scale ( { "VQQGamma_FXFX_2017" : 0.98, "VQQGamma_FXFX_2018" : 0.98} , axis="dataset" ) 
                    h_mc_Real  = h_mc_Real.integrate('dataset')
                    h_mc_Real.scale(-1*realVSF) # just for background subtraction
                    h_data.add(h_mc_Real)
    
                    teff_mistag_rate_data = get_mistag_rate(h_data, selector_region_all, selector_region_pass, flag=f'data_{lepton_flag}_{wp}_{year}{sysvar_tag}', isData=True)
                    teff_mistag_rate_data.SetNameTitle(f'mistag_rate_data_{lepton_flag}_{wp}_{year}{sysvar_tag}','mistagging rate')
                    teff_mistag_rate_mc = get_mistag_rate(h_mc_False, selector_region_all, selector_region_pass, flag=f'mc_{lepton_flag}_{wp}_{year}{sysvar_tag}', isData=False)
                    teff_mistag_rate_mc.SetNameTitle(f'mistag_rate_mc_{lepton_flag}_{wp}_{year}{sysvar_tag}','mistagging rate')
    
                    # get the scale factors
                    # note that ROOT cannot divide two TEfficiency objects directly, so this is done manually
                    th1_mistag_SF = ratio_of_efficiencies(f'mistag_SF_{lepton_flag}_{wp}_{year}{sysvar_tag}', 'mistag scale factor', teff_mistag_rate_data, teff_mistag_rate_mc)
                    
                    # save the mistag rate and SF histograms into root file
                    if outfile:
                        teff_mistag_rate_data.Write()
                        teff_mistag_rate_mc.Write()
                        th1_mistag_SF.Write()
    
    # soup together all CRs using a weighted average between the regions:
    for year in [2017,2018]:
        for wp in ['loose','tight','medium']:
            for sysvar in all_sysvar:
                if sysvar=="nominal":
                    sysvar_tag = ""
                else:
                    sysvar_tag = "_"+sysvar
                teff_mistag_rate_data_1e = outfile.Get(f'mistag_rate_data_1e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_2e = outfile.Get(f'mistag_rate_data_2e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_1m = outfile.Get(f'mistag_rate_data_1m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_2m = outfile.Get(f'mistag_rate_data_2m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_g = outfile.Get(f'mistag_rate_data_g_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_1e = outfile.Get(f'mistag_rate_mc_1e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_2e = outfile.Get(f'mistag_rate_mc_2e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_1m = outfile.Get(f'mistag_rate_mc_1m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_2m = outfile.Get(f'mistag_rate_mc_2m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_g = outfile.Get(f'mistag_rate_mc_g_{wp}_{year}{sysvar_tag}')
                # souped SF for all W/Z regions
                teff_mistag_rate_data_wz = teff_mistag_rate_data_1e + teff_mistag_rate_data_2e\
                        + teff_mistag_rate_data_1m + teff_mistag_rate_data_2m
                teff_mistag_rate_data_wz.SetNameTitle(f'mistag_rate_data_wz_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W and Z')
                teff_mistag_rate_mc_wz = teff_mistag_rate_mc_1e + teff_mistag_rate_mc_2e\
                        + teff_mistag_rate_mc_1m + teff_mistag_rate_mc_2m
                teff_mistag_rate_mc_wz.SetNameTitle(f'mistag_rate_mc_wz_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W and Z')
                th1_mistag_SF_wz = ratio_of_efficiencies(f'mistag_SF_wz_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for W and Z', teff_mistag_rate_data_wz, teff_mistag_rate_mc_wz)
                # souped SF for all W regions
                teff_mistag_rate_data_w = teff_mistag_rate_data_1e + teff_mistag_rate_data_1m
                teff_mistag_rate_data_w.SetNameTitle(f'mistag_rate_data_w_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W')
                teff_mistag_rate_mc_w = teff_mistag_rate_mc_1e + teff_mistag_rate_mc_1m
                teff_mistag_rate_mc_w.SetNameTitle(f'mistag_rate_mc_w_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W')
                th1_mistag_SF_w = ratio_of_efficiencies(f'mistag_SF_w_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for W', teff_mistag_rate_data_w, teff_mistag_rate_mc_w)
                # souped SF for all Z regions
                teff_mistag_rate_data_z = teff_mistag_rate_data_2e + teff_mistag_rate_data_2m
                teff_mistag_rate_data_z.SetNameTitle(f'mistag_rate_data_z_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for Z')
                teff_mistag_rate_mc_z = teff_mistag_rate_mc_2e + teff_mistag_rate_mc_2m
                teff_mistag_rate_mc_z.SetNameTitle(f'mistag_rate_mc_z_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for Z')
                th1_mistag_SF_z = ratio_of_efficiencies(f'mistag_SF_z_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for Z', teff_mistag_rate_data_z, teff_mistag_rate_mc_z)
                # souped SF for all regions including photon
                teff_mistag_rate_data_all = teff_mistag_rate_data_1e + teff_mistag_rate_data_2e\
                        + teff_mistag_rate_data_1m + teff_mistag_rate_data_2m + teff_mistag_rate_data_g
                teff_mistag_rate_data_all.SetNameTitle(f'mistag_rate_data_all_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for all')
                teff_mistag_rate_mc_all = teff_mistag_rate_mc_1e + teff_mistag_rate_mc_2e\
                        + teff_mistag_rate_mc_1m + teff_mistag_rate_mc_2m + teff_mistag_rate_mc_g
                teff_mistag_rate_mc_all.SetNameTitle(f'mistag_rate_mc_all_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for all')
                th1_mistag_SF_all = ratio_of_efficiencies(f'mistag_SF_all_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for all', teff_mistag_rate_data_all, teff_mistag_rate_mc_all)
                if outfile:
                    teff_mistag_rate_data_wz.Write()
                    teff_mistag_rate_mc_wz.Write()
                    th1_mistag_SF_wz.Write()
                    teff_mistag_rate_data_w.Write()
                    teff_mistag_rate_mc_w.Write()
                    th1_mistag_SF_w.Write()
                    teff_mistag_rate_data_z.Write()
                    teff_mistag_rate_mc_z.Write()
                    th1_mistag_SF_z.Write()
                    teff_mistag_rate_data_all.Write()
                    teff_mistag_rate_mc_all.Write()
                    th1_mistag_SF_all.Write()
    
    if outfile:
        outfile.Close()
Example #33
    sprayer = BuckshotSolver
    seeker = PowellDirectionalSolver
    npts = 25 # number of solvers
    retry = 1 # max consecutive iteration retries without a cache 'miss'
    tol = 8   # rounding precision
    mem = 1   # cache rounding precision

    #CUTE: 'configure' monitor and archive if they are desired
    if stepmon:
        stepmon = LoggingMonitor(1) # monitor for all runs
        itermon = LoggingMonitor(1, filename='inv.txt') #XXX: log.txt?
    else:
        stepmon = itermon = None
    if archive: #python2.5
        ar_name = '__%s_%sD_cache__' % (model.__self__.__class__.__name__,ndim)
        archive = dir_archive(ar_name, serialized=True, cached=False)
        ar_name = '__%s_%sD_invcache__' % (model.__self__.__class__.__name__,ndim)
        ivcache = dir_archive(ar_name, serialized=True, cached=False)
    else:
        archive = ivcache = None

    from mystic.search import Searcher #XXX: init w/ archive, then UseArchive?
    sampler = Searcher(npts, retry, tol, mem, _map, archive, sprayer, seeker)
    sampler.Verbose(disp)
    sampler.UseTrajectories(traj)

    ### doit ###
    maxpts = 1000. #10000.
    surface = Surface(model, sampler, maxpts=maxpts, dim=ndim)
    surface.UseMonitor(stepmon, itermon)
    surface.UseArchive(archive, ivcache)
Example #34
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 2013-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/klepto/LICENSE

from klepto.archives import dir_archive
from pox import rmtree

# start fresh
rmtree('foo', ignore_errors=True)


d = dir_archive('foo', cached=False)
key = '1234TESTMETESTMETESTME1234'
d._mkdir(key)
#XXX: repeat mkdir does nothing, should it clear?  I think not.
_dir = d._mkdir(key)
assert d._getdir(key) == _dir
d._rmdir(key)

# with _pickle
x = [1,2,3,4,5]
d._fast = True
d[key] = x
assert d[key] == x
d._rmdir(key)

# with dill
d._fast = False