def test_archive():
    # try some of the different __init__ configurations
    archive = dir_archive(cached=False)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, fast=True)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, compression=3)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, memmode='r+')
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, serialized=False)
    check_basic(archive)
    #check_numpy(archive) #FIXME: see issue #53
    rmtree('memo')
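
The check_basic and check_numpy helpers above come from klepto's test suite and are not shown on this page. A minimal sketch of what they plausibly assert (the exact checks are an assumption, not the library's actual tests):

def check_basic(archive):
    # hypothetical: round-trip a few plain-Python values
    archive['int'] = 1
    archive['str'] = 'hello'
    archive['list'] = [1, 2, 3]
    assert archive['int'] == 1
    assert archive['str'] == 'hello'
    assert archive['list'] == [1, 2, 3]

def check_numpy(archive):
    # hypothetical: round-trip a numpy array
    import numpy as np
    x = np.arange(5)
    archive['array'] = x
    assert all(archive['array'] == x)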
Example #2
def draw_skeleton(path_GT,path_pred,path_visual,name,MPII):
# =============================================================================
#     Draw skeleton based on model predictions
# =============================================================================
    
    prediction=klepto.dir_archive(path_pred,cached=False)
    prediction.load()
    
    archive=klepto.dir_archive(path_GT,cached=False)
    archive.load()
    
    img=archive[name]['img'].astype('uint8')
    heatmap=prediction[name]
    
    # define connections between joints for each dataset
    if MPII:
        lines = [(0,1),(1,2),(2,6),(6,3),(3,4),(4,5),(6,7),(7,8),(8,9),(10,11),(11,12),(12,7),(7,13),(13,14),(14,15)]
    else:
        lines = [(0,1),(1,2),(3,4),(4,5),(6,7),(7,8),(8,9),(9,10),(10,11),(2,8),(3,9),(12,13)]
    coords = dict(enumerate(list(rescale_joint_coords(heatmap))))
    for p0, p1 in lines:
        if coords[p0] == (0, 0) or coords[p1] == (0, 0):
            continue  # skip connections involving undetected joints
        cv2.line(img, coords[p0], coords[p1],
                 (rand(0, 255), rand(0, 255), rand(0, 255)),
                 thickness=2, lineType=8)
    plt.imshow(img)
    plt.imsave(path_visual+'Skeleton.png',img)
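
rescale_joint_coords is not defined on this page. Judging from how its result is used above (one (x, y) point per joint, with (0, 0) marking an undetected joint), a hypothetical sketch could map each heatmap channel's argmax back to image coordinates; the shape convention and threshold below are assumptions:

import numpy as np

def rescale_joint_coords(heatmap, img_size=(256, 256), threshold=0.1):
    # heatmap assumed shaped (1, h_heat, w_heat, njoints)
    hm = heatmap[0]
    h, w, njoints = hm.shape
    coords = []
    for j in range(njoints):
        channel = hm[:, :, j]
        y, x = np.unravel_index(np.argmax(channel), channel.shape)
        if channel[y, x] < threshold:
            coords.append((0, 0))  # joint not detected
        else:
            # rescale from heatmap resolution to image resolution
            coords.append((int(x * img_size[1] / w), int(y * img_size[0] / h)))
    return coords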
Example #3
def test_archive():
    # try some of the different __init__ configurations
    archive = dir_archive(cached=False)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, fast=True)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, compression=3)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, memmode='r+')
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, serialized=False)
    check_basic(archive)
    check_numpy(archive)
    rmtree('memo')
Example #4
def prediction(path_GT,path_pred,mymodel):
# =============================================================================
#     Compute Prediction of image
# =============================================================================
    
    prediction=klepto.dir_archive(path_pred,{},cached=False)

    archive= klepto.dir_archive(path_GT,cached=False)
    archive.load()

    for name in archive.keys():
        img=archive[name]['img'].reshape(1,w_pic,h_pic,3)
        predict_heat=mymodel.predict(img/255)
        prediction[name]=predict_heat
Example #5
def PCK(path_GT,path_pred,njoints):
# =============================================================================
#     Compute the PCK metric
# =============================================================================
    prediction_set = klepto.dir_archive(path_pred,cached=False)
    prediction_set.load()

    gt_maps = klepto.dir_archive(path_GT,cached=False)
    gt_maps.load()

    accuracy=[0]*njoints
    for name in prediction_set.keys():
        accuracy=accuracy_pred(prediction_set[name], gt_maps[name]['joints'],accuracy)
    return np.array(accuracy)/len(prediction_set)
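
accuracy_pred is not shown either. Since 'joints' is stored as per-joint heatmaps in these examples (see the data generator in Example #7 below), a hedged sketch of a PCK-style check could compare the argmax of each predicted channel against the ground truth; the shapes and the distance threshold are assumptions:

import numpy as np

def accuracy_pred(pred_heatmap, gt_heatmap, accuracy, threshold=0.5):
    # pred_heatmap assumed shaped (1, h_heat, w_heat, njoints),
    # gt_heatmap assumed shaped (njoints, h_heat, w_heat)
    for j in range(len(accuracy)):
        gt = gt_heatmap[j]
        pred = pred_heatmap[0][:, :, j]
        gy, gx = np.unravel_index(np.argmax(gt), gt.shape)
        py, px = np.unravel_index(np.argmax(pred), pred.shape)
        # count the joint as correct if the peaks are close (normalized distance)
        if np.hypot(px - gx, py - gy) / gt.shape[0] < threshold:
            accuracy[j] += 1
    return accuracy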
Example #6
def test_foo():
    # start fresh
    rmtree('foo', ignore_errors=True)

    d = dir_archive('foo', cached=False)
    key = '1234TESTMETESTMETESTME1234'
    d._mkdir(key)
    #XXX: repeat mkdir does nothing, should it clear?  I think not.
    _dir = d._mkdir(key)
    assert d._getdir(key) == _dir
    d._rmdir(key)

    # with _pickle
    x = [1,2,3,4,5]
    d._fast = True
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with dill
    d._fast = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with import
    d._serialized = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)
    d._serialized = True

    try: 
        import numpy as np
        y = np.array(x)

        # with _pickle
        d._fast = True
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with dill
        d._fast = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with import
        d._serialized = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)
        d._serialized = True

    except ImportError:
        pass

    # clean up
    rmtree('foo')
Example #7
def train_data_generator(path,
                         batch_size,
                         inres=(h_pic, w_pic),
                         outres=(h_heat, w_heat)):
    # =============================================================================
    #     Create data generator
    # =============================================================================
    archive_train = klepto.dir_archive(path, cached=False)
    archive_train.load()
    all_images = np.array(list(archive_train.keys()))
    size = len(all_images)

    while True:

        # shuffle the image order for this epoch
        names = np.random.permutation(list(archive_train.keys()))
        num_of_batches = size // batch_size

        for im in range(num_of_batches):
            gt_stack = np.zeros(shape=(batch_size, outres[0], outres[1],
                                       nOutput))
            img_stack = np.zeros(shape=(batch_size, inres[0], inres[1], 3))

            selected_photo_names = names[im * batch_size:(im + 1) * batch_size]

            for j in range(len(selected_photo_names)):

                gt_stack[j, :, :, :] = np.transpose(
                    np.array(archive_train[selected_photo_names[j]]['joints']),
                    (1, 2, 0))
                img_stack[j, :, :, :] = archive_train[
                    selected_photo_names[j]]['img'] / 255.

            yield (img_stack, gt_stack)
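
A typical way to consume this generator with a Keras-style model; mymodel (as in Example #4), the archive path, and the dataset size are illustrative assumptions:

batch_size = 16
size_of_training_set = 1000   # illustrative; matches len(archive) inside the generator
gen = train_data_generator('path/to/train_archive', batch_size)
mymodel.fit(gen, steps_per_epoch=size_of_training_set // batch_size, epochs=10)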
Example #8
def main():
    args = parse_commandline()

    use_klepto = True
    if use_klepto:
        acc = dir_archive(args.inpath,
                          serialized=True,
                          compression=0,
                          memsize=1e3)
        acc.load('recoil')
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
    else:
        acc = acc_from_dir(args.inpath)

    outdir = pjoin('./output/', os.path.basename(args.inpath))

    if args.channel == 'monojet':
        from legacy_monojet import legacy_limit_input_monojet
        legacy_limit_input_monojet(acc, outdir=outdir)
    elif args.channel == 'monov':
        from legacy_monov import legacy_limit_input_monov
        legacy_limit_input_monov(acc, outdir=outdir)
    elif args.channel == 'vbfhinv':
        from legacy_vbf import legacy_limit_input_vbf
        legacy_limit_input_vbf(acc, outdir=outdir)
Example #9
def main():
    args = parse_commandline()

    use_klepto = True
    if use_klepto:
        acc = dir_archive(args.inpath, serialized=True, compression=0, memsize=1e3)
        acc.load('recoil')
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
    else:
        acc = acc_from_dir(args.inpath)

    args.outdir = pjoin('./output/',list(filter(lambda x:x,args.inpath.split('/')))[-1])
    for channel in args.channel.split(','):
        print(channel)
        if channel == 'monojet':
            from legacy_monojet import legacy_limit_input_monojet
            legacy_limit_input_monojet(acc, args)
        elif channel == 'monov':
            from legacy_monov import legacy_limit_input_monov
            legacy_limit_input_monov(acc, args)
        elif channel == 'vbfhinv':
            from legacy_vbf import legacy_limit_input_vbf
            legacy_limit_input_vbf(acc, outdir=args.outdir, unblind=args.unblind)
Example #10
def main():
    inpath = sys.argv[1]
    #acc = acc_from_dir("./input/2019-10-07_das_lhevpt_dressed_v1")
    
    acc = dir_archive(
                      inpath,
                      serialized=True,
                      compression=0,
                      memsize=1e3
                      )
    acc.load('sumw')
    acc.load('sumw2')


    outputrootfile = uproot.recreate('2017_gen_v_pt_qcd_sf.root')
    sf_1d(acc, tag='wjet', regex='W.*',outputrootfile=outputrootfile)
    sf_1d(acc, tag='dy', regex='.*DY.*',outputrootfile=outputrootfile)
    # # outputrootfile = uproot.recreate(f'test.root')
    sf_2d(acc, tag='wjet', regex='W.*',pt_type='dress',outputrootfile=outputrootfile)
    sf_2d(acc, tag='dy', regex='.*DY.*',pt_type='dress',outputrootfile=outputrootfile)

    sf_1d(acc, tag='gjets', regex=r'G\d?Jet.*', outputrootfile=outputrootfile)
    # outputrootfile = uproot.recreate('test.root')

    sf_2d(acc, tag='gjets', regex=r'G\d?Jet.*', pt_type='stat1', outputrootfile=outputrootfile)
Example #11
def test_foo():
    # start fresh
    rmtree('foo', ignore_errors=True)

    d = dir_archive('foo', cached=False)
    key = '1234TESTMETESTMETESTME1234'
    d._mkdir(key)
    #XXX: repeat mkdir does nothing, should it clear?  I think not.
    _dir = d._mkdir(key)
    assert d._getdir(key) == _dir
    d._rmdir(key)

    # with _pickle
    x = [1, 2, 3, 4, 5]
    d._fast = True
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with dill
    d._fast = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with import
    d._serialized = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)
    d._serialized = True

    try:
        import numpy as np
        y = np.array(x)

        # with _pickle
        d._fast = True
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with dill
        d._fast = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with import
        d._serialized = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)
        d._serialized = True

    except ImportError:
        pass

    # clean up
    rmtree('foo')
Example #12
def load_big_data(fpath, fname):
    """
    https://stackoverflow.com/questions/17513036/pickle-dump-huge-file-without-memory-error
    """
    arch = dir_archive(fpath + fname, cached=False, serialized=True)
    arch.load(fname)
    return arch[fname]
Example #13
def save_big_data(fpath, fname, data):
    """
    https://stackoverflow.com/questions/17513036/pickle-dump-huge-file-without-memory-error
    """
    arch = dir_archive(fpath + fname, cached=False, serialized=True)
    arch[fname] = data
    # dump from memory cache to the on-disk archive
    arch.dump()
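
A round trip with the two helpers above (the path and key are illustrative):

data = list(range(10**6))
save_big_data('/tmp/cache/', 'bigdata', data)
restored = load_big_data('/tmp/cache/', 'bigdata')
assert restored == data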
Example #14
    def __init__(self, path):

        # store information
        self.path = path
        self.arch = archives.dir_archive(self.path)

        # load new data in archive file
        self.arch.load()
Example #15
def klepto_load(inpath):
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    return acc
Example #16
def main():
    inpath_vbf = rebsmear_path(
        'submission/vbfhinv/merged_2021-06-11_vbfhinv_ULv8_05Feb21_rebsmear_CR'
    )
    inpath_rs = rebsmear_path(
        'submission/merged_2021-06-11_rebsmear_privatePS')

    acc_vbf = dir_archive(inpath_vbf)
    acc_vbf.load('sumw')
    acc_vbf.load('sumw_pileup')
    acc_vbf.load('nevents')

    h_qcd = extract_yields_in_cr(acc_vbf, distribution='mjj')

    # Rebalance and smear output
    acc_rs = dir_archive(inpath_rs)

    plot_rebsmear_prediction(acc_rs, h_qcd)
Example #17
def klepto_load(loc):
    '''
    for loading the dumped dictionaries
    :return: loaded dictionary
    '''

    dic = dir_archive(loc, {}, serialized=True)
    dic.load()
    print('dictionary loaded')
    return dic
Example #18
def read_archive(filename, axis=None): #NOTE: could return iterators
    """read 'parameters' and 'cost' from klepto.dir_archive

    Inputs:
      filename: str path to location of klepto.archives.dir_archive
      axis: int, the desired index of the tuple-valued dataset [0,N]
    """
    from klepto.archives import dir_archive
    arch = dir_archive(filename, cached=True)
    return for_monitor(arch, axis=axis)
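
Note that this example uses cached=True, unlike most snippets on this page: with a memory cache in front of the archive, reads and writes hit the in-memory dict and are synced explicitly with load()/dump(), whereas cached=False proxies every access straight to disk. A small sketch of the difference (archive names are illustrative):

from klepto.archives import dir_archive

# cached=False: every read/write goes directly to the files on disk
direct = dir_archive('demo_direct', cached=False)
direct['x'] = 1           # written to disk immediately

# cached=True: operate on an in-memory dict, sync explicitly
cached = dir_archive('demo_cached', cached=True)
cached['x'] = 1           # only in memory so far
cached.dump()             # push the cache to the on-disk archive
cached.load()             # pull the on-disk archive into the cache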
Example #19
def klepto_dump(merged_dict, loc):
    '''
    to dump the merged dictionary file
    :param merged_dict: the final merged dictionary obtained
    :return: None
    '''

    demo = dir_archive(loc, merged_dict, serialized=True)
    demo.dump()
    del demo
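
Paired with klepto_load from Example #17, this gives a simple dict persistence round trip (the path is illustrative):

merged = {'a': 1, 'b': [2, 3]}
klepto_dump(merged, '/tmp/merged_dict')
restored = klepto_load('/tmp/merged_dict')
assert restored['a'] == 1 and restored['b'] == [2, 3]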
Example #20
def met_trigger_eff(distribution):
    if distribution == 'mjj':
        tag = '120pfht_mu_mjj'
    elif distribution == 'recoil':
        tag = '120pfht_mu_recoil'
        # NOTE: indir is only set in this branch; distribution == 'mjj' would hit a NameError below
        indir = '/afs/cern.ch/user/a/aakpinar/bucoffea/bucoffea/submission/2019-11-13_vbf_trigger_recoil'

    acc = dir_archive(indir, serialized=True, compression=0, memsize=1e3)

    # Pre-load necessary information
    acc.load('recoil')
    acc.load('sumw')
    acc.load('sumw2')

    for year in [2017, 2018]:
        for jeteta_config in [
                'two_central_jets', 'two_forward_jets',
                'one_jet_forward_one_jet_central'
        ]:
            # Single muon CR
            region_tag = '1m'
            for dataset in ['WJetsToLNu_HT_MLM', 'SingleMuon']:
                plot_recoil(acc,
                            region_tag=region_tag,
                            distribution=distribution,
                            axis_name=distribution,
                            dataset=dataset,
                            year=year,
                            tag=tag,
                            jeteta_config=jeteta_config,
                            output_format='pdf')
            # Double muon CR
            region_tag = '2m'
            for dataset in ['VDYJetsToLL_M-50_HT_MLM', 'SingleMuon']:
                plot_recoil(acc,
                            region_tag=region_tag,
                            distribution=distribution,
                            axis_name=distribution,
                            dataset=dataset,
                            year=year,
                            tag=tag,
                            jeteta_config=jeteta_config,
                            output_format='pdf')

    for jeteta_config in [
            'two_central_jets', 'two_forward_jets',
            'one_jet_forward_one_jet_central'
    ]:
        data_mc_comparison_plot(tag,
                                distribution=distribution,
                                jeteta_config=jeteta_config,
                                output_format='pdf')

    plot_scalefactors(tag, distribution=distribution)
Example #21
def main():
    inpath = sys.argv[1]

    acc = dir_archive(inpath, serialized=True, memsize=1e3, compression=0)

    acc.load('sumw')
    acc.load('sumw2')

    plot_ht_dist(acc, regex='WJetsToLNu.*(2017|2018)', tag='wjets')
    plot_ht_dist(acc, regex='DYJets.*(2017|2018)', tag='dy')
    plot_ht_dist(acc, regex='GJets_HT.*(2017)', tag='gjets_17')
    plot_ht_dist(acc, regex='GJets_DR-0p4.*(2017)', tag='gjets_dr_17')
Example #22
def main():
    inpath = sys.argv[1]

    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)

    acc.load('sumw')
    acc.load('sumw2')

    # Create the output ROOT file to save the
    # PDF uncertainties as a function of v-pt
    outputrootpath = './output/theory_variations/rootfiles'
    if not os.path.exists(outputrootpath):
        os.makedirs(outputrootpath)

    outputrootfile_z_over_w = uproot.recreate(
        pjoin(outputrootpath, 'zoverw_pdf_unc.root'))
    outputrootfile_g_over_z = uproot.recreate(
        pjoin(outputrootpath, 'goverz_pdf_unc.root'))

    w_nom, w_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='WNJetsToLNu.*', tag='wjet')
    dy_nom, dy_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='DYNJetsToLL.*', tag='dy')
    gjets_nom, gjets_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='G1Jet.*', tag='gjets')

    data_for_ratio = {
        'z_over_w': {
            'noms': (dy_nom, w_nom),
            'uncs': (dy_unc, w_unc),
            'rootfile': outputrootfile_z_over_w
        },
        'g_over_z': {
            'noms': (gjets_nom, dy_nom),
            'uncs': (gjets_unc, dy_unc),
            'rootfile': outputrootfile_g_over_z
        },
    }

    for tag, entry in data_for_ratio.items():
        noms = entry['noms']
        uncs = entry['uncs']
        plot_ratio(noms=noms,
                   uncs=uncs,
                   tag=tag,
                   vpt_edges=vpt_edges,
                   vpt_centers=vpt_centers,
                   outputrootfile=entry['rootfile'])
Example #23
def _load_and_sum(args):
    """
    merge item from list of coffea files and dump it to file

    For each file, the saved item corresponding to the
    same key is read out. The sum of the individual
    items for the individual files is dumped.

    :param args: Tuple (key to use, file list, output name)
    :type args: tuple
    :return: 0
    :rtype: int
    """

    # Args is a tuple for easy multiprocessing
    key, files, outname = args

    # Load the individual items
    items = []
    for fn in files:
        try:
            items.append(load(fn)[key])
        except KeyError:
            continue
    
    # Recursive merging
    while len(items) > 1:
        x = items.pop(0)
        y = items.pop(0)
        s = x + y
        items.append(s)
    
    assert len(items) == 1
    
    # dump the content using klepto
    arc = dir_archive(
                    outname,
                    serialized=True,
                    compression=0,
                    memsize=1e3,
                    )
    arc[key] = items[0]
    arc.dump(key)
    arc.clear()
    return 0
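
As the docstring notes, packing the arguments into a tuple makes this easy to parallelize; a sketch with illustrative keys, input files, and output name:

from multiprocessing import Pool

keys = ['sumw', 'sumw2', 'recoil']             # illustrative
files = ['out_1.coffea', 'out_2.coffea']       # illustrative
jobs = [(key, files, 'merged_output') for key in keys]
with Pool(processes=4) as pool:
    pool.map(_load_and_sum, jobs)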
Example #24
def main():
    inpath = "../../input/merged"
    year = 2017
    mc = re.compile(
        f'(VDY.*HT.*|QCD.*|W.*HT.*|ST_|TTJets-FXFX_|Diboson_|GJets.*HT.*|ZJetsToNuNu.*){year}'
    )
    signal = re.compile(f'WH.*{year}')
    distribution = "recoil"
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    acc.load(distribution)
    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('nevents')
    try:
        acc[distribution] = merge_extensions(
            acc[distribution], acc, reweight_pu=not ('nopu' in distribution))
        scale_xs_lumi(acc[distribution])
        acc[distribution] = merge_datasets(acc[distribution])
        S_over_B(acc,
                 distribution,
                 'sr_tight_v',
                 mc=mc,
                 signal=signal,
                 unc=0.05,
                 outname="SB_unc005.png",
                 cutlim=(250, 750))
        S_over_B(acc,
                 distribution,
                 'sr_tight_v',
                 mc=mc,
                 signal=signal,
                 unc=0.10,
                 outname="SB_unc010.png",
                 cutlim=(250, 750))
    except KeyError:
        print("key error ")
        return -2
Example #25
def main():
    inpath = sys.argv[1]
    acc = dir_archive(inpath)
    cfname = 'cutflow_sr_vbf'
    acc.load(cfname)

    cf = acc[cfname]

    outtag = re.findall('merged_.*', inpath)[0].replace('/', '')

    datasets = list(cf.keys())
    cuts = cf[datasets[0]].keys()

    combined_cf = Counter({cut: 0 for cut in cuts})

    for d in datasets:
        cutflow = Counter(cf[d])
        combined_cf += cutflow

    pcutflow = []
    for idx, (c, v) in enumerate(combined_cf.items()):
        if idx == 0:
            acceptance = 100
        else:
            acceptance = v / list(combined_cf.values())[idx - 1] * 100
        pcutflow.append([c, v, acceptance])

    outdir = f'./output/{outtag}'
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    outpath = pjoin(outdir, 'cutflow.txt')
    with open(outpath, 'w+') as f:
        f.write(outtag)
        f.write('\n')

        f.write(
            tabulate(pcutflow,
                     headers=['Cut', 'Number of Events', 'Acceptance (%)'],
                     floatfmt=[".0f", ".0f", ".3f"]))

    print(f'File saved: {outpath}')
Example #26
def main():
    args = parse_cli()
    # Path to the directory containing list of ROOT input files (R&S trees)
    inpath = args.inpath
    acc = dir_archive(inpath)

    acc.load('sumw')
    acc.load('sumw2')

    try:
        outtag = re.findall('merged_.*', inpath)[0]
    except IndexError:
        raise RuntimeError(f'Check the naming of input: {os.path.basename(inpath)}')

    outdir = f'./output/{outtag}'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    distributions = BINNINGS.keys()

    regions = [
        'inclusive',
        'sr_vbf',
        'cr_vbf_qcd'
    ]
    
    for region in regions:
        if not re.match(args.region, region):
            continue
        for distribution in distributions:
            if not re.match(args.distribution, distribution):
                continue
            
            make_plot(acc, 
                outdir=outdir, 
                distribution=distribution,
                region=region,
                dataset='JetHT',
                years=args.years
            )
Example #27
def main():

    overwrite = True

    # load the config and the cache
    cfg = loadConfig()

    # Inputs are defined in a dictionary
    # dataset : list of files
    fileset = {
        'tW_scattering': glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/tW_scattering__nanoAOD/merged/*.root"),
        "TTW":           glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20_ext1-v1/merged/*.root") \
                        + glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToQQ_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root"),
        #        "ttbar":        glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromT_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/*.root") # adding this is still surprisingly fast (20GB file!)
        "ttbar": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromTbar_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root")
    }

    # histograms
    histograms = [
        "MET_pt", "Jet_pt", "Jet_eta", "Jet_pt_fwd", "W_pt_notFromTop",
        "GenJet_pt_fwd", "Spectator_pt", "Spectator_eta"
    ]
    histograms += [
        "Top_pt", "Top_eta", "Antitop_pt", "Antitop_eta", "W_pt", "W_eta",
        "N_b", "N_jet", "dijet_mass", "dijet_mass_bestW", "dijet_mass_secondW",
        "digenjet_mass", "dijet_deltaR"
    ]

    # initialize cache
    cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']),
                                     cfg['caches']['simpleProcessor']),
                        serialized=True)
    if not overwrite:
        cache.load()

    if cfg == cache.get('cfg') and histograms == cache.get(
            'histograms') and fileset == cache.get('fileset') and cache.get(
                'simple_output'):
        output = cache.get('simple_output')

    else:
        # Run the processor
        output = processor.run_uproot_job(
            fileset,
            treename='Events',
            processor_instance=exampleProcessor(),
            executor=processor.futures_executor,
            executor_args={
                'workers': 1,
                'function_args': {
                    'flatten': False
                }
            },
            chunksize=500000,
        )
        cache['fileset'] = fileset
        cache['cfg'] = cfg
        cache['histograms'] = histograms
        cache['simple_output'] = output
        cache.dump()

    # Make a few plots
    outdir = "./tmp_plots"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for name in histograms:
        print(name)
        histogram = output[name]
        if name == 'MET_pt':
            # rebin
            new_met_bins = hist.Bin('pt', r'$E_T^{miss} \ (GeV)$', 20, 0, 200)
            histogram = histogram.rebin('pt', new_met_bins)
        if name == 'W_pt_notFromTop':
            # rebin
            new_pt_bins = hist.Bin('pt', r'$p_{T}(W) \ (GeV)$', 25, 0, 500)
            histogram = histogram.rebin('pt', new_pt_bins)

        ax = hist.plot1d(
            histogram, overlay="dataset", density=False, stack=True
        )  # stacked absolute yields (not density)
        ax.set_yscale('linear')  # can be log
        #ax.set_ylim(0,0.1)
        ax.figure.savefig(os.path.join(outdir, "{}.pdf".format(name)))
        ax.clear()

        ax = hist.plot1d(
            histogram, overlay="dataset", density=True, stack=False
        )  # make density plots because we don't care about x-sec differences
        ax.set_yscale('linear')  # can be log
        #ax.set_ylim(0,0.1)
        ax.figure.savefig(os.path.join(outdir, "{}_shape.pdf".format(name)))
        ax.clear()

    return output
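
The caching logic above (recompute only when the config, file set, or histogram list changed) distills to a reusable pattern; a minimal sketch with illustrative names, assuming a dict-like klepto cache:

def cached_run(cache, inputs, compute):
    # rerun `compute` only if any input differs from what was cached
    cache.load()
    if all(cache.get(k) == v for k, v in inputs.items()) and cache.get('output'):
        return cache.get('output')
    output = compute()
    cache.update(inputs)          # remember the inputs that produced this output
    cache['output'] = output
    cache.dump()
    return output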
Example #28
def plot(inpath):
    indir = os.path.abspath(inpath)

    # The processor output is stored in an
    # 'accumulator', which in our case is
    # just a dictionary holding all the histograms
    # Put all your *coffea files into 'indir' and
    # pass the directory as an argument here.
    # All input files in the directory will
    # automatically be found, merged and read.
    # The merging only happens the first time
    # you run over a specific set of inputs.
    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)
    # Get a settings dictionary that details
    # which plots to make for each region,
    # what the axis limits are, etc
    # Can add plots by extending the dictionary
    # Or modify axes ranges, etc
    settings = plot_settings()

    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region
        # Muon regions use MET,
        # electron+photon regions use EGamma
        # ( EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': None,
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for MC selection
        # Match datasets by regular expressions
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Want to compare LO and NLO,
        # so do same thing for NLO V samples
        # All non-V samples remain the same
        mc_nlo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|W.*FXFX.*).*{year}'),
        }

        regions = list(mc_lo.keys())
        # Remove signal region, no need in ratio plots
        regions.remove('sr_vbf')

        # Make control region ratio plots for both
        # LO and NLO. Can be skipped if you only
        # want data / MC agreement plots.
        outdir = f'./output/{os.path.basename(indir)}/ratios'

        # Load ingredients from cache
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
        cr_ratio_plot(acc,
                      year=year,
                      tag='losf',
                      outdir=outdir,
                      mc=mc_lo,
                      regions=regions,
                      distribution='mjj')
        cr_ratio_plot(acc,
                      year=year,
                      tag='nlo',
                      outdir=outdir,
                      mc=mc_nlo,
                      regions=regions,
                      distribution='mjj')

        # Data / MC plots are made here
        # Loop over all regions
        for region in mc_lo.keys():
            ratio = region != 'sr_vbf'
            # Make separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'
            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset.keys():
                # Load from cache
                if distribution not in merged:
                    acc.load(distribution)

                    if distribution not in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)
                try:
                    # The heavy lifting of making a plot is hidden
                    # in make_plot. We call it once using the LO MC
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=mc_lo[region],
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='losf',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)

                    # And then we also call it for the NLO MC
                    # The output files will be named according to the 'tag'
                    # argument, so we  will be able to tell them apart.
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=mc_nlo[region],
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='nlo',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)

                except KeyError:
                    continue
Example #29
def plot(args):
    indir = os.path.abspath(args.inpath)

    # The processor output is stored in an
    # 'accumulator', which in our case is
    # just a dictionary holding all the histograms
    # Put all your *coffea files into 'indir' and
    # pass the directory as an argument here.
    # All input files in the directory will
    # automatically be found, merged and read.
    # The merging only happens the first time
    # you run over a specific set of inputs.
    acc = dir_archive(args.inpath, serialized=True, compression=0, memsize=1e3)
    # Get a settings dictionary that details
    # which plots to make for each region,
    # what the axis limits are, etc
    # Can add plots by extending the dictionary
    # Or modify axes ranges, etc
    settings = plot_settings()

    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region
        # Muon regions use MET,
        # electron+photon regions use EGamma
        # ( EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': f'MET_{year}',
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for MC selection
        # Match datasets by regular expressions
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf':
            re.compile(
                f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1m_vbf':
            re.compile(
                f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_1e_vbf':
            re.compile(
                f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'
            ),
            'cr_2m_vbf':
            re.compile(
                f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_2e_vbf':
            re.compile(
                f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'
            ),
            'cr_g_vbf':
            re.compile(
                f'(GJets_(DR-0p4|SM).*|QCD_data.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Load ingredients from cache
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        # Data / MC plots are made here
        # Loop over all regions
        for region in mc_lo.keys():
            if not re.match(args.region, region):
                continue
            # Plot ratio pads for all regions (now that we're unblinded)
            ratio = True
            # Make separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'
            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset.keys():
                if not re.match(args.distribution, distribution):
                    continue
                # Load from cache
                if distribution not in merged:
                    acc.load(distribution)

                    if distribution not in acc.keys():
                        print(
                            f"WARNING: Distribution {distribution} not found in input files."
                        )
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution],
                        acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)
                try:
                    # The heavy lifting of making a plot is hidden
                    # in make_plot. We call it once using the LO MC
                    imc = mc_lo[region]
                    if "cr_g" in region and distribution != "recoil":
                        imc = re.compile(
                            imc.pattern.replace('QCD_data', 'QCD.*HT'))
                    make_plot(
                        acc,
                        region=region,
                        distribution=distribution,
                        year=year,
                        data=data[region],
                        mc=imc,
                        ylim=plotset[distribution].get('ylim', None),
                        xlim=plotset[distribution].get('xlim', None),
                        tag='losf',
                        outdir=f'./output/{os.path.basename(indir)}/{region}',
                        output_format='pdf',
                        ratio=ratio)
                except KeyError:
                    continue
Example #30
if __name__ == '__main__':

    from klepto.archives import dir_archive
    from Tools.samples import * # fileset_2018 #, fileset_2018_small
    from processor.default_accumulators import *

    overwrite = True
    small = False
    save = True

    # load the config and the cache
    cfg = loadConfig()
    
    cacheName = 'SS_analysis'
    if small: cacheName += '_small'
    cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']), cacheName), serialized=True)
    
    year = 2018
    
    fileset = {
        'topW_v3': fileset_2018['topW_v3'],
        'topW_EFT_cp8': fileset_2018['topW_EFT_cp8'],
        'topW_EFT_mix': fileset_2018['topW_EFT_mix'],
        'TTW': fileset_2018['TTW'],
        'TTZ': fileset_2018['TTZ'],
        'TTH': fileset_2018['TTH'],
        'diboson': fileset_2018['diboson'],
        'triboson': fileset_2018['triboson'],
        #'wpwp': fileset_2018['wpwp'],
        'TTTT': fileset_2018['TTTT'],
        'ttbar': fileset_2018['ttbar'],
Example #31
    sprayer = BuckshotSolver
    seeker = PowellDirectionalSolver
    npts = 25 # number of solvers
    retry = 1 # max consecutive iteration retries without a cache 'miss'
    tol = 8   # rounding precision
    mem = 1   # cache rounding precision

    #CUTE: 'configure' monitor and archive if they are desired
    if stepmon:
        stepmon = LoggingMonitor(1) # monitor for all runs
        itermon = LoggingMonitor(1, filename='inv.txt') #XXX: log.txt?
    else:
        stepmon = itermon = None
    if archive: #python2.5
        ar_name = '__%s_%sD_cache__' % (model.__self__.__class__.__name__,ndim)
        archive = dir_archive(ar_name, serialized=True, cached=False)
        ar_name = '__%s_%sD_invcache__' % (model.__self__.__class__.__name__,ndim)
        ivcache = dir_archive(ar_name, serialized=True, cached=False)
    else:
        archive = ivcache = None

    from mystic.search import Searcher #XXX: init w/ archive, then UseArchive?
    sampler = Searcher(npts, retry, tol, mem, _map, archive, sprayer, seeker)
    sampler.Verbose(disp)
    sampler.UseTrajectories(traj)

    ### doit ###
    maxpts = 1000. #10000.
    surface = Surface(model, sampler, maxpts=maxpts, dim=ndim)
    surface.UseMonitor(stepmon, itermon)
    surface.UseArchive(archive, ivcache)
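
Both caches end up as plain klepto directory archives on disk, so they can be reopened and inspected after the run (the archive name here is illustrative):

from klepto.archives import dir_archive

cache = dir_archive('__model_3D_cache__', serialized=True, cached=False)
print(len(cache), 'cached evaluations')
for key in list(cache.keys())[:5]:
    print(key, '->', cache[key])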
Example #32
def main():
    # set to True if you want to update the mistag ROOT file,
    # otherwise just make the plots
    if True:
        outfile = ROOT.TFile.Open(outfilename,'recreate')
    else:
        outfile = None

    # Prepare the acc
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
        )
    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('nevents')
    distribution = 'ak8_pt0'
    distribution_Vmatched = 'ak8_Vmatched_pt0'
    distribution_mass = 'ak8_mass0'
    acc.load(distribution)
    acc.load(distribution_Vmatched)
    acc.load(distribution_mass)
    
    # merge datasets and scale with lumi xs
    htmp = acc[distribution]
    htmp_Vmatched = acc[distribution_Vmatched]
    htmp_mass = acc[distribution_mass]
    htmp = merge_extensions(htmp, acc, reweight_pu=True)
    scale_xs_lumi(htmp)
    htmp = merge_datasets(htmp)
    acc[distribution]=htmp
    htmp_Vmatched = merge_extensions(htmp_Vmatched, acc, reweight_pu=True)
    scale_xs_lumi(htmp_Vmatched)
    htmp_Vmatched = merge_datasets(htmp_Vmatched)
    acc[distribution_Vmatched]=htmp_Vmatched
    htmp_mass = merge_extensions(htmp_mass, acc, reweight_pu=True)
    scale_xs_lumi(htmp_mass)
    htmp_mass = merge_datasets(htmp_mass)
    acc[distribution_mass]=htmp_mass

    acc[distribution].axis('dataset').sorting = 'integral'
    acc[distribution_Vmatched].axis('dataset').sorting = 'integral'
    acc[distribution_mass].axis('dataset').sorting = 'integral'
    
    # rebin the jet-pt axis if a new binning is given
    if newbin:
        htmp = htmp.rebin(htmp.axis('jetpt'),newbin)
        htmp_Vmatched = htmp_Vmatched.rebin(htmp_Vmatched.axis('jetpt'),newbin)
    edges = htmp.axis('jetpt').edges()
    centers = htmp.axis('jetpt').centers()
    halfwidth = [centers[i]-edges[i] for i in range(len(centers))]

    for lepton_flag in ['1m','2m','1e','2e','g']:
    #for lepton_flag in ['g']:
        for year in [2017,2018]:
            mc_map = {
                'cr_1m_v'      : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT).*{year}'),
                'cr_1e_v'      : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT).*{year}'),
                'cr_2m_v'      : re.compile(f'(Top.*FXFX|Diboson|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_2e_v'      : re.compile(f'(Top.*FXFX|Diboson|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_g_v'       : re.compile(f'(Diboson|QCD_HT|GJets_DR.*HT|VQQGamma_FXFX|WJetsToLNu.*HT).*{year}'),
                'cr_nobveto_v' : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
                'sr_v'         : re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
            }
            mc_map_noV = {
                'cr_1m_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT).*{year}'),
                'cr_1e_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT).*{year}'),
                'cr_2m_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_2e_v'      : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_g_v'       : re.compile(f'(QCD_HT|GJets_DR.*HT|WJetsToLNu.*HT).*{year}'),
                'cr_nobveto_v' : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
                'sr_v'         : re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
            }
            mc_map_realV = {
                'cr_1m_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_1e_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_2m_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_2e_v'      : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_g_v'       : re.compile(f'(Diboson|VQQGamma_FXFX).*{year}'),
                'cr_nobveto_v' : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'sr_v'         : re.compile(f'(Top.*FXFX|Diboson).*{year}'),
            }
            # use NLO GJets for the measurement if needed
            if nlogjet:
                mc_map['cr_g_v']     = re.compile(f'(Diboson|QCD_HT|GJets_1j|VQQGamma_FXFX|WJetsToLNu.*HT).*{year}')
                mc_map_noV['cr_g_v'] = re.compile(f'(QCD_HT|GJets_1j|WJetsToLNu.*HT).*{year}')
            for wp in ['loose','tight','medium']:
                region_all = f'cr_{lepton_flag}_hasmass_inclusive_v'
                region_all_nomass = f'cr_{lepton_flag}_inclusive_v'
                region_pass= f'cr_{lepton_flag}_nomistag_{wp}_v'
                region_pass_nomass= f'cr_{lepton_flag}_nomistag_nomass_{wp}_v'
                mc_All = mc_map[f'cr_{lepton_flag}_v']
                mc_False = mc_map_noV[f'cr_{lepton_flag}_v']
                mc_Real = mc_map_realV[f'cr_{lepton_flag}_v']
                if lepton_flag in ['1e','2e','g']:
                    data = re.compile(f'EGamma_{year}')
                else:
                    data = re.compile(f'MET_{year}')
                    
                ### DEBUG ###
                # print(acc[distribution][mc_All].integrate("region",region_all).values())
                # print(acc[distribution][mc_All].integrate("region",region_pass).values())
                # print(acc[distribution_Vmatched][mc_All].integrate("region",region_pass).values())
                #############
                # make stack plots for the 'all' and 'pass' regions
                try:
                    acc["alskjxkjo"]  # deliberately raises KeyError so the stack plots below are skipped
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_all_nomass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_all_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_all, distribution=distribution_Vmatched, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=None, mc=mc_False, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCNoV")
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=data, mc=None, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="data")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_pass, distribution=distribution_Vmatched, year=year, data=None, mc=mc_Real, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=None, mc=mc_False, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="MCNoV")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=data, mc=None, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3), ratio=False, tag="data")
                except ValueError:
                    print(f"Warning(ValueError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                except AssertionError:
                    print(f"Warning(AssertionError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                except KeyError:
                    print(f"Warning(KeyError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                try:
                    make_plot(acc, region=region_pass_nomass, distribution=distribution, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All, outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4,5e3))
                except Exception:
                    pass

                # extract the mistag rate for data and MC
                selector_region_all,selector_region_pass = region_all, region_pass
                if not massden:
                    selector_region_all = region_all_nomass
                if not massnum:
                    selector_region_pass = region_pass_nomass

                for sysvar in all_sysvar:
                    if sysvar=="nominal":
                        sysvar_tag = ""
                    else:
                        sysvar_tag = "_"+sysvar

                    # background subtraction from data: remove real Vs
                    h_data = htmp[data].integrate('dataset')
                    #h_mc_Real  = htmp[mc_Real].integrate('dataset')
                    h_mc_False = htmp[mc_False].integrate('dataset')

                    h_mc_Real  = htmp_Vmatched[mc_Real]
                    # vary within systematic uncertainties (approximate):
                    # normalization 10% for both, b-veto unc. 6% for top / 2% for diboson, V-tag unc. 10% for both
                    if sysvar=="sysUp": h_mc_Real.scale(1.15)
                    if sysvar=="sysDn": h_mc_Real.scale(0.85)
                    if sysvar=="topNormUp": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 1.10, "Top_FXFX_2018"      : 1.10} , axis="dataset" ) 
                    if sysvar=="topNormDn": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 0.90, "Top_FXFX_2018"      : 0.90} , axis="dataset" ) 
                    if sysvar=="vvNormUp":  h_mc_Real.scale  ( { "Diboson_2017"       : 1.10, "Diboson_2018"       : 1.10} , axis="dataset" ) 
                    if sysvar=="vvNormDn":  h_mc_Real.scale  ( { "Diboson_2017"       : 0.90, "Diboson_2018"       : 0.90} , axis="dataset" ) 
                    if sysvar=="vgNormUp":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 1.10, "VQQGamma_FXFX_2018" : 1.10} , axis="dataset" ) 
                    if sysvar=="vgNormDn":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 0.90, "VQQGamma_FXFX_2018" : 0.90} , axis="dataset" ) 
                    if sysvar=="topVTagUp": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 1.10, "Top_FXFX_2018"      : 1.10} , axis="dataset" ) 
                    if sysvar=="topVTagDn": h_mc_Real.scale  ( { "Top_FXFX_2017"      : 0.90, "Top_FXFX_2018"      : 0.90} , axis="dataset" ) 
                    if sysvar=="vvVTagUp":  h_mc_Real.scale  ( { "Diboson_2017"       : 1.10, "Diboson_2018"       : 1.10} , axis="dataset" ) 
                    if sysvar=="vvVTagDn":  h_mc_Real.scale  ( { "Diboson_2017"       : 0.90, "Diboson_2018"       : 0.90} , axis="dataset" ) 
                    if sysvar=="vgVTagUp":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 1.10, "VQQGamma_FXFX_2018" : 1.10} , axis="dataset" ) 
                    if sysvar=="vgVTagDn":  h_mc_Real.scale  ( { "VQQGamma_FXFX_2017" : 0.90, "VQQGamma_FXFX_2018" : 0.90} , axis="dataset" ) 
                    if sysvar=="topBVetoUp": h_mc_Real.scale ( { "Top_FXFX_2017"      : 1.06, "Top_FXFX_2018"      : 1.06} , axis="dataset" ) 
                    if sysvar=="topBVetoDn": h_mc_Real.scale ( { "Top_FXFX_2017"      : 0.94, "Top_FXFX_2018"      : 0.94} , axis="dataset" ) 
                    if sysvar=="vvBVetoUp":  h_mc_Real.scale ( { "Diboson_2017"       : 1.02, "Diboson_2018"       : 1.02} , axis="dataset" ) 
                    if sysvar=="vvBVetoDn":  h_mc_Real.scale ( { "Diboson_2017"       : 0.98, "Diboson_2018"       : 0.98} , axis="dataset" ) 
                    if sysvar=="vgBVetoUp":  h_mc_Real.scale ( { "VQQGamma_FXFX_2017" : 1.02, "VQQGamma_FXFX_2018" : 1.02} , axis="dataset" ) 
                    if sysvar=="vgBVetoDn":  h_mc_Real.scale ( { "VQQGamma_FXFX_2017" : 0.98, "VQQGamma_FXFX_2018" : 0.98} , axis="dataset" ) 
                    h_mc_Real  = h_mc_Real.integrate('dataset')
                    h_mc_Real.scale(-1*realVSF) # just for background subtraction
                    h_data.add(h_mc_Real)
    
                    teff_mistag_rate_data = get_mistag_rate(h_data, selector_region_all, selector_region_pass, flag=f'data_{lepton_flag}_{wp}_{year}{sysvar_tag}', isData=True)
                    teff_mistag_rate_data.SetNameTitle(f'mistag_rate_data_{lepton_flag}_{wp}_{year}{sysvar_tag}','mistagging rate')
                    teff_mistag_rate_mc = get_mistag_rate(h_mc_False, selector_region_all, selector_region_pass, flag=f'mc_{lepton_flag}_{wp}_{year}{sysvar_tag}', isData=False)
                    teff_mistag_rate_mc.SetNameTitle(f'mistag_rate_mc_{lepton_flag}_{wp}_{year}{sysvar_tag}','mistagging rate')
    
                    # get the scale factors
                    # note that ROOT cannot divide two TEfficiency objects directly, so this is done manually
                    th1_mistag_SF = ratio_of_efficiencies(f'mistag_SF_{lepton_flag}_{wp}_{year}{sysvar_tag}', 'mistag scale factor', teff_mistag_rate_data, teff_mistag_rate_mc)
                    
                    # save the mistag rate and SF histograms into root file
                    if outfile:
                        teff_mistag_rate_data.Write()
                        teff_mistag_rate_mc.Write()
                        th1_mistag_SF.Write()
    
    # soup together all CRs using a weighted average between the regions:
    for year in [2017,2018]:
        for wp in ['loose','tight','medium']:
            for sysvar in all_sysvar:
                if sysvar=="nominal":
                    sysvar_tag = ""
                else:
                    sysvar_tag = "_"+sysvar
                teff_mistag_rate_data_1e = outfile.Get(f'mistag_rate_data_1e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_2e = outfile.Get(f'mistag_rate_data_2e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_1m = outfile.Get(f'mistag_rate_data_1m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_2m = outfile.Get(f'mistag_rate_data_2m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_g = outfile.Get(f'mistag_rate_data_g_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_1e = outfile.Get(f'mistag_rate_mc_1e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_2e = outfile.Get(f'mistag_rate_mc_2e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_1m = outfile.Get(f'mistag_rate_mc_1m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_2m = outfile.Get(f'mistag_rate_mc_2m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_g = outfile.Get(f'mistag_rate_mc_g_{wp}_{year}{sysvar_tag}')
                # souped SF for all W/Z regions
                teff_mistag_rate_data_wz = teff_mistag_rate_data_1e + teff_mistag_rate_data_2e\
                        + teff_mistag_rate_data_1m + teff_mistag_rate_data_2m
                teff_mistag_rate_data_wz.SetNameTitle(f'mistag_rate_data_wz_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W and Z')
                teff_mistag_rate_mc_wz = teff_mistag_rate_mc_1e + teff_mistag_rate_mc_2e\
                        + teff_mistag_rate_mc_1m + teff_mistag_rate_mc_2m
                teff_mistag_rate_mc_wz.SetNameTitle(f'mistag_rate_mc_wz_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W and Z')
                th1_mistag_SF_wz = ratio_of_efficiencies(f'mistag_SF_wz_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for W and Z', teff_mistag_rate_data_wz, teff_mistag_rate_mc_wz)
                # souped SF for all W regions
                teff_mistag_rate_data_w = teff_mistag_rate_data_1e + teff_mistag_rate_data_1m
                teff_mistag_rate_data_w.SetNameTitle(f'mistag_rate_data_w_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W')
                teff_mistag_rate_mc_w = teff_mistag_rate_mc_1e + teff_mistag_rate_mc_1m
                teff_mistag_rate_mc_w.SetNameTitle(f'mistag_rate_mc_w_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for W')
                th1_mistag_SF_w = ratio_of_efficiencies(f'mistag_SF_w_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for W', teff_mistag_rate_data_w, teff_mistag_rate_mc_w)
                # souped SF for all Z regions
                teff_mistag_rate_data_z = teff_mistag_rate_data_2e + teff_mistag_rate_data_2m
                teff_mistag_rate_data_z.SetNameTitle(f'mistag_rate_data_z_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for Z')
                teff_mistag_rate_mc_z = teff_mistag_rate_mc_2e + teff_mistag_rate_mc_2m
                teff_mistag_rate_mc_z.SetNameTitle(f'mistag_rate_mc_z_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for Z')
                th1_mistag_SF_z = ratio_of_efficiencies(f'mistag_SF_z_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for Z', teff_mistag_rate_data_z, teff_mistag_rate_mc_z)
                # souped SF for all regions including photon
                teff_mistag_rate_data_all = teff_mistag_rate_data_1e + teff_mistag_rate_data_2e\
                        + teff_mistag_rate_data_1m + teff_mistag_rate_data_2m + teff_mistag_rate_data_g
                teff_mistag_rate_data_all.SetNameTitle(f'mistag_rate_data_all_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for all')
                teff_mistag_rate_mc_all = teff_mistag_rate_mc_1e + teff_mistag_rate_mc_2e\
                        + teff_mistag_rate_mc_1m + teff_mistag_rate_mc_2m + teff_mistag_rate_mc_g
                teff_mistag_rate_mc_all.SetNameTitle(f'mistag_rate_mc_all_{wp}_{year}{sysvar_tag}', 'souped mistagging rate for all')
                th1_mistag_SF_all = ratio_of_efficiencies(f'mistag_SF_all_{wp}_{year}{sysvar_tag}', 'souped mistag scale factor for all', teff_mistag_rate_data_all, teff_mistag_rate_mc_all)
                if outfile:
                    teff_mistag_rate_data_wz.Write()
                    teff_mistag_rate_mc_wz.Write()
                    th1_mistag_SF_wz.Write()
                    teff_mistag_rate_data_w.Write()
                    teff_mistag_rate_mc_w.Write()
                    th1_mistag_SF_w.Write()
                    teff_mistag_rate_data_z.Write()
                    teff_mistag_rate_mc_z.Write()
                    th1_mistag_SF_z.Write()
                    teff_mistag_rate_data_all.Write()
                    teff_mistag_rate_mc_all.Write()
                    th1_mistag_SF_all.Write()
    
    if outfile:
        outfile.Close()
Example #33
    sprayer = BuckshotSolver
    seeker = PowellDirectionalSolver
    npts = 25 # number of solvers
    retry = 1 # max consecutive iteration retries without a cache 'miss'
    tol = 8   # rounding precision
    mem = 1   # cache rounding precision

    #CUTE: 'configure' monitor and archive if they are desired
    if stepmon:
        stepmon = LoggingMonitor(1) # monitor for all runs
        itermon = LoggingMonitor(1, filename='inv.txt') #XXX: log.txt?
    else:
        stepmon = itermon = None
    if archive: #python2.5
        ar_name = '__%s_%sD_cache__' % (model.__self__.__class__.__name__,ndim)
        archive = dir_archive(ar_name, serialized=True, cached=False)
        ar_name = '__%s_%sD_invcache__' % (model.__self__.__class__.__name__,ndim)
        ivcache = dir_archive(ar_name, serialized=True, cached=False)
    else:
        archive = ivcache = None

    from mystic.search import Searcher #XXX: init w/ archive, then UseArchive?
    sampler = Searcher(npts, retry, tol, mem, _map, archive, sprayer, seeker)
    sampler.Verbose(disp)
    sampler.UseTrajectories(traj)

    ### doit ###
    maxpts = 1000. #10000.
    surface = Surface(model, sampler, maxpts=maxpts, dim=ndim)
    surface.UseMonitor(stepmon, itermon)
    surface.UseArchive(archive, ivcache)
Example #34
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 2013-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/klepto/LICENSE

from klepto.archives import dir_archive
from pox import rmtree

# start fresh
rmtree('foo', ignore_errors=True)


d = dir_archive('foo', cached=False)
key = '1234TESTMETESTMETESTME1234'
d._mkdir(key)
#XXX: repeat mkdir does nothing, should it clear?  I think not.
_dir = d._mkdir(key)
assert d._getdir(key) == _dir
d._rmdir(key)

# with _pickle
x = [1,2,3,4,5]
d._fast = True
d[key] = x
assert d[key] == x
d._rmdir(key)

# with dill
d._fast = False