def __init__(self, Cluster=1):
    """Load the index series of one cluster from MasatoWBindices.txt.

    Cluster is 1-based: it picks entry ``Cluster - 1`` of the transposed
    table returned by ``read_table``.  The covered years are not read from
    the file; they are the first and last entries of ``arange(1958, 2002)``.
    """
    source_path = '/Users/rca/hadley/eddy_stress/bruce/MasatoWBindices.txt'
    # Transpose so that each cluster can be selected with a single index.
    per_cluster = array(read_table(source_path)).transpose()
    covered_years = arange(1958, 2002)
    self.YearStart = covered_years[0]
    self.YearStop = covered_years[-1]
    self.index = per_cluster[Cluster - 1]
Example #2
0
def create_df(columns, grp, myrank, ranks):
    ''' build this rank's dataframe from a group and a list of datasets
    arguments:
    columns: datasets to be read as columns in the dataframe
    grp: name of the group to be read
    myrank: rank of the calling process
    ranks: total number of MPI ranks in the job
    output: a pandas DataFrame wrapping whatever read_table.read_table
            returns for these arguments
    '''
    rank_data = read_table.read_table(columns, grp, myrank, ranks)
    return pd.DataFrame(rank_data)
Example #3
0
def test_read_table():
    """Write a small table as HTML and check the columns read_table returns.

    The fixture has four rows and the columns No / Title / Attach /
    Published; the parsed result must be a DataFrame exposing the columns
    subject, author, count, Attach and Published.
    """
    header = ["No", "Title", "Attach", "Published"]
    rows = [
        [
            "a{0}".format(n),
            "b{0} author{0} Search 2{0}".format(n),
            "Attach{0}".format(n),
            "d{0}".format(n),
        ]
        for n in range(1, 5)
    ]
    input_table = pd.DataFrame(rows, columns=header, index=range(4))

    with tempfile.NamedTemporaryFile(mode='w+t') as html_file:
        input_table.to_html(html_file)
        # Rewind so the reader starts at the beginning of the written HTML.
        html_file.seek(0)
        result = read_table.read_table(html_file)

    assert isinstance(result, pd.DataFrame)
    for expected_column in ('subject', 'author', 'count', 'Attach', 'Published'):
        assert expected_column in result.columns
Example #4
0
def _data_minus_background(yields):
    """Return the per-bin difference between the "data" and "Total" columns."""
    return [datacount - bkgcount
            for datacount, bkgcount in zip(yields["data"], yields["Total"])]


def _fake_rates(denominators, numerators):
    """Return num / den per bin, using E(0, 0) where the denominator value is zero."""
    return [num / den if den.val != 0 else E(0, 0)
            for den, num in zip(denominators, numerators)]


def _drop_flow_bins(rates):
    """Remove the first two entries and the last entry of *rates* in place.

    Raises IndexError when *rates* holds fewer than three entries.
    """
    rates.pop(0)   # underflow bin
    rates.pop(0)   # second leading bin, also discarded
    rates.pop(-1)  # overflow bin


def _fill_from_second_bin(hist, rates):
    """Copy value and error of each rate into *hist*, starting at bin index 2."""
    for idx, fr in enumerate(rates):
        hist.SetBinContent(idx + 2, fr.val)
        hist.SetBinError(idx + 2, fr.err)


def main_fake_rate_measurement(prefix, output_name, etaregion="", procname="ttbar6"):
    """Compute data-driven and ttbar-MC fake rates, plot them, and return both histograms.

    The ntuple version and tag are taken from ``sys.argv[1]`` and
    ``sys.argv[2]``; yield tables are read from
    ``plots/<ntuple_version>/<tag>/lin/``.

    Arguments:
        prefix: string prepended to every yield-table file name.
        output_name: base name (without extension) of the PDF that is written.
        etaregion: string inserted just before ".txt" in the table file names.
        procname: column of the MC tables used for the MC fake rate.

    Returns:
        A tuple ``(mc_hist, data_hist)`` of clones of the filled histograms.

    Raises:
        ValueError: if the ntuple version contains none of 2016, 2017, 2018,
            so no luminosity can be chosen.
    """
    # Parse the input arguments
    try:
        ntuple_version = sys.argv[1]
        tag = sys.argv[2]
    except IndexError:
        # NOTE(review): usage() is presumably expected to terminate the
        # program; if it returns, the names above stay unbound.
        usage()

    # Integrated luminosity per data-taking year.  When several years match,
    # the last matching entry wins, as with the original chain of ifs.
    lumi = None
    for year, year_lumi in (("2016", 35.9), ("2017", 41.3), ("2018", 59.74)):
        if year in ntuple_version:
            lumi = year_lumi
    if lumi is None:
        raise ValueError(
            "cannot determine luminosity: no known year in ntuple version "
            "{!r}".format(ntuple_version))

    basedir = "plots/{}/{}/lin/".format(ntuple_version, tag)

    # Denominator : fake from data (i.e. data - prompt)
    yields_ddfake = rt.read_table(basedir + prefix + "Prompt__lepFakeCand2PtFineVarBin"+etaregion+".txt")
    yields_ddfake["ddfake"] = _data_minus_background(yields_ddfake)

    # Numerator : tight fake from data (i.e. data - prompt)
    yields_ddfake_tight = rt.read_table(basedir + prefix + "TightPrompt__lepFakeCand2PtFineVarBin"+etaregion+".txt")
    yields_ddfake_tight["ddfake"] = _data_minus_background(yields_ddfake_tight)

    fr_data = _fake_rates(yields_ddfake["ddfake"], yields_ddfake_tight["ddfake"])
    _drop_flow_bins(fr_data)
    print(fr_data)

    # Denominator: fake directly from ttbar MC
    yields_ttbar = rt.read_table(basedir + prefix + "Fake__lepFakeCand2PtFineVarBin"+etaregion+".txt")

    # Numerator: tight fake directly from ttbar MC
    yields_ttbar_tight = rt.read_table(basedir + prefix + "TightFake__lepFakeCand2PtFineVarBin"+etaregion+".txt")

    fr_mc = _fake_rates(yields_ttbar[procname], yields_ttbar_tight[procname])
    # The MC rates are printed before trimming (the data rates after it);
    # kept as is so the printed output does not change.
    print(fr_mc)
    _drop_flow_bins(fr_mc)

    # bin boundaries
    bounds = [0., 10., 20., 30., 50., 70.]

    # NOTE(review): both histograms share the name "FR"; left unchanged
    # because the returned clones carry that name.
    h_fr_data = r.TH1F("FR","",len(bounds)-1,array('d',bounds))
    h_fr_mc = r.TH1F("FR","",len(bounds)-1,array('d',bounds))

    _fill_from_second_bin(h_fr_data, fr_data)
    _fill_from_second_bin(h_fr_mc, fr_mc)

    # Options
    alloptions = {
        "ratio_range": [0.0, 2.0],
        "nbins": 180,
        "autobin": False,
        "legend_scalex": 0.8,
        "legend_scaley": 0.8,
        "output_name": basedir + "/"+output_name+".pdf",
        "bkg_sort_method": "unsorted",
        "no_ratio": False,
        "print_yield": True,
        "yield_prec": 3,
        "draw_points": True,
        "hist_line_none": True,
        "show_bkg_errors": True,
        "lumi_value": lumi,
    }

    p.plot_hist(
        sigs=[],
        bgs=[h_fr_mc.Clone()],
        data=h_fr_data.Clone(),
        syst=None,
        colors=[2001],
        legend_labels=["MC t#bar{t}"],
        options=alloptions)

    return h_fr_mc.Clone(), h_fr_data.Clone()
 def __init__(self):
     """Load the wave-activity index and the span of years it covers.

     ``read_table`` is expected to yield two sequences here (years and
     index values); the first and last year are stored as plain ints.
     """
     source_path = '/Users/rca/hadley/eddy_stress/bruce/WaveActivityIndex.txt'
     years, values = read_table(source_path)
     first_year = int(years[0])
     self.YearStart = first_year
     last_year = int(years[-1])
     self.YearStop = last_year
     self.index = values