Example #1
0
    def _generate_tree_and_space(self, index=None):
        """Build ``self.tree`` and ``self.space`` from the model data.

        One float32 branch per model variable is added to the tree, followed
        by a float32 ``weight`` branch. The phase space is the one-dimensional
        space of the single variable, or the combination of all of them.

        Parameters
        ----------
        index : index expression | None
            Selection applied to every value array and to the weights.
            ``None`` selects all entries of the first value array's length.
        """
        self.tree = None
        if index is None:
            # No selection requested: take the whole array.
            index = slice(len(self.model.values[0]))

        phase_spaces = []
        for pos, name in enumerate(self.model.vars):
            phase_spaces.append(
                OneDimPhaseSpace(name, *self.model.ranges[pos]))
            branch = np.array(self.model.values[pos][index],
                              dtype=[(name, np.float32)])
            if self.tree is None:
                # The first variable creates the tree ...
                self.tree = array2tree(branch)
            else:
                # ... every further variable becomes a new branch of it.
                array2tree(branch, tree=self.tree)

        weight_branch = np.array(self.model.weights[index],
                                 dtype=[("weight", np.float32)])
        array2tree(weight_branch, tree=self.tree)

        self.space = (phase_spaces[0] if len(phase_spaces) == 1
                      else CombinedPhaseSpace("PhspCombined", *phase_spaces))
Example #2
0
def array2root(array, filename,
               treename='tree', mode='update',
               compression='zlib'):
    """Write a structured array into a tree of a ROOT file.

    Parameters
    ----------
    array : numpy structured array
        Data handed to ``array2tree``.
    filename : str
        Path of the ROOT file. If it does not exist yet, ``mode`` is
        forced to ``'recreate'``.
    treename : str
        Name of the tree to create or, in ``'update'`` mode, to extend.
    mode : str
        Mode string passed to ``ROOT.TFile.Open``.
    compression : str
        ``'zlib'`` selects compression setting 101; any other value
        selects 201.

    Raises
    ------
    IOError
        If the file cannot be opened.
    """
    # Main idea stolen from 'root_numpy', in:
    #   root_numpy/src/tree.pyx

    # If the file does not exist yet, force 'recreate' mode.
    if not Path(filename).is_file():
        mode = 'recreate'

    # Since we explicitly specify the compression algorithm, it is always
    # backward compatible.
    compression_setting = 101 if compression == 'zlib' else 201
    rfile = ROOT.TFile.Open(filename, mode, "", compression_setting)
    if not rfile or rfile.IsZombie():
        raise IOError("Cannot open ROOT file '{}' in mode '{}'".format(
            filename, mode))

    try:
        # In 'update' mode extend the existing tree; when the file has no
        # tree of that name, Get() yields a null object, so create one.
        existing = rfile.Get(treename) if mode == 'update' else None
        if existing:
            array2tree(array, name=treename, tree=existing)
        else:
            array2tree(array, name=treename)

        # Possible alternative write modes:
        #   ROOT.TObject.kWriteDelete, ROOT.TObject.kOverwrite
        rfile.Write("", ROOT.TObject.kOverwrite)
    finally:
        # Always release the file handle, even if the conversion failed.
        rfile.Close()
Example #3
0
    def __init__(self,
                 var_args,
                 weights,
                 pdf_seed=None,
                 adaptive=False,
                 mc_conv=0):
        """Collect the variables, build the phase space and the data tree.

        Parameters
        ----------
        var_args : iterable
            Each item is passed to ``meerkat_variable``.
        weights : array-like
            Written as the float32 ``weight`` branch of the tree.
        pdf_seed, adaptive, mc_conv
            Stored unchanged on the instance.

        Exits the process with status 1 unless there are 1 to 4 variables.
        """
        # get input
        self.pdf_seed = pdf_seed
        self.adaptive = adaptive
        self.weights = weights
        self.variables = [meerkat_variable(arg) for arg in var_args]
        # Keep the caller's value; it used to be overwritten with 0.
        self.mc_conv = mc_conv

        # create info
        self.ndim = len(self.variables)
        self.var_names = []
        self.data = []
        self.spaces = []
        self.space = None
        self.bws = []
        self.nbins = []

        for var in self.variables:
            print(var)
            self.var_names.append(var.variable_name)
            self.data.append(var.values)
            self.spaces.append(
                OneDimPhaseSpace(var.variable_name, var.range[0],
                                 var.range[1]))
            self.bws.append(var.bw)
            self.nbins.append(var.nbins)

        # create combined phase space
        if self.ndim == 1:
            self.space = self.spaces[0]
        elif 2 <= self.ndim <= 4:
            self.space = CombinedPhaseSpace("PhspCombined", *self.spaces)
        else:
            print("FATAL: ndim > 4 not implemented")
            sys.exit(1)

        # create input data: the first variable creates the tree, the
        # remaining variables and the weights are added as further branches
        self.tree = array2tree(
            np.array(self.data[0], dtype=[(self.var_names[0], np.float32)]))
        for name, values in zip(self.var_names[1:], self.data[1:]):
            array2tree(np.array(values, dtype=[(name, np.float32)]),
                       tree=self.tree)
        array2tree(np.array(self.weights, dtype=[("weight", np.float32)]),
                   tree=self.tree)
Example #4
0
def addToFile(outPath, y, name):
    """Append ``y`` as a float32 branch to the ``nominal`` tree of a file.

    Parameters
    ----------
    outPath : str
        ROOT file, opened in append mode.
    y : array-like
        Values of the new branch; converted to float32 by value.
    name : str
        Name of the new branch.
    """
    # Convert by value and view the result as a one-field record array.
    # Assigning a record dtype in place would reinterpret the raw bytes
    # (wrong for anything but float32) and alter the caller's array.
    values = np.ascontiguousarray(y, dtype=np.float32).reshape(-1)
    branch = values.view([(name, 'float32')])

    # The context manager closes the file; no explicit Close() is needed.
    with root_open(outPath, mode='a') as myfile:
        root_numpy.array2tree(branch, tree=myfile.nominal)
        myfile.write()
Example #5
0
def test_to_root(folder,result_folder,output_root_folder,variables,is_signal,model_label,sample_list=None):
    """Convert scored test samples from HDF5 into ROOT files.

    With an empty ``sample_list`` the full test sample is split by the
    ``is_signal`` column into ``test_bkg.root`` and ``test_sgn.root``.
    Otherwise one ROOT file per sample file is written, holding a tree
    and a one-bin ``c_nEvents`` counter histogram.

    All output goes to ``output_root_folder/model_<model_label>``.
    """
    # NOTE(review): the loops below iterate the name `var`, which is not
    # defined in this function, rather than the `variables` parameter --
    # confirm which one is intended.
    model_dir = output_root_folder+'/model_'+model_label
    if not os.path.isdir(model_dir): os.mkdir(model_dir)

    if not sample_list:
        print("   Empty sample list, will use full sample . . .")
        ##Read test sample
        store = pd.HDFStore(result_folder+'test_score_'+model_label+'.h5')
        try:
            df_test = store.select("df")
        finally:
            store.close()

        for n, a in enumerate(var):
            back = np.array(df_test[a].loc[df_test[is_signal]==0].values, dtype=[(a, np.float64)])
            sign = np.array(df_test[a].loc[df_test[is_signal]==1].values, dtype=[(a, np.float64)])
            print(a," back: ", back)
            print(a," sign: ", sign)
            # The first variable recreates the files, the others add branches.
            array2root(back, model_dir+'/test_bkg.root', mode='recreate' if n==0 else 'update')
            array2root(sign, model_dir+'/test_sgn.root', mode='recreate' if n==0 else 'update')
        print("  Signal and background root files written : ", model_dir+'/test_*.root')
        return

    full_list = []
    for sl in sample_list:
        full_list += samples[sl]['files']

    for sample in full_list:
        ##Read test sample
        if not os.path.isfile(folder+sample+"_test.h5"):
            print("!!!File ", folder+sample+"_test.h5", " does not exist! Continuing")
            continue

        store = pd.HDFStore(result_folder+sample+"_score_"+model_label+".h5")
        try:
            df_test = store.select("df")
        finally:
            store.close()

        out_path = model_dir+'/'+sample+'.root'
        newFile = TFile(out_path, 'recreate')
        try:
            newFile.cd()
            skim = None
            for a in var:
                arr = np.array(df_test[a].values, dtype=[(a, np.float64)])
                if skim is None: skim = array2tree(arr)
                else: array2tree(arr, tree=skim)
            if skim is not None:
                skim.Write()

            ##Recreate c_nEvents histogram
            counter = TH1F("c_nEvents", "Event Counter", 1, 0., 1.)
            counter.Sumw2()
            ##Fill counter histogram with the first entry of c_nEvents
            counter.Fill(0., df_test["c_nEvents"].values[0])
            counter.Write()
        finally:
            newFile.Close()

        print("  Root file written : ", out_path)
Example #6
0
def main(args):
    """Check that a config file defines a valid model, using toy data.

    Every load variable of the model is created in a fresh workspace and
    filled with 10000 uniform random values inside its bounds; the toy
    dataset is imported before the model is defined in the workspace.
    """
    with open(args.configfile, 'r') as conff:
        config = json.load(conff)

    model = BinnedFitModel(config)
    workspace = r.RooWorkspace('test_ws')

    toy_columns = {}
    arg_set = r.RooArgSet()
    for name, limits in model.get_load_vars():
        try_factory(workspace, '{}[{}]'.format(name, limits))
        lower, upper = [float(edge) for edge in limits.split(',')]
        toy_columns[name] = np.random.uniform(lower, upper, 10000)
        arg_set.add(get_var(workspace, name))

    toy_frame = pd.DataFrame(toy_columns)
    toy_tree = array2tree(toy_frame.to_records(index=False))

    dataset = r.RooDataSet('full_data', 'test data', toy_tree, arg_set)
    ws_import(workspace, dataset)

    model_is_valid = model.define_model(workspace)
    if model_is_valid:
        print('configfile is a valid model definition')
Example #7
0
def hdf2root(infile, outfile, verbose=False):
    """Convert every leaf of an HDF5 file into a tree of a new ROOT file.

    Parameters
    ----------
    infile : str
        HDF5 file, opened read-only with PyTables.
    outfile : str
        ROOT file, recreated.
    verbose : bool
        Currently unused.

    Raises
    ------
    ImportError
        If rootpy or root_numpy cannot be imported.
    """
    try:
        from rootpy.io import root_open
        from rootpy import asrootpy
        from root_numpy import array2tree
    except ImportError:
        raise ImportError(
            "Please load ROOT into PYTHONPATH and install rootpy+root_numpy:\n"
            "   `pip install rootpy root_numpy`"
        )

    from tables import open_file

    h5 = open_file(infile, 'r')
    try:
        rf = root_open(outfile, 'recreate')
        try:
            # 'walk_nodes' does not allow checking whether a node is a
            # group or a leaf (per the original author, its exception
            # handling and type checks are unreliable), hence the nested
            # loop over groups and their leaves.
            for group in h5.walk_groups():
                for leafname, leaf in group._v_leaves.items():
                    arr = leaf[:]
                    if arr.dtype.names is None:
                        # Plain arrays get a single field named after the
                        # leaf so that array2tree receives a record array.
                        dt = np.dtype((arr.dtype, [(leafname, arr.dtype)]))
                        arr = arr.view(dt)
                    treename = leaf._v_pathname.replace('/', '_')
                    tree = asrootpy(array2tree(arr, name=treename))
                    tree.write()
        finally:
            rf.close()
    finally:
        # Release the input file even if the conversion failed.
        h5.close()
Example #8
0
def RooFitSig(mbbarray, bdtarray, weightarray, TC_mass, binstart, binend):
    """Build a signal PDF and return a histogram created from it.

    The PDF is a Crystal Ball plus Gaussian sum, wrapped in an extended
    PDF. No fit to the data is performed in this function; the histogram
    is created from the PDF with its initial parameter values.

    NOTE(review): ``mbbarray``, ``bdtarray`` and ``weightarray`` are
    overwritten with ``range(200)`` below, so the values passed by the
    caller are ignored -- this looks like a debugging leftover; confirm.

    Parameters
    ----------
    mbbarray, bdtarray, weightarray
        Currently ignored (see note above).
    TC_mass
        Initial value of ``m0`` / ``m02``; their ranges are +/- 60 around it.
    binstart, binend
        Passed to ``SetAxisRange`` on the returned histogram.

    Returns
    -------
    A deep copy of the histogram created from the extended PDF.
    """

    # Range of the mass observable; also sets the histogram bin count below.
    fitstart = 40
    fitend = 150

    # NOTE(review): the input arrays are replaced by dummy data here.
    mbbarray = range(200)
    bdtarray = range(200)
    weightarray = range(200)

    mass = RooRealVar("X", "m(bb)[GeV]", fitstart, fitend)
    BDT = RooRealVar("BDT", "BDT", -1, 100)
    weight = RooRealVar("weight", "weight", -100, 200)

    # Branch names match the RooRealVar names used for the dataset below.
    branchnames = ["X", "BDT", "weight"]

    # Pack the three arrays into one float64 record array, one row per entry.
    dtype = np.dtype([(branchnames[idx], np.float64)
                      for idx in range(len(branchnames))])
    treearray = np.array([(mbbarray[idx], bdtarray[idx], weightarray[idx])
                          for idx in range(len(mbbarray))], dtype)

    tree = rnp.array2tree(treearray)

    # Shape parameters. m02, mean and gauss2 are defined but do not enter
    # the signal PDF assembled below.
    m0 = RooRealVar("m0", "m0", TC_mass * 1., TC_mass * 1. - 60.,
                    TC_mass * 1. + 60.)
    m02 = RooRealVar("m02", "m02", TC_mass * 1., TC_mass * 1. - 60.,
                     TC_mass * 1. + 60.)
    alpha = RooRealVar("alpha", "alpha", 1.295, 1.0, 1.6)
    sigma2 = RooRealVar("sigma2", "sigma2", 35, 8., 100)
    n = RooRealVar("n", "n", 5, 1, 35)

    mean = RooRealVar("mean", "mean of gaussian", 750, 0, 6000)
    sigma = RooRealVar("sigma", "width of gaussian", 90, 38, 300)

    gauss = RooGaussian("gauss", "gaussian PDF", mass, m0, sigma)
    gauss2 = RooGaussian("gauss2", "gaussian PDF", mass, m02, sigma2)
    CBshape = RooCBShape("CBshape", "Crystal Ball PDF", mass, m0, sigma2,
                         alpha, n)

    ## PDF normalization (number of events of the extended PDF)
    num1 = RooRealVar("num1", "number of events", 400, 0, 5000)

    ## Relative weight of the two PDFs (Crystal Ball vs. Gaussian)
    f = RooRealVar("f", "f", 0.95, 0.6, 1)

    sigPdf = RooAddPdf("sigPdf", "Signal PDF", RooArgList(CBshape, gauss),
                       RooArgList(f))
    extPdf = RooExtendPdf("extPdf", "extPdf", sigPdf, num1)
    # Dataset built from the tree with the cut "BDT>0" and "weight" as the
    # weight variable.
    data = RooDataSet("data", "data", tree, RooArgSet(mass, BDT, weight),
                      "BDT>0", "weight")

    # Draw data and PDF on a frame; the frame itself is not returned.
    xframe = mass.frame()
    mass.setBins(20)
    data.plotOn(xframe)
    extPdf.plotOn(
        xframe)  #,Normalization(1.0,RooAbsReal.RelativeExpected),LineColor(1))

    # One bin per unit of the mass range (fitend - fitstart bins).
    hist = extPdf.createHistogram("X", fitend - fitstart)
    hist.SetAxisRange(binstart, binend)
    return deepcopy(hist)
Example #9
0
def augment_file(in_folder, out_folder, tree_name, mcolls):
    """Copy the MC ROOT file and append one prediction branch per model.

    The file ``Config.MC_filename`` is copied from ``in_folder`` to
    ``out_folder`` (created if missing). Each model collection in
    ``mcolls`` predicts on the copied data, and the predictions are added
    as 'f4' branches, named after the collections, to the tree
    ``<tree_name>/candTree`` of the copy.
    """
    # first, copy the original ROOT file to its destination
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    target_path = os.path.join(out_folder, Config.MC_filename)
    source_path = os.path.join(in_folder, Config.MC_filename)
    copyfile(source_path, target_path)

    # now read the file from its new location
    fcoll = FileCollection({target_path: cuts.no_cut}, 0.0, 1.0, tree_name = tree_name)
    n_entries = fcoll.get_length()
    indata = utils.read_data(fcoll, start = 0, stop = n_entries, branches = Config.branches, tree_name = tree_name)

    # collect the prediction of every ModelCollection together with its name
    predictions = []
    prepared_dtype = []
    for mcoll in mcolls:
        prediction = mcoll.predict(indata)
        predictions.append((mcoll.name, prediction))
        prepared_dtype.append((mcoll.name.encode("ascii"), 'f4'))

    print(prepared_dtype)

    # record array with one field per model, filled column by column
    new_branches = np.array(np.zeros(n_entries), dtype = prepared_dtype)
    for branch_name, out_data in predictions:
        new_branches[branch_name] = out_data

    # re-open the copied file in append mode and attach the new branches
    outfile = root_open(target_path, mode = "a")
    outtree = outfile.Get(tree_name + "/candTree")
    root_numpy.array2tree(new_branches, tree = outtree)
    outfile.write()
    outfile.close()
Example #10
0
def getdata(filesdict):
    """
    Load the timestamped readings of every entry of ``filesdict``.

    Input: dictionary whose values are the file name(s) handed to
           ``fileinput.input``; the files of one key are read as one stream.
           Each row holds: year month day hour minute second msecond value.
    Output: dictionary {key: [dates, values]} with ``dates`` an array of
            ``datetime`` objects and ``values`` the 'value' column.

    Side effects: when the global ``makeNtuple`` is set, a tree per key is
    appended to the global ``trees``; when ``useROOT`` is set as well, all
    trees are written to the ROOT file ``ntupleName``.
    """

    # if you need to make an ntuple, you have to have root_numpy...
    # if you already have it, you can comment this and uncomment it on the top of imports
    if makeNtuple:
        try:
            from root_numpy import array2tree
        except ImportError:
            critical("Missing root_numpy module.\n\033[93mPlease install root_numpy module, by doing in your command line:\npip install root_numpy --upgrade\033[0m")
            short_usage()
            dependencies()
            sys.exit(2)

    row_dtype = {'names' : ('year', 'month', 'day', 'hour', 'minute', 'second' , 'msecond', 'value'),
                 'formats': ('i4', 'i4','i4','i4','i4','i4','i4','float64')}

    axes = []
    # for every key, merge its input files into one stream and parse it
    for key in filesdict.keys():
        infiles = fileinput.input(filesdict[key])
        try:
            data_array = np.loadtxt(infiles, dtype=row_dtype)
        finally:
            # Reset the module-level fileinput state for the next key.
            fileinput.close()
        # A single-row input is returned as a 0-d array; make it 1-d so
        # that iteration and len() work.
        data_array = np.atleast_1d(data_array)

        if makeNtuple:
            info('filling tree %s', key)
            tree = array2tree(data_array)
            tree.SetName(key)
            trees.append(tree)

        # convert to datetime; the 'msecond' column is scaled by 1000 to
        # fill datetime's microsecond argument
        dates = np.array([
            datetime(row['year'], row['month'], row['day'], row['hour'],
                     row['minute'], row['second'], row['msecond']*1000)
            for row in data_array
        ])
        axes.append([dates, data_array['value']])

    # this is to write the file -- it has to be out of for loop
    if useROOT and makeNtuple:
        info("creating ROOT file with name %s" % ntupleName)
        fout = rt.TFile(ntupleName,'recreate')
        fout.cd()
        for outree in trees:
            outree.Write()
        fout.Close()

    return dict(zip(filesdict.keys(),axes))
Example #11
0
def run_pred(inputPath):
    """Score the ``nominal`` tree with ``xgbModel`` and append the result.

    The prediction is written as the float32 branch ``tZ_score_test2``.
    Returns 0 when the file or tree cannot be used, is empty, or already
    has the branch; returns None after a successful update.
    """
    f = TFile.Open(inputPath, "READ")
    try:
        nom = f.Get("nominal")
    except Exception:
        print('cant open ' + inputPath)
        return 0

    try:
        dsid = inputPath.split('/')[-1]
        dsid = dsid.replace('.root', '')
        print(dsid)

        try:
            nom.GetEntries()
        except Exception:
            print("failed to open")
            return 0

        # Probe for a branch the model needs.
        try:
            nom.Mll01
        except Exception:
            print('failed for ' + inputPath)
            return 0

        if nom.GetEntries() == 0:
            print("no entries")
            return 0
        if hasattr(nom, "tZ_score_test2"):
            print('already there')
            return 0

        event_dict = create_dict(nom)
        inDF = pd.DataFrame(event_dict)

        xgbMat = xgb.DMatrix(inDF, feature_names=list(inDF))
        tZ_score_test = xgbModel.predict(xgbMat)
    finally:
        # Release the read handle before the file is reopened for append.
        f.Close()

    # Convert by value and view as a one-field record array; assigning a
    # record dtype in place would reinterpret the raw bytes.
    scores = np.ascontiguousarray(tZ_score_test, dtype=np.float32).reshape(-1)
    branch = scores.view([('tZ_score_test2', 'float32')])

    # The context manager closes the file; no explicit Close() is needed.
    with root_open(inputPath, mode='a') as myfile:
        root_numpy.array2tree(branch, tree=myfile.nominal)
        myfile.write()
Example #12
0
def Write2TTree(data, tName):
    """Convert ``data`` to a tree named ``tName`` and report the timings.

    ``data`` must provide ``to_records(index=False)``. The created tree is
    returned so that callers can keep a reference to it (previously
    nothing was returned).
    """
    # Use %-formatting; passing tName as a second print argument printed
    # the literal '%s'.
    print("--> Writing to TTree: %s" % tName)
    t0 = time.time()
    print("    * Converting to structrured array")
    data_arr = data.to_records(index=False)
    print("       dt=%i s" % (time.time() - t0))
    print("    * Converting to TTree")
    t0 = time.time()
    ttree = array2tree(data_arr, name=tName)
    print("       dt=%i s" % (time.time() - t0))
    return ttree
Example #13
0
def test_array2tree():
    """Round-trip a record array through array2tree / tree2array.

    Also checks that passing an existing tree extends it and that a
    non-tree ``tree`` argument raises TypeError.
    """
    # np.bool_ replaces the alias np.bool, which newer NumPy removed.
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),
    ],
                 dtype=[('x', np.int32), ('y', np.float32), ('z', np.float64),
                        ('w', np.bool_)])
    tmp = ROOT.TFile.Open('test_array2tree_temp_file.root', 'recreate')
    try:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)
    finally:
        # Remove the temporary file even when an assertion fails.
        tmp.Close()
        os.remove(tmp.GetName())
    assert_raises(TypeError, rnp.array2tree, a, tree=object)
Example #14
0
def test_array2tree():
    """Round-trip a record array through array2tree / tree2array.

    Also checks that passing an existing tree extends it and that a
    non-tree ``tree`` argument raises TypeError.
    """
    # np.bool_ replaces the alias np.bool, which newer NumPy removed.
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),
    ],
                 dtype=[('x', np.int32), ('y', np.float32), ('z', np.float64),
                        ('w', np.bool_)])

    # presumably temp() provides a scratch ROOT file for the trees -- it is
    # defined outside this block
    with temp() as tmp:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)

    assert_raises(TypeError, rnp.array2tree, a, tree=object)
Example #15
0
def array_as_tree(xarr, treename=None, fcontext=None, xkwargs=None):
    """Convert an array to a tree, optionally writing it to a directory.

    Combines array2tree and array2root but leaves TFile manipulation to
    the user.

    Parameters
    ----------
    xarr : numpy structured array
        Data handed to ``rnp.array2tree``.
    treename : str | None
        Tree name; when None, array2tree's own default name is used.
    fcontext : ROOT directory | None
        When given, the tree is created with this directory as context
        and written to it.
    xkwargs : dict | None
        Extra keyword arguments forwarded to ``rnp.array2tree``.

    Returns
    -------
    The created tree.
    """
    extra = dict(xkwargs) if xkwargs else {}
    context = root.TDirectory.TContext(fcontext) if fcontext else None
    try:
        if treename is None:
            # Do not forward None as the tree name.
            tree = rnp.array2tree(xarr, **extra)
        else:
            tree = rnp.array2tree(xarr, treename, **extra)
        if fcontext:
            if treename is None:
                fcontext.WriteTObject(tree)
            else:
                fcontext.WriteTObject(tree, treename)
    finally:
        # Drop the context object also when the conversion fails.
        del context
    return tree
Example #16
0
def addToRoot(inputPath, event_dict, model, name, toDrop=None):
    '''
    Given an event dictionary, make a prediction and add it to the root file.

    The prediction of ``model`` on the events is appended as the float32
    branch ``name`` to the ``nominal`` tree of ``inputPath``.
    ``toDrop`` is currently unused.
    '''

    inDF = pd.DataFrame(event_dict)

    xgbMat = xgb.DMatrix(inDF, feature_names=list(inDF))
    y_pred = model.predict(xgbMat)

    # Convert by value and view as a one-field record array; assigning a
    # record dtype in place would reinterpret the raw bytes.
    scores = np.ascontiguousarray(y_pred, dtype=np.float32).reshape(-1)
    branch = scores.view([(name, 'float32')])

    # The context manager closes the file; no explicit Close() is needed.
    with root_open(inputPath, mode='a') as myfile:
        root_numpy.array2tree(branch, tree=myfile.nominal)
        myfile.write()
Example #17
0
def makeRooMultiDataset(datasetName,
                        trees,
                        vars,
                        helpingVars=[],
                        datasetDescription=None,
                        categoryName='sample',
                        weightsName=None,
                        cut=''):
    '''
    Build one RooDataSet per mode and combine them by a category.

    trees is a dictionary with {mode: tree}
    vars is a dictionary {varName: (min, max)}
    helpingVars is a list of variables I also want in the dataset but without cuts
    weightsName, if given, is added to the helping variables
    cut is the selection applied when reading each tree

    Returns the combined RooDataSet indexed by the category.
    '''

    rooVars = {}
    for var, spread in vars.items():
        rooVars[var] = r.RooRealVar(var, var, spread[0], spread[1])

    # Work on a copy so the caller's list (or the default) is not modified.
    helpingVars = list(helpingVars)
    if weightsName:
        helpingVars.append(weightsName)
    for var in helpingVars:
        rooVars[var] = r.RooRealVar(var, var, 0)
        rooVars[var].setConstant(False)

    dataArgSet = makeRooArgSet(rooVars.values())

    # list(vars) instead of vars.keys(): dictionary views cannot be
    # concatenated with a list on Python 3.
    branches = list(vars) + helpingVars
    _trees = {}
    for mode, tree in trees.items():
        arr = tree2array(tree, branches=branches, selection=cut)
        _trees[mode] = array2tree(arr)

    if datasetDescription is None:
        datasetDescription = datasetName

    sample = r.RooCategory(categoryName, categoryName)
    dataSets = {}
    dataSetImport = {}
    for mode, tree in _trees.items():
        sample.defineType(mode)
        dataSets[mode] = r.RooDataSet('{0}_{1}'.format(datasetName, mode),
                                      datasetDescription, tree, dataArgSet)
        dataSetImport[mode] = r.RooFit.Import(mode, dataSets[mode])

    for i in dataSets.values():
        i.Print()

    combDataSet = r.RooDataSet(datasetName, datasetDescription, dataArgSet,
                               r.RooFit.Index(sample), *dataSetImport.values())
    combDataSet.Print()
    return combDataSet
Example #18
0
def convert_to_tree(nparray, file_list):
    """Write each array of ``nparray`` as tree 'tree' into its own file.

    Every path in ``file_list`` has "sixie" replaced by "idutta"; the
    list is updated in place, so the caller sees the redirected paths.
    Missing output directories are created. The working directory is no
    longer changed, so relative paths resolve consistently for all files.

    Raises IOError if an output file cannot be opened.
    """
    for i, path in enumerate(file_list):
        path = path.replace("sixie", "idutta")
        file_list[i] = path

        out_dir = os.path.dirname(path)
        # A bare file name has no directory part to create.
        if out_dir and not os.path.exists(out_dir):
            os.makedirs(out_dir)

        oFile = rt.TFile.Open(path, "RECREATE")
        if not oFile or oFile.IsZombie():
            raise IOError("Cannot create ROOT file '{}'".format(path))
        try:
            new_tree = array2tree(nparray[i], name='tree')
            new_tree.Write()
        finally:
            oFile.Close()
    return
    return
Example #19
0
def write_arrays_to_trees(arrays_, out_file_name_):
    """Write nested arrays to a ROOT file, one directory per sample.

    ``arrays_`` is indexed as ``arrays_[sample][tree]``; each array is
    converted with ``rnp.array2tree`` and written into the directory named
    after its sample.
    """
    out_file = rt.TFile.Open(out_file_name_, "recreate")
    print('converting arrays and writing to: ', out_file.GetName())
    for sample in arrays_:
        # Create the sample's directory under the file and make it current.
        out_file.cd()
        out_file.mkdir(sample).cd()
        sample_arrays = arrays_[sample]
        # NOTE(review): the inner key is not passed as the tree name, so
        # every tree is created with array2tree's default name -- confirm
        # this is intended when a sample holds several trees.
        for tree_key in sample_arrays:
            rnp.array2tree(sample_arrays[tree_key]).Write()
    out_file.Close()
    print('finished writing')
Example #20
0
def test_array2tree():
    """Round-trip a record array through array2tree / tree2array.

    Also checks that passing an existing tree extends it and that a
    non-tree ``tree`` argument raises TypeError.
    """
    # np.bool_ replaces the alias np.bool, which newer NumPy removed.
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            ('w', np.bool_)])

    # presumably temp() provides a scratch ROOT file for the trees -- it is
    # defined outside this block
    with temp() as tmp:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)

    assert_raises(TypeError, rnp.array2tree, a, tree=object)
Example #21
0
def test_array2tree():
    """Round-trip a record array through array2tree / tree2array.

    Also checks that passing an existing tree extends it and that a
    non-tree ``tree`` argument raises TypeError.
    """
    # np.bool_ replaces the alias np.bool, which newer NumPy removed.
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            ('w', np.bool_)])
    tmp = ROOT.TFile.Open('test_array2tree_temp_file.root', 'recreate')
    try:
        tree = rnp.array2tree(a)
        a_conv = rnp.tree2array(tree)
        assert_array_equal(a, a_conv)
        # extend the tree
        tree2 = rnp.array2tree(a, tree=tree)
        assert_equal(tree2.GetEntries(), len(a) * 2)
        a_conv2 = rnp.tree2array(tree2)
        assert_array_equal(np.hstack([a, a]), a_conv2)
    finally:
        # Remove the temporary file even when an assertion fails.
        tmp.Close()
        os.remove(tmp.GetName())
    assert_raises(TypeError, rnp.array2tree, a, tree=object)
Example #22
0
def run_pred(inputPath):
    """Append the two dNN scores to the ``nominal`` tree of ``inputPath``.

    The events are read from the tree, turned into tensors, and the
    results of ``pred_pt`` and ``pred_bin`` are written as the float32
    branches ``dNN_pt_score_2l`` and ``dNN_bin_score_2l``.
    Returns 0 without changes when the tree has no entries.
    """
    f = TFile(inputPath, "READ")
    try:
        dsid = inputPath.split('/')[-1]
        dsid = dsid.replace('.root', '')
        print(dsid)

        nom = f.Get('nominal')
        if nom.GetEntries() == 0:
            return 0

        event_dict = create_dict(nom)
        inDF = pd.DataFrame(event_dict)

        X = make_tensors(inDF, xMax)

        y_pred_pt = pred_pt(X, yMax)
        y_pred_bin = pred_bin(X)
    finally:
        # Release the read handle before the file is reopened for append.
        f.Close()

    new_branches = (
        ('dNN_pt_score_2l', y_pred_pt),
        ('dNN_bin_score_2l', y_pred_bin),
    )
    for branch_name, scores in new_branches:
        # Convert by value and view as a one-field record array; assigning
        # a record dtype in place would reinterpret the raw bytes.
        values = np.ascontiguousarray(scores, dtype=np.float32).reshape(-1)
        branch = values.view([(branch_name, 'float32')])

        # One append session per branch, as before; the context manager
        # closes the file.
        with root_open(inputPath, mode='a') as myfile:
            root_numpy.array2tree(branch, tree=myfile.nominal)
            myfile.write()
Example #23
0
def create_workspace(model,
                     datafile,
                     binvar,
                     binning,
                     massrange,
                     fitfile,
                     weights=None):
    """
    Build the mass-fit workspace with the data imported into it.

    The data are restricted to the mass range on loading, the bin
    information json file is written next to the fit file, and the mass
    and binning variables are created in the workspace.

    Returns the workspace and the bins.
    """
    wsp = r.RooWorkspace('ws_mass_fit')

    massrange = [float(v) for v in massrange.split(',')]

    # load the data, keeping only the mass range of the fit
    bin_var = parse_func_var(binvar)  # necessary for loading
    bin_name = bin_var[0]
    columns = [model.mname, bin_name]
    if weights is not None:
        columns.append(weights)
    data = apply_selections(get_dataframe(datafile, columns=columns),
                            select_bin(model.mname, *massrange))

    costh_bins, costh_means = get_costh_bins(binning, bin_var, data)
    info_file = fitfile.replace('.root', '_bin_sel_info.json')
    create_bin_info_json(info_file, costh_bins, costh_means, bin_name,
                         datafile)

    # Create the variables in the workspace
    try_factory(wsp, '{}[{}, {}]'.format(model.mname, *massrange))
    if 'abs' in bin_var[1].__name__:
        # symmetric range around zero for an 'abs' binning function
        lower = -np.max(costh_bins)
    else:
        lower = np.min(costh_bins)
    try_factory(wsp,
                '{}[{}, {}]'.format(bin_name, lower, np.max(costh_bins)))

    dset_vars = r.RooArgSet(get_var(wsp, model.mname),
                            get_var(wsp, bin_name))

    tree = array2tree(data.to_records(index=False))
    if weights is None:
        dataset = r.RooDataSet('full_data', 'full data sample', tree,
                               dset_vars)
    else:
        try_factory(wsp, '{}[0, 1e5]'.format(weights))
        dataset = r.RooDataSet('full_data', 'full data sample', tree,
                               dset_vars, '', weights)

    ws_import(wsp, dataset)

    return wsp, costh_bins
Example #24
0
        def create_ttree():
            """
            Create a simple root tree for testing: 10 zero-filled entries
            with the float64 branches var1..var4
            :return: created tree
            :rtype: ROOT.TTree
            """
            from root_numpy import array2tree
            import numpy as np

            # np.float64 replaces the alias np.float, which newer NumPy
            # removed; the resulting dtype is the same.
            names = ['var1', 'var2', 'var3', 'var4']
            array = np.zeros(10, dtype={'names': names,
                                        'formats': [np.float64] * len(names)})

            return array2tree(array, 'test_tree')
Example #25
0
def makeRooDataset(datasetName,
                   tree,
                   vars,
                   helpingVars=[],
                   datasetDescription=None,
                   weightsName=None,
                   cut=''):
    '''
    Build a RooDataSet from a tree, restricted to the variable ranges.

    vars is a dictionary {varName: (min, max)}; each range is added to cut
    helpingVars is a list of variables I also want in the dataset but without cuts
    weightsName, if given, is added to the helping variables and used as
    the weight variable of the dataset
    '''

    rooVars = {}
    for var, spread in vars.items():
        rooVars[var] = r.RooRealVar(var, var, spread[0], spread[1])

    # Range cuts of all variables, joined to the user cut (if any).
    cuts_vars = ' && '.join([
        '{0} > {1} && {0} < {2}'.format(var, *spread)
        for var, spread in vars.items()
    ])
    if cut != '' and cuts_vars != '':
        cut = cut + ' && ' + cuts_vars
    else:
        cut = cut + cuts_vars

    # Work on a copy so the caller's list (or the default) is not modified.
    helpingVars = list(helpingVars)
    if weightsName:
        helpingVars.append(weightsName)
    for var in helpingVars:
        rooVars[var] = r.RooRealVar(var, var, 0)
        rooVars[var].setConstant(False)

    dataArgSet = makeRooArgSet(rooVars.values())

    # list(vars) instead of vars.keys(): dictionary views cannot be
    # concatenated with a list on Python 3.
    arr = tree2array(tree, branches=list(vars) + helpingVars, selection=cut)
    tree = array2tree(arr)

    if datasetDescription is None:
        datasetDescription = datasetName

    if weightsName:
        dataSet = r.RooDataSet(datasetName, datasetDescription, tree,
                               dataArgSet, '1', weightsName)
    else:
        dataSet = r.RooDataSet(datasetName, datasetDescription, tree,
                               dataArgSet)
    return dataSet
Example #26
0
def evaluate(config, tree, names, transform=None):
    """Evaluate the named classifiers on ``tree`` and attach the scores.

    For every name a TMVA reader is booked from ``<mvadir>/<name>.xml``;
    names starting with "sklearn" are additionally scored with the
    pickled classifier ``<mvadir>/<name>.pkl``. If ``mapping.root`` can be
    opened, a likelihood column is looked up from its ``hTargetBinning``
    histogram. All columns are handed to ``tree.mva`` as a new tree.

    ``transform``, if given, is applied to the variable list before the
    input data are read from the tree.
    """
    output = []
    dtype = []
    for name in names:
        setup = load(config, name.split("_")[1])
        data = rec2array(tree2array(tree.raw(), list(transform(setup["variables"])) if transform else setup["variables"]))
        if name.startswith("sklearn"):
            fn = os.path.join(config["mvadir"], name + ".pkl")
            # NOTE: pickle executes code on load; only use trusted files.
            with open(fn, 'rb') as fd:
                bdt, label = pickle.load(fd)
            scores = []
            if len(data) > 0:
                scores = bdt.predict_proba(data)[:, 1]
            output += [scores]
            dtype += [(name, 'float64')]

        fn = os.path.join(config["mvadir"], name + ".xml")
        reader = r.TMVA.Reader("Silent")
        for var in setup['variables']:
            reader.AddVariable(var, array('f', [0.]))
        reader.BookMVA("BDT", fn)
        scores = evaluate_reader(reader, "BDT", data)
        output += [scores]
        dtype += [(name.replace("sklearn", "tmvalike"), 'float64')]

    f = r.TFile(os.path.join(config.get("mvadir", config.get("indir", config["outdir"])), "mapping.root"), "READ")
    if f.IsOpen():
        likelihood = f.Get("hTargetBinning")

        def lh(values):
            return likelihood.GetBinContent(likelihood.FindBin(*values))
        # Position of every column, by its name.
        indices = dict((v, n) for n, (v, _) in enumerate(dtype))
        tt = output[indices['tmvalike_tt']]
        ttZ = output[indices['tmvalike_ttZ']]
        if len(tt) == 0:
            output += [[]]
        else:
            output += [np.apply_along_axis(lh, 1, np.array([tt, ttZ]).T)]
        dtype += [('tmvalike_likelihood', 'float64')]
        f.Close()

    # Materialise the rows: on Python 3 zip() is an iterator, which
    # np.array cannot turn into a record array.
    data = np.array(list(zip(*output)), dtype)
    tree.mva(array2tree(data))
def fit(name, selected_branches, fit_params, results, outputfile):
  """Fit the selected mass values and append the outcome to ``results``.

  The values become the 'BToKEE_fit_mass' branch of a tree that is handed
  to a ``fitter``; the plot name is built from ``outputfile``, ``name``
  and the 'mvaCut' fit parameter. Returns ``results`` and the plot name.
  """
  mass_branch = np.array(selected_branches, dtype=[('BToKEE_fit_mass', 'f4')])
  tree = array2tree(mass_branch)

  # Dots in the tag are replaced so that '.pdf' is the only extension.
  tag = '_{0}_mva_{1:.3f}'.format(name, fit_params['mvaCut']).replace('.','-')
  outputname = outputfile + tag + '.pdf'

  b_fitter = fitter()
  b_fitter.init_fit_data(**fit_params)
  output = b_fitter.fit(tree, outputname)

  # Quantities copied unchanged from the fit output.
  for key in ('Stot', 'StotErr', 'S', 'SErr', 'B', 'BErr'):
    results['{}_{}'.format(key, name)].append(output[key])
  significance = output['S']/np.sqrt(output['S'] + output['B'])
  results['SNR_{}'.format(name)].append(significance)
  results['exp_alpha_{}'.format(name)].append(output['exp_alpha'])
  return results, outputname
def create_workspace(workspace_name, datafile, model):
    """
    Build a RooWorkspace and import the data sample into it.

    Every (variable, bounds) pair yielded by ``model.get_load_vars()`` is
    declared in the workspace through ``try_factory``; the same variable names
    select the columns read from ``datafile``. The rows are converted to a
    TTree and imported as a RooDataSet called 'full_data'.

    Returns the populated workspace (its content is printed before returning).
    """
    workspace = r.RooWorkspace(workspace_name)

    arg_set = r.RooArgSet()
    column_names = []
    for var_name, var_bounds in model.get_load_vars():
        factory_expr = '{}[{}]'.format(var_name, var_bounds)
        try_factory(workspace, factory_expr)
        arg_set.add(get_var(workspace, var_name))
        column_names.append(var_name)

    frame = get_dataframe(datafile, columns=column_names)
    records = frame.to_records(index=False)
    tree = array2tree(records)

    full_data = r.RooDataSet('full_data', 'full data sample', tree, arg_set)
    ws_import(workspace, full_data)

    workspace.Print()
    return workspace
Example #29
0
def convert_data_frame_to_ttree(data_frame, tree_name, columns=None):
    """
    Turn a pandas.DataFrame into a ROOT.TTree.

    :param data_frame: source data for the TTree
    :type data_frame: pandas.DataFrame

    :param tree_name: name given to the created TTree
    :type tree_name: str

    :param columns: subset of columns to write; ``None`` (default) writes every
        column of data_frame
    :type columns: list, None

    :return: the TTree built from the selected columns (the index is not stored)
    :rtype: ROOT.TTree
    """
    selected = data_frame if columns is None else data_frame[columns]
    records = selected.to_records(index=False)
    return array2tree(records, name=tree_name)
Example #30
0
def reweight( sample, puType = 0  ):
    """Compute per-event pileup weights for an MC sample and store them in a new file.

    For every data-taking epoch one TTree named 'weights_<epoch>' with the
    branches 'PUweight' and 'totWeight' is written to ``sample.puTree``
    (the file is recreated).

    Parameters
    ----------
    sample : object
        Must provide ``path`` (indexable; only element 0 is opened),
        ``tnpTree`` (tree name or None) and ``puTree`` (output file name).
    puType : int
        0: reweight using the 'truePU' branch against ``puMC[puMCscenario]``;
        1: reweight using 'event_nPV' against a histogram drawn from the MC tree;
        2: reweight using 'rho' against a histogram drawn from the MC tree.

    Notes
    -----
    Relies on the module-level names ``puMC``, ``puMCscenario``,
    ``puDataEpoch``, ``nVtxDataEpoch`` and ``rhoDataEpoch`` which are not
    defined in this function. Calls ``sys.exit(1)`` when ``sample.path`` is None.
    """
    if sample.path is None:
        print '[puReweighter]: Need to know the MC tree (option --mcTree or sample.path)'
        sys.exit(1)
    

### create a tree with only weights that will be used as friend tree for reweighting different lumi periods
    print 'Opening mc file: ', sample.path[0]
    fmc = rt.TFile(sample.path[0],'read')
    tmc = None
    if sample.tnpTree is None:
        # No tree name given: look for '<directory>/fitter_tree' in every
        # top-level key except 'sampleInfo'. The last key visited wins.
        # NOTE(review): tmc stays None if the file has no such key; the
        # later tmc.Draw / tree2array calls would then fail - confirm inputs.
        dirs = fmc.GetListOfKeys()
        for d in dirs:
            if (d.GetName() == "sampleInfo"): continue
            tmc = fmc.Get("%s/fitter_tree" % d.GetName())
    else:
        tmc = fmc.Get(sample.tnpTree)
    

#### can reweight vs nVtx but better to reweight v truePU
    # MC reference distributions for puType 1 / 2: 75 unit-width bins
    # covering [-0.5, 74.5], normalised to unit integral.
    puMCnVtx = []
    puMCrho = []
    if   puType == 1 :
        hmc   = rt.TH1F('hMC_nPV'  ,'MC nPV'  , 75,-0.5,74.5)
        tmc.Draw('event_nPV>>hMC_nPV','','goff')
        hmc.Scale(1/hmc.Integral())
        for ib in range(1,hmc.GetNbinsX()+1):
            puMCnVtx.append( hmc.GetBinContent(ib) )
        print 'len nvtxMC = ',len(puMCnVtx)

    elif puType == 2 :
        hmc   = rt.TH1F('hMC_rho'  ,'MC #rho'  , 75,-0.5,74.5)
        tmc.Draw('rho>>hMC_rho','','goff')
        hmc.Scale(1/hmc.Integral())
        for ib in range(1,hmc.GetNbinsX()+1):
            puMCrho.append( hmc.GetBinContent(ib) )
        print 'len rhoMC = ',len(puMCrho)
    

    # Per-epoch containers: normalised data histogram, its bin contents as a
    # list, and the list of (PUweight, totWeight) tuples filled further below.
    puDataDist = {}
    puDataArray= {}
    weights = {}
    epochKeys = puDataEpoch.keys()
    if puType == 1  : epochKeys = nVtxDataEpoch.keys()
    if puType == 2  : epochKeys = rhoDataEpoch.keys()
 
    for pu in epochKeys:
        fpu = None
        if   puType == 1 : fpu = rt.TFile(nVtxDataEpoch[pu],'read')
        elif puType == 2 : fpu = rt.TFile(rhoDataEpoch[pu],'read')
        else             : fpu = rt.TFile(puDataEpoch[pu],'read')
        puDataDist[pu] = fpu.Get('pileup').Clone('puHist_%s' % pu)
        puDataDist[pu].Scale(1./puDataDist[pu].Integral())
        # Detach the clone from the file so it survives fpu.Close().
        puDataDist[pu].SetDirectory(0)
        puDataArray[pu] = []
        # NOTE(review): the number of sampled values is taken from
        # puMC[puMCscenario] for every puType, while the event loop indexes up
        # to len(pumc) (75 for puType 1/2) - confirm the lengths agree.
        for ipu in range(len(puMC[puMCscenario])):
            # Small offset so that integer ipu falls inside a bin rather than on an edge.
            ibin_pu  = puDataDist[pu].GetXaxis().FindBin(ipu+0.00001)
            puDataArray[pu].append(puDataDist[pu].GetBinContent(ibin_pu))
        print 'puData[%s] length = %d' % (pu,len(puDataArray[pu]))
        fpu.Close()
        weights[pu] = []

    mcEvts = tree2array( tmc, branches = ['weight','truePU','event_nPV','rho'] )


    # Select the MC reference distribution matching puType (the first
    # assignment is superseded by the if/elif/else chain below).
    pumc = puMC[puMCscenario]
    if   puType == 1:  pumc = puMCnVtx
    elif puType == 2:  pumc = puMCrho
    else            :  pumc = puMC[puMCscenario]

    puMax = len(pumc)
    print '-> nEvtsTot ', len(mcEvts)
    for ievt in xrange(len(mcEvts)):
        if ievt%1000000 == 0 :            print 'iEvt:',ievt
        evt = mcEvts[ievt]
        for pu in epochKeys:
            # -1 marks "no reference value"; the weight then defaults to 1.
            pum = -1
            pud = -1
            if puType == 1 and evt['event_nPV'] < puMax:
                pud = puDataArray[pu][evt['event_nPV']]
                pum = pumc[evt['event_nPV']]
            if puType == 2 and int(evt['rho']) < puMax:
                pud = puDataArray[pu][int(evt['rho'])]
                pum = pumc[int(evt['rho'])]
            elif puType == 0:
                # NOTE(review): unlike the two branches above there is no
                # upper-bound check on truePU here - confirm it cannot exceed
                # the table length.
                pud = puDataArray[pu][evt['truePU']] 
                pum = pumc[evt['truePU']]
            puw = 1
            if pum > 0: 
                puw  = pud/pum

            # totWeight carries the sign of the event's 'weight' branch.
            if evt['weight'] > 0 : totw = +puw
            else                 : totw = -puw
            weights[pu].append( ( puw,totw) )

    newFile    = rt.TFile( sample.puTree, 'recreate')

    # One weight tree per epoch, filled from the accumulated tuples.
    for pu in epochKeys:
        treeWeight = rt.TTree('weights_%s'%pu,'tree with weights')
        wpuarray = np.array(weights[pu],dtype=[('PUweight',float),('totWeight',float)])
        array2tree( wpuarray, tree = treeWeight )
        treeWeight.Write()

    newFile.Close()    
    fmc.Close()
def main ():

    #outdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-06-19/'
    #outdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-06-28/'
    #outdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-07-24/'
    #outdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-08-06/'
    outdir = '/eos/atlas/user/a/asogaard/Analysis/2016/BoostedJetISR/StatsInputs/2017-10-27/'

    # Setup.
    # ----------------------------------------------------------------

    print "\nSetup."

    # Validate arguments.
    print "-- Validate arguments."
    validateArguments(sys.argv)

    # Load cross sections files.
    print "-- Load cross sections file."
    xsec = loadXsec('../share/sampleInfo.csv')

    # Get list of file paths to plot from commandline arguments.
    print "-- Get list of input paths."
    paths = [arg for arg in sys.argv[1:] if not arg.startswith('-')]

    # Specify which variables to get.
    print "-- Specify variables to read."
    categories = [
        "Nominal",
        "TF_UP",
        "TF_DOWN",

        "LARGER_JET_Comb_Baseline_All__1up",
        "LARGER_JET_Comb_Baseline_All__1down",
        "LARGER_JET_Comb_Modelling_All__1up",
        "LARGER_JET_Comb_Modelling_All__1down",
        "LARGER_JET_Comb_TotalStat_All__1up",
        "LARGER_JET_Comb_TotalStat_All__1down",
        "LARGER_JET_Comb_Tracking_All__1up",
        "LARGER_JET_Comb_Tracking_All__1down",
        "PHOTON_EG_RESOLUTION_ALL__1down",
        "PHOTON_EG_RESOLUTION_ALL__1up",
        "PHOTON_EG_SCALE_ALL__1down",
        "PHOTON_EG_SCALE_ALL__1up",
        
        #"LARGER_JET_Comb_Baseline_Kin__1up",
        #"LARGER_JET_Comb_Baseline_Kin__1down",
        #"LARGER_JET_Comb_Modelling_Kin__1up",
        #"LARGER_JET_Comb_Modelling_Kin__1down",
        #"LARGER_JET_Comb_TotalStat_Kin__1up",
        #"LARGER_JET_Comb_TotalStat_Kin__1down",
        #"LARGER_JET_Comb_Tracking_Kin__1up",
        #"LARGER_JET_Comb_Tracking_Kin__1down",
        #"LARGER_JET_Rtrk_Baseline_Sub__1up",
        #"LARGER_JET_Rtrk_Baseline_Sub__1down",
        #"LARGER_JET_Rtrk_Modelling_Sub__1up",
        #"LARGER_JET_Rtrk_Modelling_Sub__1down",
        #"LARGER_JET_Rtrk_TotalStat_Sub__1up",
        #"LARGER_JET_Rtrk_TotalStat_Sub__1down",
        #"LARGER_JET_Rtrk_Tracking_Sub__1up",
        #"LARGER_JET_Rtrk_Tracking_Sub__1down",
        #"PHOTON_EG_RESOLUTION_ALL__1down",
        #"PHOTON_EG_RESOLUTION_ALL__1up",
        #"PHOTON_EG_SCALE_ALL__1down",
        #"PHOTON_EG_SCALE_ALL__1up",
        ]

    treename       = 'BoostedJet+ISRgamma/{cat}/EventSelection/Pass/Jet_tau21DDT/Postcut'
    outputtreename = 'BoostedJet+ISRgamma/{cat}/outputTree'
    prefix   = 'Jet_'
    getvars  = ['m']

    # Load data.
    print "-- Load data."
    data, info = dict(), dict()
    for icat, cat in enumerate(categories):
        print "\n" + "=" * 10 + "[" + cat + ": %d/%d]" % (icat + 1, len(categories))+ "=" * 60
        #data[cat] = loadDataFast(paths, treename.format(cat=cat), getvars, prefix, xsec)
        data[cat] = loadData(paths, treename      .format(cat=cat), branches=getvars, prefix=prefix)
        info[cat] = loadData(paths, outputtreename.format(cat=cat), branches=['DSID', 'isMC'], stop=1)
        if data[cat] is not None:
            data[cat] = scale_weights(data[cat], info[cat], xsec)
            pass
        pass

    # Check output.
    #print "-- Check output exists."
    #if False in [bool(data[cat]) for cat in categories]:
    #    print "WARNING: No data was loaded."
    #    return


    # Fill output histograms.
    # ----------------------------------------------------------------
    print "\nFill output histograms."

    # Format: (isMC, (DSID_min, DSID_max), "name")
    ranges = {
        # Backgrounds
        'bkg': [
            (True, (100000 + mass, 100000 + mass), "bkg_%03d" % mass) for mass in np.linspace(100, 220, (220 - 100) / 5 + 1, endpoint=True)# data | TF
            ],

        # Background (GBS)
        #'gbs': [
        #    (True, (400000, 400000), "gbs"), # data | TF
        #    ],

        # W (qq) + gamma
        'W': [
            (True, (305435, 305439), "W"), # Sherpa gamma + W
            ],

        # Z (qq) + gamma
        'Z': [
            (True, (305440, 305444), "Z"), # Sherpa gamma + Z
            ],

        # Signals.
        'sig': [
            (True, (308363, 308363), "mRp100_mD10_gSp5_gD1"), # MGPy8EG_N30LO_A14N23LO_dmA_jja_Ph150_mRp100_mD10_gSp5_gD1
            (True, (308364, 308364), "mRp130_mD10_gSp5_gD1"), # MGPy8EG_N30LO_A14N23LO_dmA_jja_Ph195_mRp130_mD10_gSp5_gD1
            (True, (308365, 308365), "mRp160_mD10_gSp5_gD1"), # MGPy8EG_N30LO_A14N23LO_dmA_jja_Ph240_mRp160_mD10_gSp5_gD1
            (True, (308366, 308366), "mRp190_mD10_gSp5_gD1"), # MGPy8EG_N30LO_A14N23LO_dmA_jja_Ph285_mRp190_mD10_gSp5_gD1
            (True, (308367, 308367), "mRp220_mD10_gSp5_gD1"), # MGPy8EG_N30LO_A14N23LO_dmA_jja_Ph330_mRp220_mD10_gSp5_gD1
            ],

        # Data
        'data': [
            (False, (0, 1E+07), "data"), # Data
            ],
        }

    # Signals. (interpolated)
    #for mass in np.linspace(100, 220, (220 - 100) / 10 + 1, endpoint=True):
    for mass in np.linspace(100, 220, (220 - 100) / 5 + 1, endpoint=True):
        if mass in [100, 130, 160, 190, 220]: continue
        ranges['sig'].append( (True, (200000 + mass, 200000 + mass), "mRp%03d_xyz" % mass))
        pass


    # Get titles.
    for key in ranges:
        new_ranges = list()
        for isMC, DSIDs, name in ranges[key]:
            title = name
            if name.startswith('mR'):
                m = re.search('mR([0-9]*)p([0-9]+)*', name)
                masstext = '%s%s' % (m.group(1), m.group(2).ljust(3, '0').lstrip('0'))
                title = 'signal_%s' % masstext
                pass
            new_ranges.append((isMC, DSIDs, title))
            pass
        ranges[key] = new_ranges
        pass

    # Write to file(s).
    for key in ranges:
        for isMC, DSIDs, title in ranges[key]:
            print "Filling tree for '%s'." % title
            f = TFile(outdir + '/ISRgamma_%s.root' % title, 'RECREATE')

            for cat in categories:
                if 'data' in key.strip().lower() and cat != 'Nominal': continue
                print " -- %s" % cat,

                empty = data[cat] is None
                if not empty:
                    msk = np.where((data[cat]['isMC'] == isMC) & (data[cat]['DSID'] >= DSIDs[0]) & (data[cat]['DSID'] <= DSIDs[1]))
                    empty |= (np.sum(msk) == 0) # No events with this variation -> use 'Nominal' instead
                    pass

                if empty:
                    fallback = 'Nominal'
                    print "(defaulting to '%s')" % fallback
                    msk = np.where((data[fallback]['DSID'] >= DSIDs[0]) & (data[fallback]['DSID'] <= DSIDs[1]))
                else:
                    print ""
                    pass

                w = 36.1 if (isMC and DSIDs[0] > 300000 and DSIDs[1] < 400000) else 1.
                if w != 1:
                    print "----> Weight:", w
                    pass

                # Fill tree
                M = [tuple(el) for el in np.hstack((np.atleast_2d(data[cat if not empty else 'Nominal']['m']     [msk].ravel()).T,
                                                    np.atleast_2d(data[cat if not empty else 'Nominal']['weight'][msk].ravel() * w).T)).tolist()]

                arr = np.array(M, dtype=[('mJ',    'f4'),
                                         ('weight', 'f4'),
                                         ])

                if 'TF' in cat:
                    if cat == 'TF_UP':
                        rename = ['TF_Norm__1up', 'TF_Shape__1up']
                    else:
                        rename = ['TF_Norm__1down', 'TF_Shape__1down']
                        pass
                    for name in rename:
                        t = array2tree(arr, title.upper() + '_' + name)
                        t.Write()
                        pass
                else:
                    t = array2tree(arr, title.upper() + '_' + cat)
                    t.Write()
                    pass

                pass

            f.Close()
            pass

        pass

    return
Example #32
0
def fit_mass(data,
             column,
             x,
             sig_pdf=None,
             bkg_pdf=None,
             n_sig=None,
             n_bkg=None,
             blind=False,
             nll_profile=False,
             second_storage=None,
             log_plot=False,
             pulls=True,
             sPlot=False,
             bkg_in_region=False,
             importance=3,
             plot_importance=3):
    """Fit a given pdf to a variable distribution


    Parameter
    ---------
    data : |hepds_type|
        The data containing the variable to fit to
    column : str
        The name of the column to fit the pdf to
    sig_pdf : RooFit pdf
        The signal Probability Density Function. The variable to fit to has
        to be named 'x'.
    bkg_pdf : RooFit pdf
        The background Probability Density Function. The variable to fit to has
        to be named 'x'.
    n_sig : None or numeric
        The number of signals in the data. If it should be fitted, use None.
    n_bkg : None or numeric
        The number of background events in the data.
        If it should be fitted, use None.
    blind : boolean or tuple(numberic, numberic)
        If False, the data is fitted. If a tuple is provided, the values are
        used as the lower (the first value) and the upper (the second value)
        limit of a blinding region, which will be omitted in plots.
        Additionally, no true number of signal will be returned but only fake.
    nll_profile : boolean
        If True, a Negative Log-Likelihood Profile will be generated. Does not
        work with blind fits.
    second_storage : |hepds_type|
        A second data-storage that will be concatenated with the first one.
    importance : |importance_type|
        |importance_docstring|
    plot_importance : |plot_importance_type|
        |plot_importance_docstring|

    Return
    ------
    tuple(numerical, numerical)
        Return the number of signals and the number of backgrounds in the
        signal-region. If a blind fit is performed, the signal will be a fake
        number. If no number of background events is required, -999 will be
        returned.
    """

    if not (isinstance(column, str) or len(column) == 1):
        raise ValueError("Fitting to several columns " + str(column) +
                         " not supported.")
    if type(sig_pdf) == type(bkg_pdf) == None:
        raise ValueError("sig_pdf and bkg_pdf are both None-> no fit possible")
    if blind is not False:
        lower_blind, upper_blind = blind
        blind = True

    n_bkg_below_sig = -999
    # create data
    data_name = data.name
    data_array, _t1, _t2 = data.make_dataset(second_storage, columns=column)
    del _t1, _t2

    # double crystalball variables
    min_x, max_x = min(data_array[column]), max(data_array[column])

    #    x = RooRealVar("x", "x variable", min_x, max_x)

    # create data
    data_array = np.array([i[0] for i in data_array.as_matrix()])
    data_array.dtype = [('x', np.float64)]
    tree1 = array2tree(data_array, "x")
    data = RooDataSet("data", "Data", RooArgSet(x), RooFit.Import(tree1))

    #    # TODO: export somewhere? does not need to be defined inside...
    #    mean = RooRealVar("mean", "Mean of Double CB PDF", 5280, 5100, 5600)#, 5300, 5500)
    #    sigma = RooRealVar("sigma", "Sigma of Double CB PDF", 40, 0.001, 200)
    #    alpha_0 = RooRealVar("alpha_0", "alpha_0 of one side", 5.715)#, 0, 150)
    #    alpha_1 = RooRealVar("alpha_1", "alpha_1 of other side", -4.019)#, -200, 0.)
    #    lambda_0 = RooRealVar("lambda_0", "Exponent of one side", 3.42)#, 0, 150)
    #    lambda_1 = RooRealVar("lambda_1", "Exponent of other side", 3.7914)#, 0, 500)
    #
    #    # TODO: export somewhere? pdf construction
    #    frac = RooRealVar("frac", "Fraction of crystal ball pdfs", 0.479, 0.01, 0.99)
    #
    #    crystalball1 = RooCBShape("crystallball1", "First CrystalBall PDF", x,
    #                              mean, sigma, alpha_0, lambda_0)
    #    crystalball2 = RooCBShape("crystallball2", "Second CrystalBall PDF", x,
    #                              mean, sigma, alpha_1, lambda_1)
    #    doubleCB = RooAddPdf("doubleCB", "Double CrystalBall PDF",
    #                         crystalball1, crystalball2, frac)

    #    n_sig = RooRealVar("n_sig", "Number of signals events", 10000, 0, 1000000)

    # test input
    if n_sig == n_bkg == 0:
        raise ValueError("n_sig as well as n_bkg is 0...")

    if n_bkg is None:
        n_bkg = RooRealVar("n_bkg", "Number of background events", 10000, 0,
                           500000)
    elif n_bkg >= 0:
        n_bkg = RooRealVar("n_bkg", "Number of background events", int(n_bkg))
    else:
        raise ValueError("n_bkg is not >= 0 or None")

    if n_sig is None:
        n_sig = RooRealVar("n_sig", "Number of signal events", 1050, 0, 200000)

        # START BLINDING
        blind_cat = RooCategory("blind_cat", "blind state category")
        blind_cat.defineType("unblind", 0)
        blind_cat.defineType("blind", 1)
        if blind:
            blind_cat.setLabel("blind")
            blind_n_sig = RooUnblindPrecision("blind_n_sig",
                                              "blind number of signals",
                                              "wasistdas", n_sig.getVal(),
                                              10000, n_sig, blind_cat)
        else:
            #            blind_cat.setLabel("unblind")
            blind_n_sig = n_sig

        print "n_sig value " + str(n_sig.getVal())
#        raw_input("blind value " + str(blind_n_sig.getVal()))

#        n_sig = blind_n_sig

# END BLINDING
    elif n_sig >= 0:
        n_sig = RooRealVar("n_sig", "Number of signal events", int(n_sig))
    else:
        raise ValueError("n_sig is not >= 0")

#    if not blind:
#        blind_n_sig = n_sig

#    # create bkg-pdf
#    lambda_exp = RooRealVar("lambda_exp", "lambda exp pdf bkg", -0.00025, -1., 1.)
#    bkg_pdf = RooExponential("bkg_pdf", "Background PDF exp", x, lambda_exp)

    if blind:
        comb_pdf = RooAddPdf("comb_pdf", "Combined DoubleCB and bkg PDF",
                             RooArgList(sig_pdf, bkg_pdf),
                             RooArgList(blind_n_sig, n_bkg))
    else:
        comb_pdf = RooAddPdf("comb_pdf", "Combined DoubleCB and bkg PDF",
                             RooArgList(sig_pdf, bkg_pdf),
                             RooArgList(n_sig, n_bkg))

    # create test dataset
#    mean_gauss = RooRealVar("mean_gauss", "Mean of Gaussian", 5553, -10000, 10000)
#    sigma_gauss = RooRealVar("sigma_gauss", "Width of Gaussian", 20, 0.0001, 300)
#    gauss1 = RooGaussian("gauss1", "Gaussian test dist", x, mean_gauss, sigma_gauss)
#    lambda_data = RooRealVar("lambda_data", "lambda exp data", -.002)
#    exp_data = RooExponential("exp_data", "data example exp", x, lambda_data)
#    frac_data = RooRealVar("frac_data", "Fraction PDF of data", 0.15)
#
#    data_pdf = RooAddPdf("data_pdf", "Data PDF", gauss1, exp_data, frac_data)
#    data = data_pdf.generate(RooArgSet(x), 30000)

#    data.printValue()
#    xframe = x.frame()
#    data_pdf.plotOn(xframe)
#    print "n_cpu:", meta_config.get_n_cpu()
#    input("test")
#    comb_pdf.fitTo(data, RooFit.Extended(ROOT.kTRUE), RooFit.NumCPU(meta_config.get_n_cpu()))
#     HACK to get 8 cores in testing
    c5 = TCanvas("c5", "RooFit pdf not fit vs " + data_name)
    c5.cd()
    x_frame1 = x.frame()
    #    data.plotOn(x_frame1)
    #    comb_pdf.pdfList()[1].plotOn(x_frame1)

    if __name__ == "__main__":
        n_cpu = 8
    else:
        n_cpu = meta_config.get_n_cpu()
        print "n_cpu = ", n_cpu
        # HACK
#        n_cpu = 8
    result_fit = comb_pdf.fitTo(data, RooFit.Minos(ROOT.kTRUE),
                                RooFit.Extended(ROOT.kTRUE),
                                RooFit.NumCPU(n_cpu))
    # HACK end
    if bkg_in_region:
        x.setRange("signal", bkg_in_region[0], bkg_in_region[1])
        bkg_pdf_fitted = comb_pdf.pdfList()[1]
        int_argset = RooArgSet(x)
        #        int_argset = x
        #        int_argset.setRange("signal", bkg_in_region[0], bkg_in_region[1])
        integral = bkg_pdf_fitted.createIntegral(int_argset,
                                                 RooFit.NormSet(int_argset),
                                                 RooFit.Range("signal"))
        bkg_cdf = bkg_pdf_fitted.createCdf(int_argset, RooFit.Range("signal"))
        bkg_cdf.plotOn(x_frame1)

        #        integral.plotOn(x_frame1)
        n_bkg_below_sig = integral.getVal(int_argset) * n_bkg.getVal()
        x_frame1.Draw()

    if plot_importance >= 3:
        c2 = TCanvas("c2", "RooFit pdf fit vs " + data_name)
        c2.cd()
        x_frame = x.frame()
        #        if log_plot:
        #            c2.SetLogy()
        #        x_frame.SetTitle("RooFit pdf vs " + data_name)
        x_frame.SetTitle(data_name)
        if pulls:
            pad_data = ROOT.TPad("pad_data", "Pad with data and fit", 0, 0.33,
                                 1, 1)
            pad_pulls = ROOT.TPad("pad_pulls", "Pad with data and fit", 0, 0,
                                  1, 0.33)
            pad_data.SetBottomMargin(0.00001)
            pad_data.SetBorderMode(0)
            if log_plot:
                pad_data.SetLogy()
            pad_pulls.SetTopMargin(0.00001)
            pad_pulls.SetBottomMargin(0.2)
            pad_pulls.SetBorderMode(0)
            pad_data.Draw()
            pad_pulls.Draw()
            pad_data.cd()
        else:
            if log_plot:
                c2.SetLogy()
    if blind:
        # HACK
        column = 'x'
        # END HACK
        x.setRange("lower", min_x, lower_blind)
        x.setRange("upper", upper_blind, max_x)
        range_str = "lower,upper"
        lower_cut_str = str(
            min_x) + "<=" + column + "&&" + column + "<=" + str(lower_blind)
        upper_cut_str = str(
            upper_blind) + "<=" + column + "&&" + column + "<=" + str(max_x)
        sideband_cut_str = "(" + lower_cut_str + ")" + "||" + "(" + upper_cut_str + ")"

        n_entries = data.reduce(
            sideband_cut_str).numEntries() / data.numEntries()
        #        raw_input("n_entries: " + str(n_entries))
        if plot_importance >= 3:
            data.plotOn(x_frame, RooFit.CutRange(range_str),
                        RooFit.NormRange(range_str))
            comb_pdf.plotOn(
                x_frame, RooFit.Range(range_str),
                RooFit.Normalization(n_entries, RooAbsReal.Relative),
                RooFit.NormRange(range_str))
            if pulls:
                #                pull_hist(pull_frame=x_frame, pad_data=pad_data, pad_pulls=pad_pulls)
                x_frame_pullhist = x_frame.pullHist()
    else:
        if plot_importance >= 3:
            data.plotOn(x_frame)
            comb_pdf.plotOn(x_frame)
            if pulls:
                pad_pulls.cd()
                x_frame_pullhist = x_frame.pullHist()
                pad_data.cd()

            comb_pdf.plotOn(x_frame,
                            RooFit.Components(sig_pdf.namePtr().GetName()),
                            RooFit.LineStyle(ROOT.kDashed))
            comb_pdf.plotOn(x_frame,
                            RooFit.Components(bkg_pdf.namePtr().GetName()),
                            RooFit.LineStyle(ROOT.kDotted))
#            comb_pdf.plotPull(n_sig)

    if plot_importance >= 3:
        x_frame.Draw()

        if pulls:
            pad_pulls.cd()
            x_frame.SetTitleSize(0.05, 'Y')
            x_frame.SetTitleOffset(0.7, 'Y')
            x_frame.SetLabelSize(0.04, 'Y')

            #            c11 = TCanvas("c11", "RooFit\ pulls" + data_name)
            #            c11.cd()
            #            frame_tmp = x_frame
            frame_tmp = x.frame()

            #            frame_tmp.SetTitle("significance")

            frame_tmp.SetTitle("Roofit\ pulls\ " + data_name)
            frame_tmp.addObject(x_frame_pullhist)

            frame_tmp.SetMinimum(-5)
            frame_tmp.SetMaximum(5)

            #            frame_tmp.GetYaxis().SetTitle("significance")
            frame_tmp.GetYaxis().SetNdivisions(5)
            frame_tmp.SetTitleSize(0.1, 'X')
            frame_tmp.SetTitleOffset(1, 'X')
            frame_tmp.SetLabelSize(0.1, 'X')
            frame_tmp.SetTitleSize(0.1, 'Y')
            frame_tmp.SetTitleOffset(0.5, 'Y')
            frame_tmp.SetLabelSize(0.1, 'Y')

            frame_tmp.Draw()

#    raw_input("")

    if not blind and nll_profile:

        #        nll_range = RooRealVar("nll_range", "Signal for nLL", n_sig.getVal(),
        #                               -10, 2 * n_sig.getVal())
        sframe = n_sig.frame(RooFit.Bins(20), RooFit.Range(1, 1000))
        # HACK for best n_cpu
        lnL = comb_pdf.createNLL(data, RooFit.NumCPU(8))
        # HACK end
        lnProfileL = lnL.createProfile(ROOT.RooArgSet(n_sig))
        lnProfileL.plotOn(sframe, RooFit.ShiftToZero())
        c4 = TCanvas("c4", "NLL Profile")
        c4.cd()

        #        input("press ENTER to show plot")
        sframe.Draw()

    if plot_importance >= 3:
        pass

    params = comb_pdf.getVariables()
    params.Print("v")

    #    print bkg_cdf.getVal()

    if sPlot:
        sPlotData = ROOT.RooStats.SPlot(
            "sPlotData",
            "sPlotData",
            data,  # variable fitted to, RooDataSet
            comb_pdf,  # fitted pdf
            ROOT.RooArgList(
                n_sig,
                n_bkg,
                #                                                NSigB0s
            ))
        sweights = np.array([
            sPlotData.GetSWeight(i, 'n_sig') for i in range(data.numEntries())
        ])
        return n_sig.getVal(), n_bkg_below_sig, sweights

    if blind:
        return blind_n_sig.getVal(), n_bkg_below_sig, comb_pdf
    else:
        return n_sig.getVal(), n_bkg_below_sig, comb_pdf
Example #33
0
# Canvas and 2D histogram (10 x 10 bins on [-3, 3] in both axes); the same
# histogram object is reset and refilled in every bootstrap iteration.
canvas = Canvas(width=500, height=400)
hist = Hist2D(10, -3, 3, 10, -3, 3, drawstyle='LEGO2')

# Output file receiving one tree per bootstrap sample ('recreate' mode).
# NOTE(review): no close of this file is visible in this part of the script.
output = root_open('bootstrap.root', 'recreate')

# bootstrap 100 times
# NOTE(review): `array` is not defined in the visible part of the script;
# presumably a structured numpy array created earlier - confirm.
for bootstrap_idx in range(100):
    # Progress message; '\r' rewrites the same terminal line each iteration.
    sys.stdout.write("bootstrap {0} ...\r".format(bootstrap_idx))
    sys.stdout.flush()
    # resample with replacement
    # http://docs.scipy.org/doc/numpy-dev/reference/generated/numpy.random.choice.html
    sample_idx = np.random.choice(len(array), size=len(array), replace=True)
    array_bootstrapped = array[sample_idx]
    # convert back to a TTree and write it out
    tree_bootstrapped = array2tree(
        array_bootstrapped,
        name='bootstrap_{0}'.format(bootstrap_idx))
    tree_bootstrapped.Write()
    # The tree is deleted once it has been written.
    tree_bootstrapped.Delete()
    # fill the ROOT histogram with the numpy array
    hist.Reset()
    fill_hist(hist, rec2array(array_bootstrapped))
    hist.Draw()
    # Axis titles, limits and divisions are set after Draw().
    hist.xaxis.title = 'x'
    hist.yaxis.title = 'y'
    hist.zaxis.title = 'Events'
    hist.xaxis.limits = (-2.5, 2.5)
    hist.yaxis.limits = (-2.5, 2.5)
    hist.zaxis.range_user = (0, 60)
    hist.xaxis.divisions = 5
    hist.yaxis.divisions = 5
Example #34
0
def make_tree(entries, branches=1, dtype=np.double):
    """Return a zero-filled tree with `entries` rows and `branches` branches.

    Every branch gets a name produced by ``randomword(20)`` and the element
    type `dtype`; the tree itself is named with a fresh UUID hex string.
    """
    fields = []
    for _ in range(branches):
        fields.append((randomword(20), dtype))
    zeros = np.zeros(entries, dtype=np.dtype(fields))
    tree_name = uuid.uuid4().hex
    return array2tree(zeros, name=tree_name)
Example #35
0
def to_root(df, path, key='my_ttree', mode='w', store_index=True, *args, **kwargs):
    """
    Write DataFrame to a ROOT file.

    Parameters
    ----------
    path: string
        File path to new ROOT file (will be overwritten)
    key: string
        Name of tree that the DataFrame will be saved as. Components
        separated by '/' are treated as directories inside the file and are
        created if they do not exist.
    mode: string, {'w', 'a'}
        Mode that the file should be opened in (default: 'w')
    store_index: bool (optional, default: True)
        Whether the index of the DataFrame should be stored as
        an __index__* branch in the tree

    Raises
    ------
    ValueError
        If `mode` is neither 'a' nor 'w', or if a categorical column cannot
        be encoded because its name or one of its categories contains '*'.
    IOError
        If the file cannot be opened or is not writable.

    Notes
    -----

    Further *args and **kwargs are accepted for call compatibility but are
    not used by this function.

    >>> df = DataFrame({'x': [1,2,3], 'y': [4,5,6]})
    >>> df.to_root('test.root')

    The DataFrame index will be saved as a branch called '__index__*',
    where * is the name of the index in the original DataFrame
    """

    if mode == 'a':
        mode = 'update'
    elif mode == 'w':
        mode = 'recreate'
    else:
        raise ValueError('Unknown mode: {}. Must be "a" or "w".'.format(mode))

    from root_numpy import array2tree
    # We don't want to modify the user's DataFrame here, so we make a shallow copy
    df_ = df.copy(deep=False)

    if store_index:
        name = df_.index.name
        if name is None:
            # Handle the case where the index has no name
            name = ''
        df_['__index__' + name] = df_.index

    # Convert categorical columns into something root_numpy can serialise:
    # the codes are stored as values, while the column name, the ordering
    # flag and the categories are packed into the branch name.
    for col in df_.select_dtypes(['category']).columns:
        name_components = ['__rpCaT', col, str(df_[col].cat.ordered)]
        name_components.extend(df_[col].cat.categories)
        # Every component must be free of the separator, otherwise the branch
        # name could not be split unambiguously again. (A bare list of
        # booleans is always truthy, hence the explicit all().)
        if all('*' not in c for c in name_components):
            sep = '*'
        else:
            raise ValueError('Unable to find suitable separator for columns')
        df_[col] = df_[col].cat.codes
        df_.rename(index=str, columns={col: sep.join(name_components)}, inplace=True)

    arr = df_.to_records(index=False)

    root_file = ROOT.TFile.Open(path, mode)
    if not root_file:
        raise IOError("cannot open file {0}".format(path))
    # From here on the file is open: make sure it is closed on every exit path.
    try:
        if not root_file.IsWritable():
            raise IOError("file {0} is not writable".format(path))

        # Navigate to the requested directory
        open_dirs = [root_file]
        for dir_name in key.split('/')[:-1]:
            current_dir = open_dirs[-1].Get(dir_name)
            if not current_dir:
                current_dir = open_dirs[-1].mkdir(dir_name)
            current_dir.cd()
            open_dirs.append(current_dir)

        # The key is now just the top component
        key = key.split('/')[-1]

        # If a tree with that name exists, we want to update it
        tree = open_dirs[-1].Get(key)
        if not tree:
            tree = None
        tree = array2tree(arr, name=key, tree=tree)
        tree.Write(key, ROOT.TFile.kOverwrite)
    finally:
        root_file.Close()
Example #36
0
def make_tree(entries, branches=1, dtype=np.double):
    """Create a tree of `entries` zero-valued rows.

    The tree has `branches` branches of element type `dtype`, each named by a
    call to ``randomword(20)``; the tree name is a random UUID hex string.
    """
    branch_spec = []
    for _ in range(branches):
        branch_spec.append((randomword(20), dtype))
    record_type = np.dtype(branch_spec)
    blank = np.zeros(entries, dtype=record_type)
    return array2tree(blank, name=uuid.uuid4().hex)
Example #37
0
def fit_doubleCB(a_mc_x, a_data, out_path, s_info=''):
    """Fit a double Crystal Ball to Monte Carlo, then to data plus background.

    The Monte Carlo sample is fitted first with a sum of two Crystal Ball
    shapes sharing a mean (the second width is ``sig_1 * r_s1_s2``).  The
    data are then fitted with the same signal shape plus an exponential
    background; the tail parameters, width ratio and CB fraction start from
    the Monte Carlo result and are held constant, while the mean, first
    width, background slope and both yields float.

    Parameters
    ----------
    a_mc_x : numpy ndarray
        Monte Carlo mass values; cast to a structured array with the single
        field ``b_mass_mc``.
    a_data : numpy ndarray
        Data mass values; cast to a structured array with the single field
        ``b_mass``.
    out_path : str
        Prefix for the output PDF names.  It is concatenated directly with
        ``s_info``, so include a trailing path separator if it is a directory.
    s_info : str
        Optional tag inserted between ``out_path`` and the fixed file suffix.

    Returns
    -------
    dict
        Fitted values and errors under ``mc_*`` and ``data_*`` keys, plus
        ``mc_fit_chi2`` and ``data_fit_chi2`` taken from the plotted frames.

    Notes
    -----
    Two PDFs are written as a side effect:
    ``<out_path><s_info>_FittedMassDistribution_MonteCarlo.pdf`` and
    ``<out_path><s_info>_FittedMassDistribution_Data.pdf``.
    """
    # Initialise dictionary for storing fit info
    d_fit_info = {}
    # Format arrays: the field names match the RooRealVar names used when the
    # trees are imported into RooDataSets below.
    # np.float64 replaces the np.float alias, which was removed in NumPy 1.24.
    a_fit_mc   = a_mc_x.astype(dtype=[('b_mass_mc', np.float64)])
    a_fit_data = a_data.astype(dtype=[('b_mass'   , np.float64)])
    # Estimate sig/bkg yields
    max_yield_est = a_fit_data.shape[0]                         # max possible signal yield
    bkg_yield_est = len(a_fit_data[a_fit_data['b_mass'] > 5350.]) * 4.     # estimate background from sideband
    # Create tree for fitting
    t_fit_data = root_numpy.array2tree(a_fit_data)
    t_fit_mc   = root_numpy.array2tree(a_fit_mc)

    ## Monte Carlo
    # Initialise parameters for fit
    b_mass_mc     = ROOT.RooRealVar("b_mass_mc"   , "B mass MC [MeV]", 5200. , 5400.)
    mean_mc       = ROOT.RooRealVar("mean_mc"     , "mean_mc"        , 5279. , 5195.  , 5400.)
    sig_1_mc      = ROOT.RooRealVar("sig_1_mc"    , "sig_1_mc"       ,    5. ,     .1 ,   15.)
    alpha_1_mc    = ROOT.RooRealVar("alpha_1_mc"  , "alpha_1_mc"     ,    5. ,     .1 ,   10.)
    n_1_mc        = ROOT.RooRealVar("n_1_mc"      , "n_1_mc"         ,    2. ,    0.  ,   15.)
    r_s1_s2_mc    = ROOT.RooRealVar("r_s1_s2_mc"  , "r_s1_s2_mc"     ,     .5,     .1 ,   10.)
    # Second width is tied to the first through a floating ratio
    sig_2_mc      = ROOT.RooFormulaVar("sig_2_mc" , "sig_1_mc*r_s1_s2_mc", ROOT.RooArgList(sig_1_mc, r_s1_s2_mc))
    alpha_2_mc    = ROOT.RooRealVar("alpha_2_mc"  , "alpha_2_mc"     ,   -5. ,  -10.  ,    -.1)
    n_2_mc        = ROOT.RooRealVar("n_2_mc"      , "n_2_mc"         ,    2. ,    0.  ,   15.)
    r_cb1_cb2_mc  = ROOT.RooRealVar("r_cb1_cb2_mc",  "r_cb1_cb2_mc"  ,     .5,     .01,    1.)
    # Initialise fit model
    cb_1_mc      = ROOT.RooCBShape("cb_1_mc"     , "cb_1_mc"     , b_mass_mc, mean_mc, sig_1_mc, alpha_1_mc, n_1_mc)
    cb_2_mc      = ROOT.RooCBShape("cb_2_mc"     , "cb_2_mc"     , b_mass_mc, mean_mc, sig_2_mc, alpha_2_mc, n_2_mc)
    model_sig_mc = ROOT.RooAddPdf("model_sig_mc", "model_sig_mc", ROOT.RooArgList(cb_1_mc, cb_2_mc), ROOT.RooArgList(r_cb1_cb2_mc))
    # Initialise dataset
    dataset_mc = ROOT.RooDataSet("dataset_mc","dataset from tree", t_fit_mc, ROOT.RooArgSet(b_mass_mc))
    # Perform fit - every shape parameter floats in the MC fit
    mean_mc     .setConstant(ROOT.kFALSE)
    sig_1_mc    .setConstant(ROOT.kFALSE)
    alpha_1_mc  .setConstant(ROOT.kFALSE)
    n_1_mc      .setConstant(ROOT.kFALSE)
    r_s1_s2_mc  .setConstant(ROOT.kFALSE)
    alpha_2_mc  .setConstant(ROOT.kFALSE)
    n_2_mc      .setConstant(ROOT.kFALSE)
    r_cb1_cb2_mc.setConstant(ROOT.kFALSE)
    model_sig_mc.fitTo(dataset_mc, ROOT.RooFit.Range(5220., 5400.))
    # Store fitted values
    f_mean_mc       = ROOT.RooRealVar("f_mean_mc"   , "f_mean_mc"   , mean_mc   .getValV())
    f_sig_1_mc      = ROOT.RooRealVar("f_sig_1_mc"  , "f_sig_1_mc"  , sig_1_mc  .getValV())
    f_alpha_1_mc    = ROOT.RooRealVar("f_alpha_1_mc", "f_alpha_1_mc", alpha_1_mc.getValV())
    f_n_1_mc        = ROOT.RooRealVar("f_n_1_mc"    , "f_n_1_mc"    , n_1_mc    .getValV())
    f_sig_2_mc      = ROOT.RooRealVar("f_sig_2_mc"  , "f_sig_2_mc"  , sig_2_mc  .getValV())
    f_alpha_2_mc    = ROOT.RooRealVar("f_alpha_2_mc", "f_alpha_2_mc", alpha_2_mc.getValV())
    # Must come from n_2_mc: the second CB has its own tail exponent
    f_n_2_mc        = ROOT.RooRealVar("f_n_2_mc"    , "f_n_2_mc"    , n_2_mc    .getValV())
    # Store fitted models
    f_cb_1_mc      = ROOT.RooCBShape("cb_1_mc"     , "cb_1_mc"     , b_mass_mc, f_mean_mc, f_sig_1_mc, f_alpha_1_mc, f_n_1_mc)
    f_cb_2_mc      = ROOT.RooCBShape("cb_2_mc"     , "cb_2_mc"     , b_mass_mc, f_mean_mc, f_sig_2_mc, f_alpha_2_mc, f_n_2_mc)
    f_model_sig_mc = ROOT.RooAddPdf("model_sig_mc", "model_sig_mc", ROOT.RooArgList(f_cb_1_mc, f_cb_2_mc), ROOT.RooArgList(r_cb1_cb2_mc))
    ## Plot
    # Frame for fit
    frame_mc = b_mass_mc.frame()
    # Frame for pulls
    frame_mc_pull = b_mass_mc.frame()
    # Add data and fit to frame
    dataset_mc.plotOn(frame_mc)
    f_model_sig_mc.plotOn(frame_mc)
    # Plot on split canvas
    c = ROOT.TCanvas("X3872MC", "X3872MC", 400, 500)
    c.Divide(1, 2, 0, 0)
    # Plot data and fit
    c.cd(2)
    ROOT.gPad.SetTopMargin(0)
    ROOT.gPad.SetLeftMargin(0.15)
    ROOT.gPad.SetRightMargin(0.035)
    ROOT.gPad.SetPad(.01,.01,.95,.77)
    frame_mc.SetTitle("Fitted Monte-Carlo Bmass")
    frame_mc.SetMaximum(frame_mc.GetMaximum()*1.1)
    frame_mc.GetYaxis().SetTitleOffset(1.6)
    frame_mc.Draw()
    # Plot pulls
    c.cd(1)
    ROOT.gPad.SetTopMargin(0)
    ROOT.gPad.SetLeftMargin(0.15)
    ROOT.gPad.SetRightMargin(0.035)
    ROOT.gPad.SetPad(.01,.76,.95,.97)
    # Determine pulls and format
    h_pull_mc = frame_mc.pullHist()
    h_pull_mc.SetFillColor(15)
    h_pull_mc.SetFillStyle(3144)
    # Add pulls to frame
    frame_mc_pull.addPlotable(h_pull_mc,'L3')
    frame_mc_pull.GetYaxis().SetNdivisions(505)
    frame_mc_pull.GetYaxis().SetLabelSize(0.20)
    frame_mc_pull.SetTitle("")
    frame_mc_pull.Draw()
    # Save canvas
    c.SaveAs(out_path+s_info+'_FittedMassDistribution_MonteCarlo.pdf')
    # Store fit variables
    d_fit_info['mc_mean']     = mean_mc.getValV()
    d_fit_info['mc_mean_err'] = mean_mc.getError()
    d_fit_info['mc_sig1']     = sig_1_mc.getValV()
    d_fit_info['mc_sig1_err'] = sig_1_mc.getError()
    d_fit_info['mc_r_s1_s2']     = r_s1_s2_mc.getValV()
    d_fit_info['mc_r_s1_s2_err'] = r_s1_s2_mc.getError()
    d_fit_info['mc_alpha1']     = alpha_1_mc.getValV()
    d_fit_info['mc_alpha1_err'] = alpha_1_mc.getError()
    d_fit_info['mc_alpha2']     = alpha_2_mc.getValV()
    d_fit_info['mc_alpha2_err'] = alpha_2_mc.getError()
    d_fit_info['mc_n1']     = n_1_mc.getValV()
    d_fit_info['mc_n1_err'] = n_1_mc.getError()
    d_fit_info['mc_n2']     = n_2_mc.getValV()
    d_fit_info['mc_n2_err'] = n_2_mc.getError()
    d_fit_info['mc_r_cb1_cb2']     = r_cb1_cb2_mc.getValV()
    d_fit_info['mc_r_cb1_cb2_err'] = r_cb1_cb2_mc.getError()
    d_fit_info['mc_fit_chi2'] = frame_mc.chiSquare()

    ## Data
    # Signal parameters start from the MC fit result
    b_mass    = ROOT.RooRealVar("b_mass"   , "B mass [MeV]" , 5220., 5400.)
    mean      = ROOT.RooRealVar("mean"     , "mean"         , mean_mc     .getValV(), 5220., 5400.)
    sig_1     = ROOT.RooRealVar("sig_1"    , "sig_1"        , sig_1_mc    .getValV(), .1, 15.)
    alpha_1   = ROOT.RooRealVar("alpha_1"  , "alpha_1"      , alpha_1_mc  .getValV(), .1, 10.)
    n_1       = ROOT.RooRealVar("n_1"      , "n_1"          , n_1_mc      .getValV(), 0., 15.)
    r_s1_s2   = ROOT.RooRealVar("r_s1_s2"  , "r_s1_s2"      , r_s1_s2_mc  .getValV(), 0.01, 10)
    sig_2     = ROOT.RooFormulaVar("sig_2" , "sig_1*r_s1_s2", ROOT.RooArgList(sig_1, r_s1_s2))
    alpha_2   = ROOT.RooRealVar("alpha_2"  , "alpha_2"      , alpha_2_mc  .getValV(), -10., -.1)
    n_2       = ROOT.RooRealVar("n_2"      , "n_2"          , n_2_mc      .getValV(), 0., 15.)
    r_cb1_cb2 = ROOT.RooRealVar("r_cb1_cb2",  "r_cb1_cb2"   , r_cb1_cb2_mc.getValV(), 0.01  , 1.)
    # NOTE(review): the upper bound goes non-positive when the sideband
    # estimate exceeds the sample size - confirm inputs cannot trigger that.
    sig_yield = ROOT.RooRealVar("sig_yield", "sig_yield"    , 0, (max_yield_est - bkg_yield_est)*1.5)
    # Background
    exp_c     = ROOT.RooRealVar("exp_c"        , "exp_c"    , 0., -.02, .02)
    bgr_yield = ROOT.RooRealVar("bgr_yield"    , "bgr_yield", bkg_yield_est*.5, max_yield_est)
    # Initialise fit model
    cb_1      = ROOT.RooCBShape("cb_1"     , "cb_1"     , b_mass, mean, sig_1, alpha_1, n_1)
    cb_2      = ROOT.RooCBShape("cb_2"     , "cb_2"     , b_mass, mean, sig_2, alpha_2, n_2)
    exp_bg    = ROOT.RooExponential("exp_bg", "exp_bg", b_mass, exp_c)
    model_sig = ROOT.RooAddPdf("model_sig", "model_sig", ROOT.RooArgList(cb_1, cb_2)       , ROOT.RooArgList(r_cb1_cb2))
    model_tot = ROOT.RooAddPdf("model_tot", "model_tot", ROOT.RooArgList(model_sig, exp_bg), ROOT.RooArgList(sig_yield, bgr_yield))
    # Initialise dataset
    dataset = ROOT.RooDataSet("dataset","dataset from tree", t_fit_data, ROOT.RooArgSet(b_mass))
    # Perform fit - variables set constant (kTRUE) keep their MC-fit values
    mean     .setConstant(ROOT.kFALSE)
    sig_1    .setConstant(ROOT.kFALSE)
    alpha_1  .setConstant(ROOT.kTRUE)
    n_1      .setConstant(ROOT.kTRUE)
    r_s1_s2  .setConstant(ROOT.kTRUE)
    alpha_2  .setConstant(ROOT.kTRUE)
    n_2      .setConstant(ROOT.kTRUE)
    r_cb1_cb2.setConstant(ROOT.kTRUE)
    sig_yield.setConstant(ROOT.kFALSE)
    exp_c    .setConstant(ROOT.kFALSE)
    bgr_yield.setConstant(ROOT.kFALSE)
    model_tot.fitTo(dataset, ROOT.RooFit.Range(5220., 5380.))
    # Store fitted values
    f_mean      = ROOT.RooRealVar("f_mean"     , "f_mean"     , mean     .getValV())
    f_sig_1     = ROOT.RooRealVar("f_sig_1"    , "f_sig_1"    , sig_1    .getValV())
    f_alpha_1   = ROOT.RooRealVar("f_alpha_1"  , "f_alpha_1"  , alpha_1  .getValV())
    f_n_1       = ROOT.RooRealVar("f_n_1"      , "f_n_1"      , n_1      .getValV())
    f_sig_2     = ROOT.RooRealVar("f_sig_2"    , "f_sig_2"    , sig_2    .getValV())
    f_alpha_2   = ROOT.RooRealVar("f_alpha_2"  , "f_alpha_2"  , alpha_2  .getValV())
    # Must come from n_2: the second CB has its own tail exponent
    f_n_2       = ROOT.RooRealVar("f_n_2"      , "f_n_2"      , n_2      .getValV())
    f_exp_c     = ROOT.RooRealVar("f_exp_c"    , "f_exp_c"    , exp_c    .getValV())
    f_sig_yield = ROOT.RooRealVar("f_sig_yield", "f_sig_yield", sig_yield.getValV())
    f_bgr_yield = ROOT.RooRealVar("f_bgr_yield", "f_bgr_yield", bgr_yield.getValV())
    # Store fitted models
    f_cb_1      = ROOT.RooCBShape("cb_1"     , "cb_1"     , b_mass, f_mean, f_sig_1, f_alpha_1, f_n_1)
    f_cb_2      = ROOT.RooCBShape("cb_2"     , "cb_2"     , b_mass, f_mean, f_sig_2, f_alpha_2, f_n_2)
    f_exp_bg    = ROOT.RooExponential("exp_bg"   , "exp_bg"   , b_mass, f_exp_c)
    f_model_sig = ROOT.RooAddPdf("model_sig", "model_sig", ROOT.RooArgList(f_cb_1, f_cb_2), ROOT.RooArgList(r_cb1_cb2))
    f_model_tot = ROOT.RooAddPdf("model_tot", "model_tot", ROOT.RooArgList(f_model_sig, f_exp_bg), ROOT.RooArgList(f_sig_yield, f_bgr_yield))
    # Plot
    # Frame for fit
    frame = b_mass.frame()
    # Frame for pulls
    frame_pull = b_mass.frame()
    # Add data and fit to frame
    dataset.plotOn(frame)
    f_model_tot.plotOn(frame)
    # Plot on split canvas
    c = ROOT.TCanvas("data_fit", "data_fit", 400, 500)
    c.Divide(1, 2, 0, 0)
    # Plot data and fit
    c.cd(2)
    ROOT.gPad.SetTopMargin(0)
    ROOT.gPad.SetLeftMargin(0.15)
    ROOT.gPad.SetRightMargin(0.035)
    ROOT.gPad.SetPad(.01,.01,.95,.77)
    frame.SetTitle("Fitted Data Bmass")
    frame.SetMaximum(frame.GetMaximum()*1.1)
    frame.GetYaxis().SetTitleOffset(1.6)
    frame.Draw()
    # Plot pulls
    c.cd(1)
    ROOT.gPad.SetTopMargin(0)
    ROOT.gPad.SetLeftMargin(0.15)
    ROOT.gPad.SetRightMargin(0.035)
    ROOT.gPad.SetPad(.01,.76,.95,.97)
    # Determine pulls and format
    h_pull = frame.pullHist()
    h_pull.SetFillColor(15)
    h_pull.SetFillStyle(3144)
    # Add pulls to frame
    frame_pull.addPlotable(h_pull,'L3')
    frame_pull.GetYaxis().SetNdivisions(505)
    frame_pull.GetYaxis().SetLabelSize(0.20)
    frame_pull.SetTitle("")
    frame_pull.Draw()
    # Save plot
    c.SaveAs(out_path+s_info+'_FittedMassDistribution_Data.pdf')
    # Store fit variables
    d_fit_info['data_mean']     = mean.getValV()
    d_fit_info['data_mean_err'] = mean.getError()
    d_fit_info['data_sig1']     = sig_1.getValV()
    d_fit_info['data_sig1_err'] = sig_1.getError()
    d_fit_info['data_r_s1_s2']     = r_s1_s2.getValV()
    d_fit_info['data_r_s1_s2_err'] = r_s1_s2.getError()
    d_fit_info['data_alpha1']     = alpha_1.getValV()
    d_fit_info['data_alpha1_err'] = alpha_1.getError()
    d_fit_info['data_alpha2']     = alpha_2.getValV()
    d_fit_info['data_alpha2_err'] = alpha_2.getError()
    d_fit_info['data_n1']     = n_1.getValV()
    d_fit_info['data_n1_err'] = n_1.getError()
    d_fit_info['data_n2']     = n_2.getValV()
    d_fit_info['data_n2_err'] = n_2.getError()
    d_fit_info['data_r_cb1_cb2']     = r_cb1_cb2.getValV()
    d_fit_info['data_r_cb1_cb2_err'] = r_cb1_cb2.getError()
    d_fit_info['data_expc']     = exp_c.getValV()
    d_fit_info['data_expc_err'] = exp_c.getError()
    d_fit_info['data_sig_yield']     = sig_yield.getValV()
    d_fit_info['data_sig_yield_err'] = sig_yield.getError()
    d_fit_info['data_bgr_yield']     = bgr_yield.getValV()
    d_fit_info['data_bgr_yield_err'] = bgr_yield.getError()
    d_fit_info['data_fit_chi2'] = frame.chiSquare()

    # Return fit info dictionary
    return d_fit_info
                                   ('jet1eta', np.float32),
                                   ('jet1phi', np.float32),
                                   ('jet1M', np.float32),
                                   ('jet2pt', np.float32),
                                   ('jet2eta', np.float32),
                                   ('jet2phi', np.float32),
                                   ('jet2M', np.float32), ('MET', np.float32),
                                   ('METphi', np.float32),
                                   ('LL_Helicity', np.float32),
                                   ('TTTL_Helicity', np.float32)])

# Output paths for the three test-sample ROOT files, all located under
# tens_model_class/<Model_name>/TEST_TRAIN_ROOT/.
ROOT_filename = "tens_model_class/" + Model_name + "/TEST_TRAIN_ROOT/" + "TEST_ROOT.root"
ROOT_filename_LL = "tens_model_class/" + Model_name + "/TEST_TRAIN_ROOT/" + "TEST_ROOT_LL.root"
ROOT_filename_TTTL = "tens_model_class/" + Model_name + "/TEST_TRAIN_ROOT/" + "TEST_ROOT_TTTL.root"
# Full test sample: open the file in RECREATE mode, convert the array to a
# tree, write the tree and close the file.
# NOTE(review): the tree is created after the file is opened, presumably so
# that it is attached to that file - confirm against array2tree's behaviour.
Test_ROOT = TFile(ROOT_filename, "RECREATE")
tree_test = array2tree(TEST_nplist)
tree_test.Write()
Test_ROOT.Close()

# Same open / convert / write / close sequence for the LL array.
Test_ROOT_LL = TFile(ROOT_filename_LL, "RECREATE")
tree_test_LL = array2tree(TEST_nplist_LL)
tree_test_LL.Write()
Test_ROOT_LL.Close()

# Same open / convert / write / close sequence for the TTTL array.
Test_ROOT_TTTL = TFile(ROOT_filename_TTTL, "RECREATE")
tree_test_TTTL = array2tree(TEST_nplist_TTTL)
tree_test_TTTL.Write()
Test_ROOT_TTTL.Close()
# Drop the references to the test list and the full test array.
# NOTE(review): TEST_nplist_LL and TEST_nplist_TTTL are not deleted here -
# confirm whether later code still needs them.
del Test_List
del TEST_nplist
## </SAVE TEST_ROOT>