Example #1
def test_object_selection():
    a = rnp.root2array(load('vary*.root'), branches='n_int',
                       object_selection={'n_int % 2 == 0': 'n_int'})
    for suba in a:
        assert_true((suba % 2 == 0).all())

    # branch does not exist
    assert_raises(ValueError, rnp.root2array, load('vary*.root'),
                  branches='n_int', object_selection={'n_int % 2 == 0': 'DNE'})

    # duplicate branch in selection list
    assert_raises(ValueError, rnp.root2array, load('vary*.root'),
                  branches='n_int', object_selection={'n_int % 2 == 0': ['n_int', 'n_int']})

    # test object selection on variable-length expression
    a = rnp.root2array(load('object*.root'), branches='lines.GetX1()',
                       object_selection={'lines.GetX1() > 3': 'lines.GetX1()'})

    for suba in a:
        assert_true((suba > 3).all())

    # attempting to apply object selection on fixed-length array
    # currently not implemented since this changes the output type from
    # fixed-length to variable-length
    assert_raises(TypeError, rnp.root2array, load("fixed*.root"),
                  branches='n_int',
                  object_selection={'n_int % 2 == 0': 'n_int'})

    # test with vectors
    a = rnp.root2array(load('vector.root'), branches='v_i',
                       object_selection={'v_i % 2 == 0': 'v_i'})

    for suba in a:
        assert_true((suba % 2 == 0).all())
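The object_selection argument maps a selection expression onto the branches it filters, keeping only the array elements that pass the cut in each event. A minimal sketch of the call shape (file, tree, and branch names are hypothetical):

import root_numpy as rnp

# Keep only the even elements of the variable-length branch 'n_int'
# in every event; the output stays variable-length per event.
arr = rnp.root2array('events.root', treename='tree', branches='n_int',
                     object_selection={'n_int % 2 == 0': 'n_int'})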
Example #2
def test_array2root():
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            ('w', np.bool_)])
    with temp() as tmp:
        rnp.array2root(a, tmp.GetName(), mode='recreate')
        a_conv = rnp.root2array(tmp.GetName())
        assert_array_equal(a, a_conv)
        # extend the tree
        rnp.array2root(a, tmp.GetName(), mode='update')
        a_conv2 = rnp.root2array(tmp.GetName())
        assert_array_equal(np.hstack([a, a]), a_conv2)
        # write into subdirectory
        tname = 'root/sub/tree'
        rnp.array2root(a, tmp.GetName(), treename=tname, mode='update')
        a_conv3 = rnp.root2array(tmp.GetName(), treename=tname)
        assert_array_equal(a, a_conv3)
        # try creating tree with conflicting name
        assert_raises(IOError, rnp.array2root, a, tmp.GetName(),
                treename='root/sub', mode='update')
        # try creating subdirectory with conflicting name
        assert_raises(IOError, rnp.array2root, a, tmp.GetName(),
                treename='root/sub/tree/error', mode='update')
Example #3
def test_single():
    f = load('single1.root')
    a = rnp.root2array(f)
    check_single(a)
    # specify tree name
    a = rnp.root2array(f, treename='tree')
    check_single(a)
Example #4
def test_array2tree_fixed_length_arrays():
    f = load(['fixed1.root', 'fixed2.root'])
    a = rnp.root2array(f)
    with temp() as tmp:
        rnp.array2root(a, tmp.GetName(), mode='recreate')
        a_conv = rnp.root2array(tmp.GetName())
        assert_array_equal(a, a_conv)
Example #5
def load_data(data_path, branch_names, dataset_names, dataset_ranges=[]):
    """ Import data from several ROOT files to a recarray """
    l_raw_vars = []
    l_weight = []
    l_origin = []
    for i, d_name in enumerate(dataset_names):
        f_name =  "{}{}.root".format(data_path,d_name)
        if "BTagCSV" in d_name:
            d_weight = 1.
        else:
            d_weight = mc_samples[d_name]["xs"]/mc_samples[d_name]["gen_events"] 
        if len(dataset_ranges) == len(dataset_names): 
            l_raw_vars.append(root2array(f_name,"tree", branch_names,
                              stop=dataset_ranges[i]))
        else:    
            l_raw_vars.append(root2array(f_name,"tree", branch_names))
        n_ev = l_raw_vars[-1].shape[0]
        l_weight.append(np.full((n_ev),d_weight, 'f8'))
        l_origin.append(np.full((n_ev),d_name, 'a20'))
    raw_vars = stack_arrays(l_raw_vars, asrecarray=True, usemask=False)     
    weight = stack_arrays(l_weight, asrecarray=True, usemask=False)     
    origin = stack_arrays(l_origin, asrecarray=True, usemask=False)     
    raw_vars = append_fields(raw_vars, ["origin","weight"], [origin, weight],
                             asrecarray=True, usemask=False)
    return raw_vars
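A hedged usage sketch of load_data (path, branch names, and dataset names are hypothetical, and the mc_samples lookup must already be defined as assumed above):

branch_names = ['ht', 'n_jets']             # hypothetical branches
dataset_names = ['BTagCSV', 'QCD_HT500']    # hypothetical datasets
data = load_data('/data/ntuples/', branch_names, dataset_names)
# 'data' is a recarray holding the requested branches plus appended
# per-event 'weight' and 'origin' fields.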
Example #6
def test_expression():
    rec = rnp.root2array(load('single*.root'))
    rec2 = rnp.root2array(load('single*.root'), branches=['f_float*2'])
    assert_array_equal(rec['f_float'] * 2, rec2['f_float*2'])

    a = rnp.root2array(load('single*.root'), branches='Entry$')
    assert_equal(a.dtype, np.int32)
    assert_array_equal(a, np.arange(a.shape[0]))
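As the test shows, branches accepts TTree::Draw-style expressions, including special variables such as Entry$. A minimal sketch (file name hypothetical):

# Read a computed expression and the entry index side by side
a = rnp.root2array('events.root', branches=['f_float*2', 'Entry$'])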
Example #7
def compute_N_B_events_MC(track_file, vertex_file, name=""):    
    Bevents_tracks = pandas.DataFrame(root_numpy.root2array(track_file, branches=['run', 'event', 'IPs']))
    Bevents_tracks = Bevents_tracks.loc[numpy.isfinite(Bevents_tracks.IPs), :]
    B_events_vertices = pandas.DataFrame(root_numpy.root2array(vertex_file, branches=['run', 'event', 'vcharge']))
    B_events_vertices = B_events_vertices[B_events_vertices.vcharge > 0]
    B_events = pandas.concat([Bevents_tracks, B_events_vertices])

    B_events['event_id'] = B_events.run.apply(str) + '_' + B_events.event.apply(str)
    B_events['N_sig_sw'] = 1
    N_B_events = get_events_number(B_events)
    return N_B_events
Example #8
def test_selection_and_expression():
    ref = len(rnp.root2array(
        load('test.root'), branches=['x', 'y'], selection='z>0'))
    assert_equal(ref,
        len(rnp.root2array(
            load('test.root'), branches=['x', 'y', 'z'], selection='z>0')))
    assert_equal(ref,
        len(rnp.root2array(
            load('test.root'), branches=['x', 'x*y'], selection='z>0')))
    assert_equal(ref,
        len(rnp.root2array(
            load('test.root'), branches=['x', 'x*z'], selection='z>0')))
Example #9
def test_slice():
    a = rnp.root2array(load('single1.root'), stop=10).view(np.recarray)
    assert_equal(len(a), 10)
    assert_equal(a.n_int[-1], 10)

    a = rnp.root2array(load('single1.root'), stop=11, start=1).view(np.recarray)
    assert_equal(len(a), 10)
    assert_equal(a.n_int[-1], 11)

    a = rnp.root2array(load('single1.root'), stop=105, start=95).view(np.recarray)
    assert_equal(len(a), 5)
    assert_equal(a.n_int[-1], 100)
Example #10
def test_single():
    f = load('single1.root')
    a = rnp.root2array(f)
    check_single(a)

    # specify tree name
    a = rnp.root2array(f, treename='tree')
    check_single(a)

    # tree2array
    f = get_file('single1.root')
    tree = f.Get('tree')
    check_single(rnp.tree2array(tree))
Example #11
def run(name, source, quick=False):
    print time.asctime(time.localtime()), "Filling BDT Branches"  

    branch_names = joblib.load("pickle/variables.pkl")
    
    if quick:
        signal = joblib.load('pickle/all_signalq.pkl')   
        clf = joblib.load("pickle/" + name + "quick.pkl")     
    else:
        signal = joblib.load('pickle/all_signal.pkl')
        clf = joblib.load("pickle/" + name + ".pkl")

    # predict and write probability of each MC event being signal
    bdt_MC_predicted = clf.predict_proba(signal)
    bdt_MC_predicted.dtype = [('GradBoost_prob', np.float64)]
    array2root((np.hsplit(bdt_MC_predicted,2)[1]), "/net/storage03/data/users/dlafferty/NTuples/SignalMC/2012/combined/Bs2phiphi_MC_2012_combined_corrected_TupleA_BDT.root", "DecayTree")

    # predict and write probability of every data event being signal
    all_data = root2array("/net/storage03/data/users/dlafferty/NTuples/data/2012/combined/Bs2phiphi_data_2012_corrected_TupleA_BDT.root", "DecayTree", branch_names)
    all_data = rec2array(all_data)

    bdt_data_predicted = clf.predict_proba(all_data)
    bdt_data_predicted.dtype = [('GradBoost_prob', np.float64)]
    array2root((np.hsplit(bdt_data_predicted,2)[1]), "/net/storage03/data/users/dlafferty/NTuples/data/2012/combined/Bs2phiphi_data_2012_corrected_TupleA_BDT.root", "DecayTree")
        
    print(time.asctime(time.localtime()), "Branches Filled!")
Example #12
def main():

    # Use the Bayesian Methods for Hackers design
    plt.style.use('bmh')
    matplotlib.rcParams.update({'font.size': 8})

    # Parse command line arguments
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-l", "--file_list", help="ROOT file")
    args = parser.parse_args()

    # If a list of files has not been specified, warn the user and exit
    # the application.
    if not args.file_list: 
        print('A list of ROOT files needs to be specified')
        sys.exit(2)

    # Open the file containing the list of files to process
    root_file_list = None
    try:
        root_file_list = open(args.file_list, 'r')
    except IOError: 
        print('Unable to open file %s' % args.file_list)
        sys.exit(2)

    root_files = []
    for line in root_file_list: 
        root_files.append(line.strip())

    rec = rnp.root2array(root_files, 'results')

    make_plots(rec)
Example #13
def readFiles():
    print('Reading files...')

    weightsS = root2rec(files_signal, treename='tree', branches=['full_weight'], selection=selection)['full_weight']
    weightsB = root2rec(files_bg, treename='tree', branches=['full_weight'], selection=selection)['full_weight']

    sum_weightsS = np.sum(weightsS)
    sum_weightsB = np.sum(weightsB)

    weightsB = weightsB * sum_weightsS/sum_weightsB

    nS = len(weightsS)
    nB = len(weightsB)

    fullWeight = np.concatenate((weightsS, weightsB))
    # fullWeight = fullWeight['weight']

    # fullWeight = np.ones(len(fullWeight))

    # del weightsS, weightsB

    arrSB = root2array(files_signal + files_bg, treename='tree', branches=trainVars(), selection=selection)

    # Need a matrix-like array instead of a 1-D array of lists for sklearn
    arrSB = (np.asarray([arrSB[var] for var in trainVars()])).transpose()

    targets = np.concatenate((np.ones(nS),np.zeros(nB)))

    print('Done reading files.')

    return arrSB, fullWeight, targets
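The transpose above converts a structured array into the plain 2-D float matrix that sklearn expects, one row per event and one column per variable. An equivalent standalone sketch with hypothetical contents:

import numpy as np

rec = np.array([(1.0, 2.0), (3.0, 4.0)],
               dtype=[('var1', 'f8'), ('var2', 'f8')])
# Stack one column per field, then transpose to (n_events, n_vars)
X = np.asarray([rec[name] for name in rec.dtype.names]).transpose()
assert X.shape == (2, 2)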
Example #14
    def _GenData(self, args):
        ''' Loads the data for a general data value. '''
    
        if len(args) > 1:
            warn('WARNING in ' + self.__name__ + ':\n\t' + self.__name__ + 
                 ' takes a single argument: cut. Ignoring additional' +
                 ' arguments.', UserWarning)

        # If it's not a valid cut
        if not self._Check_Cut(args[0]):
        
            # Get Last Cut
            cut = CAPy_globals.GetLastCut()

        # If it is a valid cut
        else:
            cut = args[0]
        
            # Store Cut
            CAPy_globals.SetLastCut(cut)

        files, dirName, treeName = CAPy_globals._FileInfo(self.__name__, 1)
            
        # Now call data
        m = root2array(files, dirName + '/' + treeName, [self.__name__])
            
        # If cut, apply
        if cut:
            print "#TODO: Implement Cut"
            
        return m             
Example #15
def list_flat_branches(filename, treename, use_dtype=True):
    """ Lists branches in the file, vector branches, say D_p, turns into D_p[0], D_p[1], D_p[2], D_p[3].
    First event is used to count number of components
    :param filename: filename
    :param treename: name of tree
    :return: list of strings
    """
    import root_numpy
    import numpy
    result = []
    data = root_numpy.root2array(filename, treename=treename, stop=1)
    for branch, value in data.dtype.fields.items():
        if use_dtype:
            if value[0].name != 'object':
                result.append(branch)
            else:
                matrix = numpy.array(list(data[branch]))
                for index in range(matrix.shape[1]):
                    result.append("{}[{}]".format(branch, index))
        else:
            try:
                for index in range(len(data[branch][0])):
                    result.append("{column}[{index}]".format(column=branch, index=index))
            except TypeError:
                result.append(branch)
    return result
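For instance, with a hypothetical tree whose first event holds a scalar branch 'run' and a four-component vector branch 'D_p', the function would return something like:

# Hypothetical call and output
# list_flat_branches('file.root', 'tree')
# -> ['run', 'D_p[0]', 'D_p[1]', 'D_p[2]', 'D_p[3]']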
Example #16
def main():

    # Parse command line arguments
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("-f", "--file_list", help="List of ROOT files to process.")
    parser.add_argument("-l", "--lumi",      help="Luminosity")
    args = parser.parse_args()

    if not args.file_list:
        print('A list of ROOT files to process needs to be specified.')
        sys.exit(2)

    # Open the file containing the list of files to process
    root_file_list = None
    try:
        root_file_list = open(args.file_list, 'r')
    except IOError: 
        print "Unable to open file %s" % args.file_list
        sys.exit(2)

    root_files = []
    for line in root_file_list: 
        root_files.append(line.strip())

    rec = rnp.root2array(root_files, 'results')

    apply_tri_selection(rec, args.lumi)
Example #17
def datagen(sel, brs, infname, n_chunks=10):

    f = ROOT.TFile.Open(infname)
    entries = f.Get("multiclass_6j").GetEntries()
    f.Close()

    # Initialize
    step = entries // n_chunks
    i_start = 0

    # Generate data forever
    while True:
        
        d = root_numpy.root2array(infname, treename="multiclass_6j", branches=brs, selection=sel, start=i_start, stop=i_start + step)

        i_start += step

        # roll over
        if i_start + step >= entries:
            i_start = 0
            
        df = pandas.DataFrame(d)
                    
        # Shuffle
        df = df.iloc[np.random.permutation(len(df))]
                
        yield df
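A minimal sketch of driving the generator (selection, branch list, and file name are hypothetical):

gen = datagen(sel='njets >= 6',            # hypothetical selection
              brs=['jet_pt', 'jet_eta'],   # hypothetical branches
              infname='multiclass.root',
              n_chunks=10)
df_chunk = next(gen)  # one shuffled pandas.DataFrame chunk per call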
Example #18
def test_efficiency(functions, function_inputs, variables, inputfile, tree, selection):
    # Retrieve data from tree
    ninputs = len(function_inputs)
    branches = copy.deepcopy(function_inputs)
    branches.extend(variables)
    data = root2array(inputfile, treename=tree, branches=branches, selection=selection)
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    # Compute efficiencies along each variable and for each function
    graphs = []
    try:
        for ifu, function in enumerate(functions):
            for i,variable in enumerate(variables):
                xs  = data[:, [ninputs+i]].astype(np.float32).ravel()
                graphs.append(efficiency_graph(pass_function=function,\
                                               function_inputs=inputs,\
                                               xs=xs))
                graphs[-1].SetName('efficiency_{}_{}'.format(ifu,variable))
    except TypeError:
        for i,variable in enumerate(variables):
            xs  = data[:, [ninputs+i]].astype(np.float32).ravel()
            graphs.append(efficiency_graph(pass_function=functions,\
                                           function_inputs=inputs,\
                                           xs=xs))
            graphs[-1].SetName('efficiency_'+variable)
    return graphs
Example #19
def read_batch(path, treename, leaves, batch_size, each = 1, test_leaf = 0):
  event_batches = None
  need_another_batch = True

  batch_offset = 0

  while need_another_batch:
    branches = get_index(leaves, np.arange(batch_size)[::each] + batch_offset)

    data_root = root_numpy.root2array(path, treename=treename, branches=branches)

    need_another_batch, events = split_by_events(data_root, leaves, batch_size // each, test_leaf=test_leaf)

    batch_offset += batch_size

    if event_batches is None:
      event_batches = [ [event] for event in events ]
    else:
      assert len(event_batches) == len(events)
      event_batches = [
        batches + [batch] for batches, batch in zip(event_batches, events)
        ]


  return [ np.vstack(event) for event in event_batches ]
Example #20
def read_geometry(filename, treename, subdet, layer, wafer=-1):
    # Read cells from one layer
    selection = "zside==1 && layer=={0} && subdet=={1}".format(layer,subdet)
    if wafer!=-1:
        selection += ' && wafer=={}'.format(wafer)
    branches = ['id',
            'wafer', 'wafertype', 'cell', 
            'x', 'y']
    cells = root2array(filename, treename=treename, branches=branches, selection=selection)
    # Create cell shapes
    output_cells = []
    for cell in cells:
        vertices = cell_vertices(cell['x'], cell['y'], cell['wafertype'], cell['cell']) 
        barycenter = Point((cell['x'],cell['y']))
        output_cells.append(Cell(
            #  id=int(cell['id']),
            id=int(compute_id(cell['wafer'], cell['cell'])),
            layer=layer,
            subdet=subdet,
            zside=1,
            module=int(cell['wafer']),
            center=barycenter,
            vertices=vertices
            ))
    return output_cells
Example #21
def read_bh_geometry(filename, treename):
    # Read cells from one side
    selection = "zside==1 && subdet==2 && layer==1"
    branches = ['id',
            'ieta', 'iphi', 
            'x', 'y']
    for corner in range(1, 5):
        branches.append('x{}'.format(corner))
        branches.append('y{}'.format(corner))
    cells = root2array(filename, treename=treename, branches=branches, selection=selection)
    # Create cell shapes
    output_cells = []
    for cell in cells:
        vertices = Polygon([(cell['x1'],cell['y1']),
            (cell['x2'],cell['y2']),
            (cell['x3'],cell['y3']),
            (cell['x4'],cell['y4'])])
        barycenter = Point((cell['x'],cell['y']))
        output_cells.append(Cell(
            id=int(cell['id']),
            layer=1,
            subdet=5,
            zside=1,
            module=1,
            ieta=int(cell['ieta']),
            iphi=int(cell['iphi']),
            center=barycenter,
            vertices=vertices
            ))
    return output_cells
Example #22
def fit(filename, treename, inputsname, targetname, workingpoint=0.9, test=False):
    # Reading inputs and targets
    ninputs = len(inputsname)
    branches = copy.deepcopy(inputsname)
    branches.append(targetname)
    data = root2array(filename, treename=treename, branches=branches)
    data = data.view((np.float64, len(data.dtype.names)))
    # Extract and format inputs and targets from numpy array
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    # if test requested, use 60% of events for training and 40% for testing
    inputs_train = inputs
    targets_train = targets
    if test:
        inputs_train, inputs_test, targets_train, targets_test = cross_validation.train_test_split(inputs, targets, test_size=0.4, random_state=0)
    # Define and fit quantile regression (quantile = workingpoint)
    # Default training parameters are used
    regressor = GradientBoostingRegressor(loss='quantile', alpha=workingpoint)
    regressor.fit(inputs_train, targets_train)
    if test:
        # Compare regression prediction with the true value and count the fraction of time it falls below
        # This should give the working point value
        predict_test = regressor.predict(inputs_test)
        compare = np.less(targets_test, predict_test)
        print('Testing regression with inputs', inputsname, 'and working point', workingpoint)
        print('    Test efficiency =', float(list(compare).count(True))/float(len(compare)))
        # TODO: add 1D efficiency graphs vs input variables
    return regressor
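A hedged usage sketch of the quantile-regression fit (file, tree, and branch names are hypothetical):

regressor = fit('calibration.root', 'tree',
                inputsname=['abs(ieta)', 'ntt'],  # hypothetical inputs
                targetname='iso', workingpoint=0.9, test=True)
# regressor.predict(X) then estimates the 90% quantile of the target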
Example #23
def test_single_branch():
    f = get_file('single1.root')
    tree = f.Get('tree')
    arr1_1d = rnp.tree2array(tree, branches='n_int')
    arr2_1d = rnp.root2array(load('single1.root'), branches='n_int')
    assert_equal(arr1_1d.dtype, np.dtype('<i4'))
    assert_equal(arr2_1d.dtype, np.dtype('<i4'))
Example #24
 def _DetData(self, args):
     ''' Loads the data for a detector specific value. '''
 
     # If there are more than 2 arguments
     if len(args) > 2:
         warn('WARNING in ' + self.__name__ + ':\n\t' + self.__name__ + 
              ' takes two arguments: detnum and cut. Ignoring additional' +
              ' arguments.', UserWarning)
         detnum = args[0]
         cut = args[1]
 
     # If there are just 2 arguments
     elif len(args) == 2:
         detnum = args[0]
         cut = args[1]
   
     # If there is one argument
     elif len(args) == 1:
         if self._Check_Detnum(args[0]):
             detnum = args[0]
             CAPy_globals.SetLastDetnum(detnum)
             cut = CAPy_globals.GetLastCut()
         elif self._Check_Cut(args[0]):
             cut = args[0]
             CAPy_globals.SetLastCut(cut)
             detnum = CAPy_globals.GetLastDetnum()
         else:
             warn('WARNING in ' + self.__name__ + ':\n\tArgument is' +
                  ' neither a detnum nor a cut. Ignoring argument.',
                  UserWarning)
             detnum = CAPy_globals.GetLastDetnum()
             cut = CAPy_globals.GetLastCut()
 
     # If there are no arguments:
     else:
 
         print('No Arguments')
         detnum = CAPy_globals.GetLastDetnum()
         cut = CAPy_globals.GetLastCut()
         print(detnum, cut)
 
     # Now call data
     if detnum:
         files, dirName, treeName = CAPy_globals._FileInfo(self.__name__, 1)
         
         # Now call data
         m = root2array(files, dirName + '/' + treeName, [self.__name__])
         
         # If cut, apply
         if cut:
             print "#TODO: Implement Cut"
         
         return m
         
     else:
         warn('WARNING in ' + self.__name__ + ':\n\t' + 'No detector' +
              ' given and none stored in globals. Returning nothing',
              UserWarning)
         return None
Example #25
def test_array2root():
    a = np.array([
        (12345, 2., 2.1, True),
        (3, 4., 4.2, False),],
        dtype=[
            ('x', np.int32),
            ('y', np.float32),
            ('z', np.float64),
            ('w', np.bool_)])
    with temp() as tmp:
        rnp.array2root(a, tmp.GetName(), mode='recreate')
        a_conv = rnp.root2array(tmp.GetName())
        assert_array_equal(a, a_conv)
        # extend the tree
        rnp.array2root(a, tmp.GetName(), mode='update')
        a_conv2 = rnp.root2array(tmp.GetName())
        assert_array_equal(np.hstack([a, a]), a_conv2)
Example #26
def test_array2tree_charstar():
    a = np.array([b'', b'a', b'ab', b'abc', b'xyz', b''],
                 dtype=[('string', 'S3')])

    with temp() as tmp:
        rnp.array2root(a, tmp.GetName(), mode='recreate')
        a_conv = rnp.root2array(tmp.GetName())
        assert_array_equal(a, a_conv)
Example #27
def test_chain():
    chain = ROOT.TChain('tree')
    chain.Add(load('single1.root'))
    check_single(rnp.tree2array(chain))

    f = load(['single1.root', 'single2.root'])
    a = rnp.root2array(f)
    check_single(a, 200)
Example #28
def test_struct():
    assert_array_equal(rnp.root2array(load('struct.root')),
        np.array([(10, 15.5, 20, 781.2)],
            dtype=[
                ('branch1_intleaf', '<i4'),
                ('branch1_floatleaf', '<f4'),
                ('branch2_intleaf', '<i4'),
                ('branch2_floatleaf', '<f4')]))
Example #29
def test_object_expression():
    rec = rnp.root2array(load(['object1.root', 'object2.root']),
                       branches=['vect.Pt()'])
    assert_array_equal(
        rec['vect.Pt()'],
        np.concatenate([
            np.arange(10, dtype='d') + 1,
            np.arange(10, dtype='d') + 2]))
Example #30
def read_inputs(config, setup):
    from ttH.TauRoast.processing import Process

    fn = os.path.join(config.get("indir", config["outdir"]), "ntuple.root")

    signal = None
    signal_weights = None
    for proc, weight in sum([cfg.items() for cfg in setup['signals']], []):
        for p in sum([Process.expand(proc)], []):
            logging.debug('reading {}'.format(p))
            d = rec2array(root2array(fn, str(p), setup['variables']))
            if isinstance(weight, float) or isinstance(weight, int):
                w = np.array([weight] * len(d))
            else:
                w = rec2array(root2array(fn, str(p), [weight])).ravel()
            w *= p.cross_section / p.events
            if signal is not None:
                signal = np.concatenate((signal, d))
                signal_weights = np.concatenate((signal_weights, w))
            else:
                signal = d
                signal_weights = w

    background = None
    background_weights = None
    for proc, weight in sum([cfg.items() for cfg in setup['backgrounds']], []):
        for p in sum([Process.expand(proc)], []):
            logging.debug('reading {}'.format(p))
            d = rec2array(root2array(fn, str(p), setup['variables']))
            if isinstance(weight, float) or isinstance(weight, int):
                w = np.array([weight] * len(d))
            else:
                w = rec2array(root2array(fn, str(p), [weight])).ravel()
            w *= p.cross_section / p.events
            if background is not None:
                background = np.concatenate((background, d))
                background_weights = np.concatenate((background_weights, w))
            else:
                background = d
                background_weights = w

    factor = np.sum(signal_weights) / np.sum(background_weights)
    logging.info("renormalizing background events by factor {}".format(factor))
    background_weights *= factor

    return signal, signal_weights, background, background_weights
Example #31
def main():
    from root_numpy import root2array
    import sys
    import argparse
    import matplotlib.pyplot as plt
    import numpy as np

    parser = argparse.ArgumentParser(
        description='Plots output from blochSiegert.cpp')
    parser.add_argument("-f",
                        "--file",
                        type=str,
                        help="Filename",
                        required=True)
    parser.add_argument("-rf",
                        "--ramseyFringe",
                        type=int,
                        help="rf___ branch to draw")
    args = parser.parse_args()

    filename = args.file
    print("Loading...", filename)

    try:
        phi = np.array(root2array(filename, branches="phi")[0])
        gridMin = np.array(root2array(filename, branches="gridMin")[0])
        polyMin = np.array(root2array(filename, branches="polyMin")[0])
        params = np.array(root2array(filename, branches="params")[0])
    except Exception:
        sys.exit()

    if args.ramseyFringe is not None:
        try:
            branchname = "rf" + str(args.ramseyFringe)
            fringe = np.array(root2array(filename, branches=branchname)[0])
            wRange = np.array(root2array(filename, branches="wRange")[0])

            fig2 = plt.figure(branchname)
            ax2 = fig2.add_subplot(111)
            ax2.set(title=branchname)
            ax2.set(xlabel='w [rad/s]')
            ax2.set(ylabel='P(z)')
            ax2.plot(wRange, fringe)
            ax2.grid(True)

        except Exception:
            print("Could not read branch ", branchname)

    if (params[3] == 1):
        print("Circular RF Ramsey fringe")
    else:
        print("Linear RF Ramsey fringe")
    print("{W0_VAL, PRECESS_TIME, PULSE_TIME}")
    print(params[0], "  ", params[1], "  ", params[2])

    fig1 = plt.figure("blochSiegertShift")
    ax1 = fig1.add_subplot(111)

    ax1.plot(phi, params[0] - polyMin, label="Polynomial fit")
    ax1.plot(phi, params[0] - gridMin, label="Gridsearch")
    ax1.grid(True)
    ax1.set(title='Bloch Siergert shift for optimized Ramsey Fringes')
    ax1.set(xlabel='Initial phase angle [rad]')
    ax1.set(ylabel='Shift [rad/s]')
    ax1.legend()
    plt.show()
    return
Example #32
def test_single_pattern_not_exist():
    f = load(['single1.root', 'does_not_exist.root'])
    a = rnp.root2array(f)
Example #33
def test_preserve_branch_order():
    a = rnp.root2array(load('test.root'))
    assert_equal(a.dtype.names, ('i', 'x', 'y', 'z'))

    a = rnp.root2array(load('test.root'), branches=['y', 'x', 'z'])
    assert_equal(a.dtype.names, ('y', 'x', 'z'))
Example #34
ar = root2rec('../test/test.root', 'tree')
print(ar.i)
print(ar.f)
#ipython autocomplete columnname patch is available with this numpy patch
#https://github.com/piti118/numpy/commit/a996292238ab98dcf53f2d48476d637eab9f1a72
ar.i[0]  #ar[0].i won't work
ar[0][0]

# <codecell>

ar.f[ar.i > 5]

# <codecell>

#root2array is available if you don't like recarray
a = root2array('../test/test.root', 'tree')
#this tree has two column i and integer and f as float
a  #you will see that a is a structure array

# <codecell>

#access whole column
print(a['i'])
print(a['f'])

# <codecell>

#access 0th record
print(a[0])
#and the first record
print(a[1])
Example #35
                Ztob.SetPxPyPzE(Z.Px(),Z.Py(),Z.Pz(),Z.E())
                Zboost = ROOT.TVector3()
                Zboost = Ztob.BoostVector()
                v = Zboost.Unit()
                eletron1.Boost(-Zboost)
                Htob = ROOT.TLorentzVector()
                Htob.SetPxPyPzE(H.Px(),H.Py(),H.Pz(),H.E())
                Hboost = ROOT.TVector3()
                Hboost = Htob.BoostVector()
                ang = Hboost.Unit()
                bjato1.Boost(-Hboost)
                tree.Cos_Hb1 = np.cos(bjato1.Angle(ang))
                tree.Cos_lZ = np.cos(eletron1.Angle(v))
                tree.Fill()


# Show resulting histograms
#hist_PT_l1.Draw()
#raw_input("Press Enter to continue...")
tree.write()
f.close()

#create the csv output

to_convert = root2array(root_name,'test')

df_conv = pd.DataFrame(to_convert)

df_conv.to_csv( csv_name + '.csv', index=False, header= df_conv.keys(), mode='w', sep=' ')

Example #36
def optimize_background_rejection_vs_ieta(effs,
                                          isolations,
                                          signalfile,
                                          signaltree,
                                          backgroundfile,
                                          backgroundtree,
                                          inputnames=['abs(ieta)', 'ntt'],
                                          targetname='iso'):
    #ieta_binning = np.arange(0.5,28.5,1)
    ieta_binning = [0.5, 3.5, 6.5, 9.5, 13.5, 18.5, 22.5, 27.5]
    # Compute signal efficiencies
    ninputs = len(inputnames)
    branches = copy.deepcopy(inputnames)
    branches.append(targetname)
    data = root2array(signalfile,
                      treename=signaltree,
                      branches=branches,
                      selection='et>10')
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    xs = data[:, [0]].astype(np.float32).ravel()
    # signal_efficiencies is a 2D array
    # The first dimension corresponds to different ieta values
    # The second dimension corresponds to different working points
    signal_efficiencies = [
        graph2array(
            efficiency.efficiency_graph(
                pass_function=(lambda x: np.less(x[1], iso.predict(x[0]))),
                function_inputs=(inputs, targets),
                xs=xs,
                bins=ieta_binning))[:, [1]].ravel() for iso in isolations
    ]
    signal_efficiencies = np.column_stack(signal_efficiencies)
    # Compute background efficiencies
    ninputs = len(inputnames)
    branches = copy.deepcopy(inputnames)
    branches.append(targetname)
    data = root2array(backgroundfile,
                      treename=backgroundtree,
                      branches=branches,
                      selection='et>10')
    data = data.view((np.float64, len(data.dtype.names)))
    inputs = data[:, range(ninputs)].astype(np.float32)
    targets = data[:, [ninputs]].astype(np.float32).ravel()
    xs = data[:, [0]].astype(np.float32).ravel()
    # background_efficiencies is a 2D array
    # The first dimension corresponds to different ieta values
    # The second dimension corresponds to different working points
    background_efficiencies = [
        graph2array(
            efficiency.efficiency_graph(
                pass_function=(lambda x: np.less(x[1], iso.predict(x[0]))),
                function_inputs=(inputs, targets),
                xs=xs,
                bins=ieta_binning))[:, [1]].ravel() for iso in isolations
    ]
    background_efficiencies = np.column_stack(background_efficiencies)
    signal_efficiencies_diff_graphs = []
    background_efficiencies_diff_graphs = []
    optimal_points_graphs = []
    optimal_points = []
    # compute best working point for each ieta
    for i, (signal_effs, background_effs) in enumerate(
            zip(signal_efficiencies, background_efficiencies)):
        signal_efficiencies_diff_graph, background_efficiencies_diff_graph, optimal_points_graph, optimal_point = find_best_working_point(
            effs, signal_effs, background_effs)
        signal_efficiencies_diff_graph.SetName(
            'efficiencies_signal_ieta_{}'.format(i))
        background_efficiencies_diff_graph.SetName(
            'efficiencies_background_ieta_{}'.format(i))
        optimal_points_graph.SetName(
            'signal_background_optimal_points_ieta_{}'.format(i))
        signal_efficiencies_diff_graphs.append(signal_efficiencies_diff_graph)
        background_efficiencies_diff_graphs.append(
            background_efficiencies_diff_graph)
        optimal_points_graphs.append(optimal_points_graph)
        optimal_points.append(optimal_point)

    return signal_efficiencies_diff_graphs, background_efficiencies_diff_graphs, optimal_points_graphs, optimal_points
Example #37
epochs=2000
number_hidden_nodes=[20,15,1]
number_layers=len(number_hidden_nodes)
activations=['tanh','tanh','tanh']
batch_size=128
filenameBkg = 'hadded/uhh2.AnalysisModuleRunner.MC.TTbar.root'
filenameSig = 'hadded/uhh2.AnalysisModuleRunner.MC.TstarTstar_M-Combined.root'
split_train_test = 0.7
z_mean = 1.0
z_sigma = 0.1
reduced_training = True
reduced_dimension = 10

##########################################################################

arrBkg = pandas.DataFrame(root2array(filenameBkg, treename='AnalysisTree',branches=branches_to_analyze))
arrSig = pandas.DataFrame(root2array(filenameSig, treename='AnalysisTree',branches=branches_to_analyze))

#save the numpy format
if(save_numpy_format):
    outfileBkg = 'outBkg.npy'
    np.save(outfileBkg,arrBkg) #save as numpy

    outfileSig = 'outSig.npy'
    np.save(outfileSig,arrSig) #save as numpy

#define train and test arrays for training

msk = np.random.rand(len(arrBkg)) < split_train_test
train_sample = arrBkg[msk]
test_sample = arrBkg[~msk]
Example #38
import numpy as np  #1.11.0
from root_numpy import root2array, array2root, list_branches  #4.6.0

#Concatenate different root files

#list of the pathes to the different root files to concatenate
path_sig = [
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_2.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_4.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_5.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_6.root",
    "/data/lhcb/marin/hhpi0gamma/radiativehhpi0RG_R16S28r1p1_MC_7.root"
]
#path where to save the concatenated file
path_concat = "/users/LHCb/corentin/radiative_dataset/data/concat_signal.root"

data = []
for path in path_sig:
    data += [root2array(filenames=path)]

print("import complete")

signal = np.concatenate(data)
print('concatenation complete')

array2root(signal, path_concat, mode='recreate')
print('export complete')
input1 = "../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS1m.root"
input2 = "../../../storage/cc14398/Cs137-10s-18Mar/PS1m.root"
#input2="SavedData/Shielding/Isotropic/1Mar-PSonly-Co60-33MBq-100ms-3m-Bp.root"
#input3="SavedData/Shielding/Isotropic/6Mar-Co60-37MBq-100ms-2m-Phantom.root"
#input4="SavedData/Shielding/Isotropic/6Mar-Co60-37MBq-100ms-8m-Phantom.root"
#input2="SavedData/Shielding/ParticleGun/5Mar-Co60-PartGun-Bp-1m.root"
#input3="SavedData/Shielding/ParticleGun/5Mar-Co60-PartGun-Ph-1m.root"
#input4="SavedData/Shielding/Isotropic/1Mar-PSonly-Co60-33MBq-100ms-8m-Bp.root"

#input1="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS1m.root"
#input2="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS3m.root"
#input3="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS7m.root"
#input4="../../../storage/cc14398/Co60-10s-15Mar/Co60-10s-PS8m.root"

KE1 = root2array(input1, treename="PhaseSpace", branches="Ekine")
KE2 = root2array(input2, treename="PhaseSpace", branches="Ekine")
#KE3=root2array(input3,treename="PhaseSpace",branches="Ekine")
#KE4=root2array(input4,treename="PhaseSpace",branches="Ekine")

#dX=root2array(inputFile,treename="PhaseSpace", branches="dX")

plt.hist(KE1, bins=20, histtype='step', label='Co 60')
plt.hist(KE2, bins=10, histtype='step', label='Cs 137')
#plt.hist2d(E_kinetic,dX)

plt.xlabel('Kinetic Energy (MeV)')
plt.ylabel('Counts')

#fig, ax1 = plt.subplots()
#ax1.hist([KE1,KE2,KE3], label=['1m','3m','7m'],bins=15,density=True)
Example #40
        os.mkdir(output)

    if isinstance(inputs, (list,)):
        treeName = list_trees(inputs[0])
    else:
        treeName = list_trees(inputs)
        inputs = [inputs, ]

    if (len(treeName) > 1):
        print("more then one tree in file ... specify, which tree to use")
        exit()



    print(">>> Load Events in gen acceptance")
    dfGen = [tree_to_df(root2array(i, treeName[0], selection=selection, branches=branches), 1) for i in inputs]
    dfGen = pd.concat(dfGen)

    dfGen = dfGen.rename(columns={
        'Muon_ID[muon_genRecoObj]_0': 'muon_ID',
        'Muon_ID[antiMuon_genRecoObj]_0': 'antiMuon_ID',
        'Muon_triggerBits[muon_genRecoObj]_0': 'muon_triggerBits',
        'Muon_triggerBits[antiMuon_genRecoObj]_0': 'antiMuon_triggerBits',
        'Muon_tkRelIso[muon_genRecoObj]_0': 'muon_tkIso',
        'Muon_tkRelIso[antiMuon_genRecoObj]_0': 'antiMuon_tkIso',
        'Muon_pfRelIso04_all[muon_genRecoObj]_0': 'muon_pfIso',
        'Muon_pfRelIso04_all[antiMuon_genRecoObj]_0': 'antiMuon_pfIso',
        'Muon_eta[muon_genRecoObj]_0': 'muon_eta',
        'Muon_eta[antiMuon_genRecoObj]_0': 'antiMuon_eta',
        'Muon_pt[muon_genRecoObj]_0': 'muon_pt',
        'Muon_pt[antiMuon_genRecoObj]_0': 'antiMuon_pt',
Example #41
pars = []
xsec_list = []
tracks_per_evt = []

for path in file_path:
    name = os.path.basename(path[:-14])
    print(name)

    out = re.search('_M[0-9]+_', name)
    out = out.group(0)
    stopmass = int(out[2:-1])

    # if stopmass >300 : continue

    t = root2array(path)
    # branches:
    # ['Nev','tof_reco', 'PID', 'tof_gen',
    # 'P_reco', 'vtx_SumPT2', 'vtx_NDOF','vtx_SumPT',
    # 'Zout','Tout','pt','ctgtheta','phi','d0','dz', 'L',
    # 'sigma_pt', 'sigma_d0', 'sigma_dz', 'sigma_Tin',
    # 'M_reco', 'beta_reco'
    # ]

    c_pt = rt.TCanvas('c_' + name, 'c_' + name, 1600, 600)
    c_pt.Divide(3, 1)

    bsm_sel = np.logical_and(
        np.abs(t['PID']) >= 1000612,
        np.abs(t['PID']) <= 1093334)
    kin_sel = np.logical_and(
Example #42
def main(args):
    ### Prepare data for processing

    # Ensure output directory exists
    out_path = args.data_dir + 'results/'
    if not os.path.exists(out_path):
        os.makedirs(out_path)

    ## Load files
    l_fit_vars = [
        'logDIRA', 'log_bplus_IPCHI2_OWNPV', 'bplus_LOKI_DTF_CHI2NDOF',
        'log_bplus_FDCHI2_OWNPV', 'bplus_ETA', 'log_1_IPCHI2_OWNPV',
        'log_2_IPCHI2_OWNPV', 'log_3_IPCHI2_OWNPV', 'log_4_IPCHI2_OWNPV',
        'log_5_IPCHI2_OWNPV', 'mu_PT_max', 'mu_PT_min'
    ]
    l_mass_vars = ['scaledmass', 'mjpipi']
    l_load_branches = l_fit_vars + l_mass_vars
    # Load files into arrays
    print('*** Loading Data ***')
    a_mc_x = root_numpy.root2array(args.data_dir + 'mc_x_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)
    a_mc_p = root_numpy.root2array(args.data_dir + 'mc_p_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)
    a_side = root_numpy.root2array(args.data_dir + 'side_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)
    a_data = root_numpy.root2array(args.data_dir + 'data_proba.root',
                                   treename=args.tree_name,
                                   branches=l_load_branches)

    print('*** Processing Data ***')
    # Convert to DataFrames
    df_mc_x = pd.DataFrame(a_mc_x)
    df_mc_p = pd.DataFrame(a_mc_p)
    df_side = pd.DataFrame(a_side)
    df_data = pd.DataFrame(a_data)
    # Add categorisation
    df_mc_x['cat'] = 'mc_x'
    df_mc_p['cat'] = 'mc_p'
    df_side['cat'] = 'side'
    # Add target
    df_mc_x['class'] = 1
    df_mc_p['class'] = 1
    df_side['class'] = 0
    # Combine into training set
    df_train = pd.concat([df_mc_x, df_mc_p, df_side])
    # Print summary stats
    print('   *** Data loaded ***')
    print('   *** Training events: %d ***' % (df_train.shape[0]))
    print('   ***     Data events: %d ***' % (df_data.shape[0]))

    # Dictionaries for storing information on each run
    d_run_info = {}
    d_roc_plot = {}

    ### Estimate signal yield - all data
    d_sig_est_alldata = fit_doubleCB(pd.concat([df_mc_x, df_mc_p
                                                ])['scaledmass'].as_matrix(),
                                     df_data['scaledmass'].as_matrix(),
                                     out_path,
                                     s_info='alldata_signal_est')
    ### Estimate signal yield - X region
    s0 = None
    if args.find_s0:
        df_data_p = df_data[(df_data['mjpipi'] > 3676)
                            & (df_data['mjpipi'] < 3696)]
        df_data_x = df_data[(df_data['mjpipi'] > 3862)
                            & (df_data['mjpipi'] < 3882)]
        d_sig_est_p = fit_doubleCB(
            df_mc_p['scaledmass'].as_matrix(),
            df_data_p['scaledmass'].as_matrix(),
            out_path,
            s_info='psi(2S)_s0_est',
        )
        d_sig_est_x = fit_doubleCB(
            df_mc_x['scaledmass'].as_matrix(),
            df_data_x['scaledmass'].as_matrix(),
            out_path,
            s_info='x(3872)_s0_est',
        )
        print("*** Expected psi(2S) signal yield: %d ***" %
              (d_sig_est_p['data_sig_yield']))
        print("*** Expected X(3872) signal yield: %d ***" %
              (d_sig_est_x['data_sig_yield']))
        print("*** Expected X(3823) signal yield: %d ***" %
              (float(d_sig_est_x['data_sig_yield']) / 20.))
        s0 = float(d_sig_est_x['data_sig_yield']) / 20.
        d_run_info['sig_est_x_reg'] = d_sig_est_x

    print('*** Performing run %s ***' % (run))
    d_run_info[run] = {}
    d_roc_plot[run] = {}

    out_path_plots = out_path + 'plots/'
    if not os.path.exists(out_path_plots):
        os.makedirs(out_path_plots)

    if args.opt_cut is None:
        ### Find optimal cut
        print('   *** Determining optimal cut ***')
        # Optimise the probability cut
        sig_effs_mcp = []  # record signal efficiencies - on MC psi(2S) only
        sig_effs_mcx = []  # record signal efficiencies - on MC X(3823) only
        sig_effs_all = []  # record signal efficiencies
        bgr_rejs = []  # record background rejections
        cut_scores = []  # record cut optimisation metric
        # Determine cut metric for a range of cuts
        cuts = np.linspace(.0, 1., 200, endpoint=False)
        for prob_threshold in cuts:
            # Determine how many predictions are correct
            signal_efficiency_mcx = float(
                df_train[(df_train['prob_' + run] > prob_threshold)
                         & (df_train['cat'] == 'mc_x')].shape[0]) / float(
                             df_train[df_train['cat'] == 'mc_x'].shape[0])
            signal_efficiency_mcp = float(
                df_train[(df_train['prob_' + run] > prob_threshold)
                         & (df_train['cat'] == 'mc_p')].shape[0]) / float(
                             df_train[df_train['cat'] == 'mc_p'].shape[0])
            signal_efficiency_all = float(
                df_train[(df_train['prob_' + run] > prob_threshold)
                         & (df_train['class'] == 1)].shape[0]) / float(
                             df_train[df_train['class'] == 1].shape[0])
            background_rejection = float(
                df_train[(df_train['prob_' + run] > prob_threshold)
                         & (df_train['class'] == 0)].shape[0]) / float(
                             df_train[df_train['class'] == 0].shape[0])
            # Store scores
            sig_effs_all.append(signal_efficiency_all)
            sig_effs_mcp.append(signal_efficiency_mcp)
            sig_effs_mcx.append(signal_efficiency_mcx)
            bgr_rejs.append(background_rejection)
            # Optimize cut
            eff = signal_efficiency_all
            a = 5.  # expected significance
            # Background events, scaled to 40MeV window about B peak, considering only those in X(3823) region
            B = df_train[((df_train['prob_' + run] > prob_threshold)) & (
                (df_train['scaledmass'] > 5400.) &
                (df_train['scaledmass'] < 5450.)) &
                         ((df_train['mjpipi'] > 3773) &
                          (df_train['mjpipi'] < 3873))].shape[0] * .8
            if s0 is not None:
                cut_scores.append((s0 * eff) / sqrt((s0 * eff) + B))
            else:
                cut_scores.append(eff / ((a / 2) + sqrt(B)))
        # Find optimal cut
        if args.bck_cut:  # Hard cut at 99% background rejection
            cut_index = np.argmax(np.array(bgr_rejs) > .99)
            print("Background used: {:.3f}".format(bgr_rejs[cut_index]))
            prob_threshold = cuts[cut_index]
        else:  # Base on cut optimisation metric
            cut_index = np.argmax(cut_scores)
            prob_threshold = cuts[cut_index]

        ### Store some parameters of interest
        d_run_info[run]['optimal_cut'] = np.asscalar(prob_threshold)
        d_run_info[run]['all_signal_efficiency'] = sig_effs_all[cut_index]
        d_run_info[run]['mcj_signal_efficiency'] = sig_effs_mcp[cut_index]
        d_run_info[run]['mcx_signal_efficiency'] = sig_effs_mcx[cut_index]
        d_roc_plot[run]['sig_effs'] = sig_effs_all
        d_roc_plot[run]['bgr_rejs'] = bgr_rejs
        ### Print some summary stats
        print('   ***           Optimal cut: %1.2f ***' % (prob_threshold))
        print('   ***     Signal efficiency: %1.2f ***' %
              (d_run_info[run]['all_signal_efficiency']))
        print('   *** MCJ Signal efficiency: %1.2f ***' %
              (d_run_info[run]['mcj_signal_efficiency']))
        print('   *** MCX Signal efficiency: %1.2f ***' %
              (d_run_info[run]['mcx_signal_efficiency']))
    else:
        prob_threshold = float(args.opt_cut)

    ### Apply model to data
    df_data['class'] = df_data['prob_' + run] > prob_threshold

    ### Plot cut optimisation
    print('   *** Plotting cut optimisation ***')
    fig = plt.figure()
    plt.plot(cuts, cut_scores)
    plt.ylabel("Cut Score")
    plt.xlabel("Probability Threshold")
    plt.xlim(0., 1.)
    plt.title("Cut Score " + run)
    plt.tight_layout(pad=2.0)
    fig.savefig(out_path_plots + 'cut_score.pdf')
    plt.close()

    ### Plot mass histogram for optimal cut
    print('   *** Plotting Mass histograms ***')
    # Initialise canvas
    c_name = 'B_Mass_Distribution ' + run
    c = ROOT.TCanvas(c_name, c_name, 600, 400)
    c.cd()
    # Select required quantity
    a_raw = df_data['scaledmass'].as_matrix()
    a_cut = df_data[df_data['class'] == 1]['scaledmass'].as_matrix()
    # Create and format histograms
    h_raw = ROOT.TH1F(
        c_name + '_No_Cut', c_name +
        '_No_Cut;B Mass [MeV/#it{c}^{2}];candidates/18[MeV/#it{c}^{2}]', 100,
        5220., 5400.)
    h_cut = ROOT.TH1F(
        c_name + '_XGB_Cut', c_name +
        '_XGB_Cut;B Mass [MeV/#it{c}^{2}];candidates/18[MeV/#it{c}^{2}]', 100,
        5220., 5400.)
    # Fill histograms
    for v in a_raw:
        h_raw.Fill(v)
    for v in a_cut:
        h_cut.Fill(v)
    # Normalise
    ## Make it pretty
    h_raw.SetTitle('B Mass Distribution ' + run)
    # Format for each case of x-axis
    h_raw.GetYaxis().SetTitleOffset(1.6)
    y_max = 1.1 * max(h_raw.GetBinContent(h_raw.GetMaximumBin()),
                      h_cut.GetBinContent(h_cut.GetMaximumBin()))
    y_min = 0.9 * min(h_raw.GetBinContent(h_raw.GetMinimumBin()),
                      h_cut.GetBinContent(h_cut.GetMinimumBin()))
    h_raw.GetYaxis().SetRangeUser(y_min, y_max)
    # Format plotting style
    h_raw.SetLineColor(ROOT.kRed)
    h_raw.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
    h_cut.SetLineColor(ROOT.kBlue)
    h_cut.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
    # Remove stats boxes
    h_raw.SetStats(False)
    h_cut.SetStats(False)
    # Print
    h_raw.Draw('HIST')
    h_cut.Draw('HISTsame')
    # Create legend
    leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
    leg.AddEntry(h_raw, 'Uncut Data', 'L')
    leg.AddEntry(h_cut, 'XGBoost cut: {:.3f}'.format(prob_threshold), 'L')
    leg.SetLineColor(0)
    leg.SetLineStyle(0)
    leg.SetFillStyle(0)
    leg.SetBorderSize(0)
    leg.Draw('same')
    # Save
    c.SaveAs(out_path_plots + 'Mass_histogram_B_XGBcut.pdf')

    ### BDT answer
    print('   *** Plotting classification probabilities ***')
    # Initialise canvas
    c_name = 'BDT_Predicted_Probability ' + run
    c = ROOT.TCanvas(c_name, c_name, 600, 400)
    c.cd()
    # Select required quantity
    a_train_sig_prob = df_train['prob_' +
                                run][df_train['class'] == 1].as_matrix()
    a_train_bkg_prob = df_train['prob_' +
                                run][df_train['class'] == 0].as_matrix()
    a_data_prob = df_data['prob_' + run].as_matrix()
    # Create and format histograms
    h_train_sig_prob = ROOT.TH1F(c_name + '_Sig_Prob',
                                 c_name + '_Sig_Prob;Probability;Candidates',
                                 100, 0., 1.)
    h_train_bkg_prob = ROOT.TH1F(c_name + '_Bkg_Prob',
                                 c_name + '_Bkg_Prob;Probability;Candidates',
                                 100, 0., 1.)
    h_data_prob = ROOT.TH1F(c_name + '_Data_Prob',
                            c_name + '_Data_Prob;Probability;Candidates', 100,
                            0., 1.)
    # Fill histograms
    for v in a_train_sig_prob:
        h_train_sig_prob.Fill(v)
    for v in a_train_bkg_prob:
        h_train_bkg_prob.Fill(v)
    for v in a_data_prob:
        h_data_prob.Fill(v)
    # Normalise
    h_train_sig_prob.Scale(1. / h_train_sig_prob.Integral())
    h_train_bkg_prob.Scale(1. / h_train_bkg_prob.Integral())
    h_data_prob.Scale(1. / h_data_prob.Integral())
    ## Make it pretty
    h_train_sig_prob.SetTitle('Event Probability Distribution ' + run)
    # Format for each case of x-axis
    h_train_sig_prob.GetYaxis().SetTitleOffset(1.6)
    y_max = 1.1 * max(
        max(h_train_sig_prob.GetBinContent(h_train_sig_prob.GetMaximumBin()),
            h_train_bkg_prob.GetBinContent(h_train_bkg_prob.GetMaximumBin())),
        h_data_prob.GetBinContent(h_data_prob.GetMaximumBin()))
    y_min = 0.9 * min(
        min(h_train_sig_prob.GetBinContent(h_train_sig_prob.GetMinimumBin()),
            h_train_bkg_prob.GetBinContent(h_train_bkg_prob.GetMinimumBin())),
        h_data_prob.GetBinContent(h_data_prob.GetMinimumBin()))
    h_train_sig_prob.GetYaxis().SetRangeUser(y_min, y_max)
    # Format plotting style
    h_train_sig_prob.SetLineColor(ROOT.kRed)
    h_train_sig_prob.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
    h_train_bkg_prob.SetLineColor(ROOT.kBlue)
    h_train_bkg_prob.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
    h_data_prob.SetLineColor(ROOT.kGreen)
    h_data_prob.SetFillColorAlpha(ROOT.kGreen - 10, 0.7)
    # Remove stats boxes
    h_train_sig_prob.SetStats(False)
    h_train_bkg_prob.SetStats(False)
    h_data_prob.SetStats(False)
    # Print
    h_train_sig_prob.Draw('HIST')
    h_train_bkg_prob.Draw('HISTsame')
    h_data_prob.Draw('HISTsame')
    # Create legend
    leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
    leg.AddEntry(h_train_sig_prob, 'Training Signal Events', 'L')
    leg.AddEntry(h_train_bkg_prob, 'Training Background Events', 'L')
    leg.AddEntry(h_data_prob, 'Data Events', 'L')
    leg.SetLineColor(0)
    leg.SetLineStyle(0)
    leg.SetFillStyle(0)
    leg.SetBorderSize(0)
    leg.Draw('same')
    # Save
    c.SaveAs(out_path_plots + 'BDT_answer.pdf')

    ### Plot variable distributions
    print('   *** Plotting training variable distributions ***')
    out_path_var = out_path_plots + 'dist_vars/'
    if not os.path.exists(out_path_var):
        os.makedirs(out_path_var)
    for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
        # Initialise canvas
        c_name = var + '_Distribution_' + run
        c = ROOT.TCanvas(c_name, c_name, 600, 400)
        c.cd()
        # Select required quantity
        a_plt_sig = df_train[var][df_train['prob_' +
                                           run] >= prob_threshold].as_matrix()
        a_plt_bkg = df_train[var][df_train['prob_' +
                                           run] < prob_threshold].as_matrix()
        # Scale DIRA and IPCHI2
        i_str = ''
        if (var == 'bplus_DIRA_OWNPV'):
            a_plt_sig = np.arccos(a_plt_sig)
            a_plt_bkg = np.arccos(a_plt_bkg)
            i_str = 'arccos '
        if ('CHI2' in var):
            a_plt_sig = np.log(a_plt_sig)
            a_plt_bkg = np.log(a_plt_bkg)
            i_str = 'log '
        # Create and format histograms
        x_max = max(max(a_plt_sig), max(a_plt_bkg))
        x_min = min(min(a_plt_sig), min(a_plt_bkg))
        h_plt_sig = ROOT.TH1F(c_name + '_Sig',
                              c_name + '_Sig;' + i_str + var + ';candidates',
                              100, x_min, x_max)
        h_plt_bkg = ROOT.TH1F(c_name + '_Bkg',
                              c_name + '_Bkg;' + i_str + var + ';candidates',
                              100, x_min, x_max)
        # Fill histograms
        for v in a_plt_sig:
            h_plt_sig.Fill(v)
        for v in a_plt_bkg:
            h_plt_bkg.Fill(v)
        ## Make it pretty
        h_plt_sig.SetTitle(var + ' Distribution ' + run)
        # Format for each case of x-axis
        h_plt_sig.GetYaxis().SetTitleOffset(1.6)
        y_max = 1.1 * max(h_plt_sig.GetBinContent(h_plt_sig.GetMaximumBin()),
                          h_plt_bkg.GetBinContent(h_plt_bkg.GetMaximumBin()))
        h_plt_sig.GetYaxis().SetRangeUser(0, y_max)
        h_plt_sig.GetXaxis().SetRangeUser(x_min, x_max)
        # Format plotting style
        h_plt_sig.SetLineColor(ROOT.kRed)
        h_plt_sig.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
        h_plt_bkg.SetLineColor(ROOT.kBlue)
        h_plt_bkg.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
        # Remove stats boxes
        h_plt_sig.SetStats(False)
        h_plt_bkg.SetStats(False)
        # Print
        h_plt_sig.Draw('HIST')
        h_plt_bkg.Draw('HISTsame')
        # Create legend
        leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
        leg.AddEntry(h_plt_sig, 'Training Events Identified as Signal', 'L')
        leg.AddEntry(h_plt_bkg, 'Training Events Identified as Background',
                     'L')
        leg.SetLineColor(0)
        leg.SetLineStyle(0)
        leg.SetFillStyle(0)
        leg.SetBorderSize(0)
        leg.Draw('same')
        # Save
        c.SaveAs(out_path_var + var + '.pdf')

        ### Plot comparison to MC data
        out_path_mcp_data = out_path_plots + 'mcp_v_data/'
        if not os.path.exists(out_path_mcp_data):
            os.makedirs(out_path_mcp_data)
        print('   *** Plotting comparison to psi(2S) MC ***')
        df_data_comp = df_data[(
            (df_data['mjpipi'] < 3696) & (df_data['mjpipi'] > 3676))
                               & ((df_data['scaledmass'] < 5299)
                                  & (df_data['scaledmass'] > 5259))]
        df_side_comp = df_side[(df_side['mjpipi'] < 3696)
                               & (df_side['mjpipi'] > 3676)]
        for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
            # Initialise canvas
            c_name = var + '_MC_#psi(2S)_Comparison_' + run
            c = ROOT.TCanvas(c_name, c_name, 600, 400)
            c.cd()
            # Select required quantity
            a_mc_p = df_mc_p[var].as_matrix()
            a_data_comp = df_data_comp[var].as_matrix()
            a_side_comp = df_side_comp[var].as_matrix()
            # Scale DIRA and IPCHI2
            i_str = ''
            if (var == 'bplus_DIRA_OWNPV'):
                a_mc_p = np.arccos(a_mc_p)
                a_data_comp = np.arccos(a_data_comp)
                a_side_comp = np.arccos(a_side_comp)
                i_str = 'arccos '
            #if ('CHI2' in var):
            #    a_mc_p = np.log(a_mc_p)
            #    a_data_comp = np.log(a_data_comp)
            #    a_side_comp = np.log(a_side_comp)
            #    i_str = 'log '
            # Create and format histograms
            x_max = max(max(a_mc_p), max(a_data_comp))
            x_min = min(min(a_mc_p), min(a_data_comp))
            h_mc_p = ROOT.TH1F(
                c_name + '_mc_#psi',
                c_name + '_mc_#psi;' + i_str + var + ';candidates', 100, x_min,
                x_max)
            h_comp = ROOT.TH1F(c_name + '_data',
                               c_name + '_data;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            h_side = ROOT.TH1F(c_name + '_side',
                               c_name + '_side;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            # Fill histograms
            for x in a_mc_p:
                h_mc_p.Fill(x)
            for x in a_data_comp:
                h_comp.Fill(x)
            for x in a_side_comp:
                h_side.Fill(x)
            # Background reduce
            h_comp.Add(h_side, -1)
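            # Sideband subtraction: h_side models the combinatorial background
            # under the peak, so the subtraction leaves an estimate of the
            # signal shape. With low statistics individual bins can go
            # negative; calling h_comp.Sumw2() before Add would also keep the
            # bin errors correct (not done here).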
            # Normalise
            h_mc_p.Scale(1. / h_mc_p.Integral())
            h_comp.Scale(1. / h_comp.Integral())
            h_side.Scale(1. / h_side.Integral())
            ## Make it pretty
            h_mc_p.SetTitle(var + ' Data vs MC #psi(2S) Distribution ' + run)
            # Format for each case of x-axis
            h_mc_p.GetYaxis().SetTitleOffset(1.6)
            y_max = 1.1 * max((h_mc_p.GetBinContent(h_mc_p.GetMaximumBin()),
                               h_comp.GetBinContent(h_comp.GetMaximumBin()),
                               h_side.GetBinContent(h_side.GetMaximumBin())))
            y_min = 0.9 * min((h_mc_p.GetBinContent(h_mc_p.GetMinimumBin()),
                               h_comp.GetBinContent(h_comp.GetMinimumBin()),
                               h_side.GetBinContent(h_side.GetMinimumBin())))
            h_mc_p.GetYaxis().SetRangeUser(y_min, y_max)
            # Format plotting style
            h_mc_p.SetLineColor(ROOT.kRed)
            h_mc_p.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
            h_comp.SetLineColor(ROOT.kBlue)
            h_comp.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
            h_side.SetLineColor(ROOT.kGreen)
            h_side.SetFillColorAlpha(ROOT.kGreen - 10, 0.7)
            # Remove stats boxes
            h_mc_p.SetStats(False)
            h_comp.SetStats(False)
            h_side.SetStats(False)
            # Print
            h_mc_p.Draw('HIST')
            h_comp.Draw('HISTsame')
            h_side.Draw('HISTsame')
            # Create legend
            leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
            leg.AddEntry(h_mc_p, '#psi(2S) Monte-Carlo', 'L')
            leg.AddEntry(h_comp, 'Background Reduced Data in #psi(2S) Region',
                         'L')
            leg.AddEntry(h_side, 'Background Data in #psi(2S) Region', 'L')
            leg.SetLineColor(0)
            leg.SetLineStyle(0)
            leg.SetFillStyle(0)
            leg.SetBorderSize(0)
            leg.Draw('same')
            # Save
            c.SaveAs(out_path_mcp_data + var + '.pdf')

        ### Plot comparison to X(3872) MC data
        out_path_mcx_data = out_path_plots + 'mcx_v_data/'
        if not os.path.exists(out_path_mcx_data):
            os.makedirs(out_path_mcx_data)
        print('   *** Plotting comparison to X(3872) MC ***')
        df_data_comp = df_data[(
            (df_data['mjpipi'] < 3882) & (df_data['mjpipi'] > 3862))
                               & ((df_data['scaledmass'] < 5299)
                                  & (df_data['scaledmass'] > 5259))]
        df_side_comp = df_side[(df_side['mjpipi'] < 3882)
                               & (df_side['mjpipi'] > 3862)]
        for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
            # Initialise canvas
            c_name = var + '_MC_X(3872)_Comparison_' + run
            c = ROOT.TCanvas(c_name, c_name, 600, 400)
            c.cd()
            # Select required quantity
            a_mc_p = df_mc_x[var].to_numpy()
            a_data_comp = df_data_comp[var].to_numpy()
            a_side_comp = df_side_comp[var].to_numpy()
            # Scale DIRA and IPCHI2
            i_str = ''
            if (var == 'bplus_DIRA_OWNPV'):
                a_mc_p = np.arccos(a_mc_p)
                a_data_comp = np.arccos(a_data_comp)
                a_side_comp = np.arccos(a_side_comp)
                i_str = 'arccos '
            if ('CHI2' in var):
                a_mc_p = np.log(a_mc_p)
                a_data_comp = np.log(a_data_comp)
                a_side_comp = np.log(a_side_comp)
                i_str = 'log '
            # Create and format histograms
            x_max = max(max(a_mc_p), max(a_data_comp))
            x_min = min(min(a_mc_p), min(a_data_comp))
            h_mc_p = ROOT.TH1F(
                c_name + '_mc_#psi',
                c_name + '_mc_#psi;' + i_str + var + ';candidates', 100, x_min,
                x_max)
            h_comp = ROOT.TH1F(c_name + '_data',
                               c_name + '_data;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            h_side = ROOT.TH1F(c_name + '_side',
                               c_name + '_side;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            # Fill histograms
            for x in a_mc_p:
                h_mc_p.Fill(x)
            for x in a_data_comp:
                h_comp.Fill(x)
            for x in a_side_comp:
                h_side.Fill(x)
            # Background reduce
            h_comp.Add(h_side, -1)
            # Normalise
            h_mc_p.Scale(1. / h_mc_p.Integral())
            h_comp.Scale(1. / h_comp.Integral())
            h_side.Scale(1. / h_side.Integral())
            ## Make it pretty
            h_mc_p.SetTitle(var + ' Data vs MC X(3872) Distribution ' + run)
            # Format for each case of x-axis
            h_mc_p.GetYaxis().SetTitleOffset(1.6)
            y_max = 1.1 * max((h_mc_p.GetBinContent(h_mc_p.GetMaximumBin()),
                               h_comp.GetBinContent(h_comp.GetMaximumBin()),
                               h_side.GetBinContent(h_side.GetMaximumBin())))
            y_min = 0.9 * min((h_mc_p.GetBinContent(h_mc_p.GetMinimumBin()),
                               h_comp.GetBinContent(h_comp.GetMinimumBin()),
                               h_side.GetBinContent(h_side.GetMinimumBin())))
            h_mc_p.GetYaxis().SetRangeUser(y_min, y_max)
            # Format plotting style
            h_mc_p.SetLineColor(ROOT.kRed)
            h_mc_p.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
            h_comp.SetLineColor(ROOT.kBlue)
            h_comp.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
            h_side.SetLineColor(ROOT.kGreen)
            h_side.SetFillColorAlpha(ROOT.kGreen - 10, 0.7)
            # Remove stats boxes
            h_mc_p.SetStats(False)
            h_comp.SetStats(False)
            h_side.SetStats(False)
            # Print
            h_mc_p.Draw('HIST')
            h_comp.Draw('HISTsame')
            h_side.Draw('HISTsame')
            # Create legend
            leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
            leg.AddEntry(h_mc_p, 'X(3872) Monte-Carlo', 'L')
            leg.AddEntry(h_comp, 'Background Reduced Data in X(3872) Region',
                         'L')
            leg.AddEntry(h_side, 'Background Data in X(3872) Region', 'L')
            leg.SetLineColor(0)
            leg.SetLineStyle(0)
            leg.SetFillStyle(0)
            leg.SetBorderSize(0)
            leg.Draw('same')
            # Save
            c.SaveAs(out_path_mcx_data + var + '.pdf')

        ### Plot comparison of psi(2S) MC and X(3872) MC
        out_path_mc_mc = out_path_plots + 'mcp_v_mcx/'
        if not os.path.exists(out_path_mc_mc):
            os.makedirs(out_path_mc_mc)
        print('   *** Plotting comparison of psi(2S) MC and X(3872) MC ***')
        for var in D_CONFIGS[run]['fit_vars'] + ['bplus_PT'] + ['prob_' + run]:
            # Initialise canvas
            c_name = var + '_MC_Comparison_' + run
            c = ROOT.TCanvas(c_name, c_name, 600, 400)
            c.cd()
            # Select required quantity
            a_mc_p = df_mc_p[var].to_numpy()
            a_mc_x = df_mc_x[var].to_numpy()
            # Scale DIRA and IPCHI2
            i_str = ''
            if (var == 'bplus_DIRA_OWNPV'):
                a_mc_p = np.arccos(a_mc_p)
                a_mc_x = np.arccos(a_mc_x)
                i_str = 'arccos '
            if ('CHI2' in var):
                a_mc_p = np.log(a_mc_p)
                a_mc_x = np.log(a_mc_x)
                i_str = 'log '
            # Create and format histograms
            x_max = max(max(a_mc_p), max(a_mc_x))
            x_min = min(min(a_mc_p), min(a_mc_x))
            h_mc_p = ROOT.TH1F(
                c_name + '_mc_#psi',
                c_name + '_mc_#psi;' + i_str + var + ';candidates', 100, x_min,
                x_max)
            h_mc_x = ROOT.TH1F(c_name + '_mc_X',
                               c_name + '_mc_X;' + i_str + var + ';candidates',
                               100, x_min, x_max)
            # Fill histograms
            for x in a_mc_p:
                h_mc_p.Fill(x)
            for x in a_mc_x:
                h_mc_x.Fill(x)
            # Normalise
            h_mc_p.Scale(1. / h_mc_p.Integral())
            h_mc_x.Scale(1. / h_mc_x.Integral())
            ## Make it pretty
            h_mc_p.SetTitle(var + ' MC X(3872) vs MC #psi(2S) Distribution ' +
                            run)
            # Format for each case of x-axis
            h_mc_p.GetYaxis().SetTitleOffset(1.6)
            y_max = 1.1 * max(h_mc_p.GetBinContent(h_mc_p.GetMaximumBin()),
                              h_mc_x.GetBinContent(h_mc_x.GetMaximumBin()))
            y_min = 0.9 * min(h_mc_p.GetBinContent(h_mc_p.GetMinimumBin()),
                              h_mc_x.GetBinContent(h_mc_x.GetMinimumBin()))
            h_mc_p.GetYaxis().SetRangeUser(y_min, y_max)
            # Format plotting style
            h_mc_p.SetLineColor(ROOT.kRed)
            h_mc_p.SetFillColorAlpha(ROOT.kRed - 10, 0.7)
            h_mc_x.SetLineColor(ROOT.kBlue)
            h_mc_x.SetFillColorAlpha(ROOT.kBlue - 10, 0.7)
            # Remove stats boxes
            h_mc_p.SetStats(False)
            h_mc_x.SetStats(False)
            # Print
            h_mc_p.Draw('HIST')
            h_mc_x.Draw('HISTsame')
            # Create legend
            leg = ROOT.TLegend(0.6, 0.75, 0.9, 0.9)
            leg.AddEntry(h_mc_p, '#psi(2S) Monte-Carlo', 'L')
            leg.AddEntry(h_mc_x, 'X(3872) Monte-Carlo', 'L')
            leg.SetLineColor(0)
            leg.SetLineStyle(0)
            leg.SetFillStyle(0)
            leg.SetBorderSize(0)
            leg.Draw('same')
            # Save
            c.SaveAs(out_path_mc_mc + var + '.pdf')

        ## Perform fit to XGB cut data
        # Filter dataframes
        a_cut_mc = df_train[df_train['class'] == 1]['scaledmass'].to_numpy()
        a_cut_data = df_data[df_data['class'] == 1]['scaledmass'].to_numpy()
        # Fit
        d_cut_fit = fit_doubleCB(a_cut_mc,
                                 a_cut_data,
                                 out_path_plots,
                                 s_info='cut_data_plot')
        # Store params
        d_run_info[run]['cut_fit_params'] = d_cut_fit
        # Fit in X region only
        a_mc_x = df_train[df_train['cat'] == 'mc_x']['scaledmass'].to_numpy()
        a_data_x = df_data[(df_data['class'] == 1) & (
            (df_data['mjpipi'] > 3862)
            & (df_data['mjpipi'] < 3882))]['scaledmass'].to_numpy()
        d_sig_est = fit_doubleCB(a_mc_x,
                                 a_data_x,
                                 out_path_plots,
                                 s_info='x_signal_yield_est')
        d_run_info[run]['x_reg_fit_params'] = d_sig_est

        print('*** Estimated fitted signal efficiency: {:.3f} ***'.format(
            float(d_cut_fit['data_sig_yield']) /
            d_sig_est_alldata['data_sig_yield']))

    print('*** Plotting ROC curve ***')
    ### Plot ROC curve
    fig = plt.figure()
    for run in list(D_CONFIGS.keys()):
        plt.plot(d_roc_plot[run]['sig_effs'],
                 d_roc_plot[run]['bgr_rejs'],
                 label=run)
    plt.legend(loc=3)
    plt.ylabel("Background Rejection")
    plt.xlabel("Signal Efficiency")
    plt.xlim(0., 1.)
    plt.ylim(0., 1.)
    plt.title("ROC Curve")
    plt.tight_layout(pad=2.0)
    fig.savefig(out_path + 'ROC_curve.pdf')
    plt.close()

    print('*** Dumping run information ***')
    with open(out_path + args.out_dict, 'w') as outfile:
        yaml.dump(d_run_info, outfile, default_flow_style=False)
    with open(out_path + 'roc_plot.yml', 'w') as outfile:
        yaml.dump(d_roc_plot, outfile, default_flow_style=False)
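
# A self-contained cross-check sketch (toy labels and scores, not taken from
# the analysis above): scikit-learn's roc_curve produces the same
# signal-efficiency vs background-rejection points that d_roc_plot holds.
import numpy as np
from sklearn.metrics import roc_curve

y_true = np.array([0, 0, 1, 1])            # 1 = signal, 0 = background
y_score = np.array([0.1, 0.4, 0.35, 0.8])  # classifier probabilities
fpr, tpr, _ = roc_curve(y_true, y_score)
sig_eff = tpr          # signal efficiency
bgr_rej = 1.0 - fpr    # background rejection, as plotted above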
Exemple #43
0
import pandas
import numpy
import root_numpy

from hep_ml import reweight
from sklearn.model_selection import train_test_split

from utils.plot import draw_distributions
from utils.stats import print_statistics

###############
# Import data #
###############

columns = ['hSPD', 'pt_b', 'pt_phi', 'vchi2_b', 'mu_pt_sum']

original = root_numpy.root2array('MC_distribution.root', branches=columns)
original = pandas.DataFrame(original)
target = root_numpy.root2array('RD_distribution.root', branches=columns)
target = pandas.DataFrame(target)

original_weights = numpy.ones(len(original))

##################################
# Prepare train and test samples #
##################################

# Divide original samples into training and test parts
original_train, original_test = train_test_split(original)

# Divide target samples into training and test parts
target_train, target_test = train_test_split(target)
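
# A plausible continuation (not part of the snippet above; hyperparameters
# are illustrative): train hep_ml's gradient-boosted reweighter to morph the
# MC (original) distributions into the real-data (target) ones, then derive
# per-event weights on the held-out half.
reweighter = reweight.GBReweighter(n_estimators=50, learning_rate=0.1,
                                   max_depth=3, min_samples_leaf=1000)
reweighter.fit(original_train, target_train)
gb_weights_test = reweighter.predict_weights(original_test)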
Exemple #44
0
#filename = testdata.get_filepath('Nue_LowE.root')
#arr = root2array(filename, 'MCNeutrinoAna/pot_tree')

from root_numpy import root2array, tree2array
from root_numpy import testdata

filename = testdata.get_filepath('test.root')

# Convert a TTree in a ROOT file into a NumPy structured array
arr = root2array(filename, 'tree')
# The TTree name is always optional if there is only one TTree in the file

# Or first get the TTree from the ROOT file
import ROOT
rfile = ROOT.TFile(filename)
intree = rfile.Get('tree')

# and convert the TTree into an array
array = tree2array(intree)
print(array)
input()
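
# tree2array accepts the same slicing and selection options as root2array;
# a sketch using the tree opened above (branch names 'x' and 'y' are
# placeholders for branches that exist in your tree):
subset = tree2array(intree, branches=['x', 'y'],
                    selection='x > 0', start=0, stop=10)
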
parser.add_argument('-a', '--alpha', type=str, default='0.01')
parser.add_argument('-g', '--gamma', type=str, default='0.01')
parser.add_argument('-s', '--step', type=int, default=20)
parser.add_argument('-o', '--offset', type=int, default=0)
args = parser.parse_args()

# specified parameters
apar = args.alpha
gpar = args.gamma
scaledown = args.step
offset = args.offset

# retrieve training data and official reco hadronic energy for comparison
X = root2array('../training_data.root',
               branches='calehad',
               selection='mustopz<1275&&isnumucc==1',
               step=scaledown,
               start=offset)
recoemu_official = root2array('../training_data.root',
                              branches='recoemu',
                              selection='mustopz<1275&&isnumucc==1',
                              step=scaledown,
                              start=offset)
trueenu = root2array('../training_data.root',
                     branches='trueenu',
                     selection='mustopz<1275&&isnumucc==1',
                     step=scaledown,
                     start=offset)
y = trueenu - recoemu_official
yoff = root2array('../training_data.root',
                  branches='recoehad',
                  selection='mustopz<1275&&isnumucc==1',
                  step=scaledown,
                  start=offset)
Exemple #46
0
import os, sys
import ROOT
import numpy as np
import root_numpy as rn
import pandas as pd
from array import array

FILE = str(sys.argv[1])
print "GOT %s" % FILE
NAME = str(os.path.basename(FILE).split(".")[0])
print "NAME %s" % NAME
df = pd.DataFrame(rn.root2array(
    FILE, treename="analysistree/pottree"))[['run', 'subrun', 'pot']]
df['pot_fname'] = NAME
FOUT = "pot_%s.root" % NAME
tf = ROOT.TFile.Open(FOUT, "RECREATE")
print "OPEN %s" % FOUT
tf.cd()

run = array('i', [0])
subrun = array('i', [0])
pot = array('d', [0])
fname = ROOT.std.string()

tree = ROOT.TTree("pot_tree", "")

tree.Branch("run", run, "run/I")
tree.Branch("subrun", subrun, "subrun/I")
tree.Branch("pot", pot, "pot/D")
tree.Branch("pot_fname", fname)
Exemple #47
0
def write_h5_v4(folder,
                output_folder,
                file_name,
                xs,
                LUMI,
                counter,
                cols,
                tree_name="",
                counter_hist="",
                sel_cut="",
                obj_sel_cut="",
                verbose=True):
    print("    Opening ", folder)
    print("\n")
    if verbose:
        print("\n")
        #print("   Initialized df for sample: ", file_name)
        print("   Initialized df for sample: ", file_name)
    #print(cols)

    # loop over files, called file_name
    oldFile = TFile(folder + file_name, "READ")
    if not oldFile.GetListOfKeys().Contains(counter_hist):
        return
    #counter = oldFile.Get(counter_hist)#).GetBinContent(1)
    #nevents_gen = counter.GetBinContent(1)
    nevents_gen = counter
    print("  n events gen.: ", nevents_gen)
    if nevents_gen == 0:
        print("   empty root file! ")
        return
    oldTree = oldFile.Get(tree_name)
    nevents_tot = oldTree.GetEntries()  #?#-1
    #tree_weight = oldTree.GetWeight()
    tree_weight = LUMI * xs / nevents_gen
    print("   Tree weight:   ", tree_weight)

    if verbose:
        print("   Reading n. events in tree: ", nevents_tot)
        #print("\n")

    if nevents_tot <= 0:
        print("   Empty tree!!! ")
        return

    # First loop: check how many events are passing selections
    count = rnp.root2array(folder + file_name,
                           selection=sel_cut,
                           object_selection=obj_sel_cut,
                           treename=tree_name,
                           branches=["EventNumber"],
                           start=0,
                           stop=nevents_tot)
    nevents = count.shape[0]
    if verbose:
        print("   Cut applied: ", sel_cut)
        print("   Events passing cuts: ", nevents)
        print("\n")

    # avoid a loop over variables: read all branches together
    # (the arrays are already zero padded)
    startTime = time.time()
    b = rnp.root2array(folder + file_name,
                       selection=sel_cut,
                       object_selection=obj_sel_cut,
                       treename=tree_name,
                       branches=cols,
                       start=0,
                       stop=nevents_tot)
    df = pd.DataFrame(b)  #,columns=cols)

    #Remove dots from column names
    column_names = []
    for a in cols:
        if isinstance(a, tuple):
            column_names.append(a[0].replace('.', '_').replace('s[',
                                                               '_').replace(
                                                                   ']', ''))
        else:
            column_names.append(a.replace('.', '_'))
    df.columns = column_names
    print(df)

    #add is_signal flag
    df["is_signal"] = np.ones(nevents) if (
        ("n3n2" in folder) or ("H2ToSSTobbbb" in folder) or
        ("TChiHH" in folder)) else np.zeros(nevents)
    df["c_nEvents"] = np.ones(nevents) * nevents_gen
    df["EventWeight"] = df["EventWeight"] * tree_weight
    df["SampleWeight"] = np.ones(nevents) * tree_weight
    #print(df)
    print("\n")
    print("  * * * * * * * * * * * * * * * * * * * * * * *")
    print("  Time needed root2array: %.2f seconds" % (time.time() - startTime))
    print("  * * * * * * * * * * * * * * * * * * * * * * *")
    print("\n")

    #df.rename(columns={"nJets" : "nCHSJets"},inplace=True)
    if verbose:
        print(df)

    # shuffle (sample() returns a new DataFrame, so reassign)
    df = df.sample(frac=1).reset_index(drop=True)

    print("  -------------------   ")
    print("  Events : ", df.shape[0])

    # Write h5
    if ".root" in file_name:
        file_name = file_name[:-5]
    df.to_hdf(output_folder + '/' + file_name + '.h5',
              'df',
              format='table' if (len(cols) <= 2000) else 'fixed')
    print("  " + output_folder + "/" + file_name + ".h5 stored")
    print("  -------------------   ")
Exemple #48
0
    base = os.path.basename(fname)
    match = fname_regex.match(base)
    if not match:
        raise ValueError("Could not match the regex to the file %s" % fname)
    flavor = match.group('flavor')
    full_category = match.group('category')
    category = [i for i in sv_categories if i in full_category][0]
    if flavor != args.signal and flavor != args.bkg:
        log.info(
            'flavour %s is not considered signal or background in this training and is omitted'
            % flavor)
        continue

    nfiles_per_sample = None
    tree = rootnp.root2array(fname, 'tree', variables, None, 0,
                             nfiles_per_sample, args.pickEvery, False,
                             'weight')
    tree = rootnp.rec2array(tree)
    X = np.concatenate((X, tree), 0)
    if flavor == args.signal:
        y = np.concatenate((y, np.ones(tree.shape[0])))
    else:
        y = np.concatenate((y, np.zeros(tree.shape[0])))

    # Getting the weights out
    ## if args.sample.lower() == 'qcd':
    ##    weights_tree = rootnp.root2array(fname,'tree','total_weight',None,0,nfiles_per_sample,args.pickEvery,False,'total_weight')
    ##    weights = np.concatenate((weights,weights_tree),0)
    ## else:
    ##    weights = np.concatenate((weights,np.ones(tree.shape[0])))
    
    return plt.show()
    


# In[49]:

## Visualize weights: Heat Map of neural network weights

dnn_weight_map(pipe_classifiers["DNN"].named_steps['kerasclassifier'])


# In[50]:

# Load dataset
rec_np_data = root2array("combined/run2016Data.root", 
                         "event_mvaVariables_step7_cate4", features)
np_data = rec2array(rec_np_data)


# convert the NumPy ndarray into a pandas DataFrame
df_raw_data = pd.DataFrame(data=np_data, columns=features)

df_raw_data.describe()
df_raw_data.info()

X_data = df_raw_data.values


# In[51]:

# Plot a mva distribution
Exemple #50
0
def test_single_chain():
    f = load(['single1.root', 'single2.root'])
    a = rnp.root2array(f)
    check_single(a, 200)

start = 1000000
stop = 2000000

storage_output ="/mnt/storage/lborgna/BkgMatched/Final/"
out_file = "BkgAll6_HighPt_Test" +'.root'


fnew = ROOT.TFile(storage_output+out_file,"recreate")
Tree = ROOT.TTree("FlatSubstructureJetTree", "Reconst ntuple")

test = ROOT.TFile.Open(bkg_storage + bkg_file)
old_tree = test.Get("FlatSubstructureJetTree")

Wpt = rtnp.root2array(sig_storage + sig_file, treename=treename,
                      selection=selection, branches=fjet_pt)
QCD_pt = rtnp.root2array(bkg_storage + bkg_file, treename=treename,
                         selection=selectionQCD, branches=fjet_pt)

print(Wpt)
print(QCD_pt)

Nbins = 100
n, bins, patches = plt.hist(QCD_pt, Nbins, density=False,
                            facecolor='green', alpha=0.5)
nn, bbins, ppatches = plt.hist(Wpt, bins, density=False,
                               facecolor='red', alpha=0.5)

ratio = nn / (n + 1e-11)  # epsilon guards against empty background bins
A = np.max(ratio)
ratio = (1 / A) * ratio  # normalise so the largest weight is 1
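
# Sketch (continues the arrays above; np assumed imported as already used,
# and fjet_pt assumed to be a flat branch): turn the normalised bin ratio
# into per-event weights by looking up each jet pT in the histogram binning.
idx = np.clip(np.digitize(Wpt, bins) - 1, 0, Nbins - 1)
w_evt = ratio[idx]  # one weight per entry of Wpt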

cluster_E_entry = ROOT.vector('float')()
cluster_eta_entry = ROOT.vector('float')()
# calibrated energy and other features that indicate the "hardness"
# of the interaction.

from __future__ import print_function

from ROOT import *
from root_numpy import root2array
import joblib
from sklearn.svm import SVR
import matplotlib.pyplot as plt
import os

# retrieve data, scaled down by factor of 20
scaledown = 20
Xhad = root2array('../grid_output_stride5_offset0.root',
                  branches=['calehad', 'cvnchargedpion'],
                  selection='mustopz<1275',
                  step=scaledown)
Xmu = root2array('../grid_output_stride5_offset0.root',
                 branches='recotrklenact',
                 selection='mustopz<1275',
                 step=scaledown).reshape(-1, 1)
ynu = root2array('../grid_output_stride5_offset0.root',
                 branches='trueenu',
                 selection='mustopz<1275',
                 step=scaledown)
svr_mu = joblib.load('../muon/models/muon_energy_estimator_active.pkl')
recoemu = svr_mu.predict(Xmu)
yhad = ynu - recoemu

hfit = TH2F('hfit', '', 100, 0, 2, 100, 0, 5)
for i in range(len(Xhad)):
Exemple #53
0
input8="../../../hdfs/user/cc14398/Cs137-10s-18Mar/PS8m.root"
input9="../../../hdfs/user/cc14398/Cs137-10s-18Mar/PS9m.root"
input10="../../../hdfs/user/cc14398/Cs137-10s-18Mar/PS1m10.root"

input1="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm1.root"
input2="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm2.root"
input3="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm3.root"
input4="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm4.root"
input5="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm5.root"
input6="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm6.root"
input7="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm7.root"
input8="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm8.root"
input9="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm9.root"
input10="../../..//hdfs/user/cc14398/Cs137-10s-28Mar/PS550cm10.root"
"""
KE1 = root2array(input1, treename="PhaseSpace", branches="Ekine")
KE2 = root2array(input2, treename="PhaseSpace", branches="Ekine")
KE3 = root2array(input3, treename="PhaseSpace", branches="Ekine")
KE4 = root2array(input4, treename="PhaseSpace", branches="Ekine")
KE5 = root2array(input5, treename="PhaseSpace", branches="Ekine")
KE6 = root2array(input6, treename="PhaseSpace", branches="Ekine")
"""
KE7 = root2array(input7, treename="PhaseSpace", branches="Ekine")
KE8 = root2array(input8, treename="PhaseSpace", branches="Ekine")
KE9 = root2array(input9, treename="PhaseSpace", branches="Ekine")
KE10 = root2array(input10, treename="PhaseSpace", branches="Ekine")
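
# Overlay sketch (assumes matplotlib is importable in this environment):
import matplotlib.pyplot as plt
for label, ke in (('PS550cm7', KE7), ('PS550cm8', KE8),
                  ('PS550cm9', KE9), ('PS550cm10', KE10)):
    plt.hist(ke, bins=100, histtype='step', label=label)
plt.xlabel('Ekine')
plt.legend()
plt.savefig('ekine_overlay.pdf')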
"""

print(KE1.size)
print(KE2.size)
print(KE3.size)
Exemple #54
0
def test_ntuple():
    f = load('ntuple.root')
    a = rnp.root2array(f)
    assert_equal(len(a), 10)
    assert_equal(len(a.dtype.names), 3)
Exemple #55
0
def test_single_filename_not_exist():
    f = load('does_not_exist.root')
    a = rnp.root2array(f)
Exemple #56
0
# -*- coding: utf-8 -*-
import root_numpy
import ROOT
import numpy as np
import pandas as pd
from shutil import copyfile
ROOT.gROOT.SetBatch(True)

# Load data B mass branches
print('*** Loading Data ***')
data_loc = '/home/s1305440/PPE_disk/project_stuff/data/data_Qcut.root'
a_data = root_numpy.root2array(
    data_loc,
    treename='DecayTree',
    branches=['scaledmass', 'mppp', 'mjprp', 'mjpk'])
# Load RapidSim B mass branches
Bu2Jpsipipipi_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/Bu2Jpsipipipi_tree.root'
a_Bu2Jpsipipipi = root_numpy.root2array(Bu2Jpsipipipi_loc,
                                        treename='DecayTree',
                                        branches=['Bp_0_M_pip_12Kp', 'Bp_0_M'])
Bu2JpsipipiK_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/Bu2JpsipipiK_tree.root'
a_Bu2JpsipipiK = root_numpy.root2array(Bu2JpsipipiK_loc,
                                       treename='DecayTree',
                                       branches=['Bp_0_M_Kp_02pip', 'Bp_0_M'])
B02psi2skpi_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/B02psi2skpi_tree.root'
a_B02psi2skpi = root_numpy.root2array(B02psi2skpi_loc,
                                      treename='DecayTree',
                                      branches=['m_pim_1_drop', 'B0_0_M'])
Bs2psi2Sphi_loc = '/home/s1305440/PPE_disk/project_stuff/RapidSim/validation/Bs2psi2Sphi_tree.root'
a_Bs2psi2Sphi = root_numpy.root2array(Bs2psi2Sphi_loc,
                                      treename='DecayTree',
Exemple #57
0
    'ntuple_ecal_hits_1.8e8EOT_9.root', 'ntuple_ecal_hits_1.8e8EOT_10.root',
    'ntuple_ecal_hits_1.8e8EOT_11.root', 'ntuple_ecal_hits_1.8e8EOT_12.root',
    'ntuple_ecal_hits_1.8e8EOT_13.root', 'ntuple_ecal_hits_1.8e8EOT_14.root',
    'ntuple_hcal_hits_1.8e8EOT_0.root', 'ntuple_hcal_hits_1.8e8EOT_1.root',
    'ntuple_hcal_hits_1.8e8EOT_2.root', 'ntuple_hcal_hits_1.8e8EOT_3.root',
    'ntuple_hcal_hits_1.8e8EOT_4.root', 'hcalHits_signal_mA1MeV.root',
    'hcalHits_signal_mA5MeV.root', 'hcalHits_signal_mA10MeV.root',
    'hcalHits_signal_mA50MeV.root', 'hcalHits_signal_mA100MeV.root',
    'hcalHits_signal_mA500MeV.root', 'hcalHits_signal_mA1000MeV.root'
]

target_name_tab = [
    'background_0.npy', 'background_1.npy', 'background_2.npy',
    'background_3.npy', 'background_4.npy', 'background_5.npy',
    'background_6.npy', 'background_7.npy', 'background_8.npy',
    'background_9.npy', 'background_10.npy', 'background_11.npy',
    'background_12.npy', 'background_13.npy', 'background_14.npy',
    'hcal_background_0.npy', 'hcal_background_1.npy', 'hcal_background_2.npy',
    'hcal_background_3.npy', 'hcal_background_4.npy', 'hcal_signal_m_1.npy',
    'hcal_signal_m_5.npy', 'hcal_signal_m_10.npy', 'hcal_signal_m_50.npy',
    'hcal_signal_m_100.npy', 'hcal_signal_m_500.npy', 'hcal_signal_m_1000.npy'
]

file_placement = 'data/'
for fname, target_name in zip(fname_tab, target_name_tab):
    array = root2array(fname)
    np.save(file_placement + target_name, array)
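
# Round-trip sketch (hypothetical file name): structured arrays written with
# np.save keep their field names and reload directly:
#   arr = np.load(file_placement + 'background_0.npy')
#   print(arr.dtype.names)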

# A method that avoids the root_numpy library would be preferable (it is not
# available for Windows, nor on the SLAC server).
Exemple #58
0
def test_double_tree_name_not_specified():
    f = load('trees.root')
    a = rnp.root2array(f)
Exemple #59
0
# Event data frame
edfs = {}
mdfs = {}

sample_name = str(sys.argv[1])
sample_file = str(sys.argv[2])

for name, file_ in [(sample_name, sample_file)]:

    INPUT_FILE = file_

    #
    # Vertex wise Trees
    #
    vertex_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename='VertexTree'))
    angle_df = pd.DataFrame(rn.root2array(INPUT_FILE,
                                          treename='AngleAnalysis'))
    shape_df = pd.DataFrame(rn.root2array(INPUT_FILE,
                                          treename='ShapeAnalysis'))
    gap_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename="GapAnalysis"))
    match_df = pd.DataFrame(rn.root2array(INPUT_FILE,
                                          treename="MatchAnalysis"))
    dqds_df = pd.DataFrame(rn.root2array(INPUT_FILE, treename="dQdSAnalysis"))

    #
    # Combine DataFrames
    #
    comb_df = pd.concat([
        vertex_df.set_index(rserv),
        angle_df.set_index(rserv),
Exemple #60
0
def test_no_filename():
    rnp.root2array([])