Ejemplo n.º 1
0
def main(args):
    """Fit a straight line to the Tau21 profile and save it as a DDT transform.

    The profile returned by ``fill_profile(data, VAR_TAU21)`` is fitted with
    a first-degree polynomial over ``FIT_RANGE``.  The fitted slope is then
    packed into a scikit-learn ``LinearRegression`` object, which is written
    to ``models/ddt/ddt.pkl.gz`` via ``saveclf``.

    Arguments:
        args: Arguments object handed to ``initialise``; the object it
            returns must provide an ``input`` attribute (path prefix of
            ``data.h5``).

    Returns:
        0, always.
    """

    # Set up the run; `cfg` is returned by the helper but not needed here.
    args, cfg = initialise(args)

    # Only the first element of the loader's return value is used.
    data, _, _ = load_data(args.input + 'data.h5', train=True, background=True)

    # Build the profile of the Tau21 variable.
    profile = fill_profile(data, VAR_TAU21)

    # Register a 'pol1' (straight line) function called 'fit' on FIT_RANGE and
    # fit the profile with it, referring to the function by name.  ROOT fit
    # options: R = restrict to the function range, Q = quiet, 0 = do not draw.
    fit = ROOT.TF1('fit', 'pol1', *FIT_RANGE)
    profile.Fit('fit', 'RQ0')

    # Parameter 0 is the constant term, parameter 1 the slope.
    intercept_val = fit.GetParameter(0)
    coef_val = fit.GetParameter(1)
    intercept_err = fit.GetParError(0)
    coef_err = fit.GetParError(1)

    # Hand-build the linear model instead of calling .fit(): with these
    # attributes, predict(x) evaluates coef * (x - FIT_RANGE[0]).
    x_min = FIT_RANGE[0]
    slope_at_min = coef_val * x_min

    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    ddt.intercept_ = np.array([-slope_at_min])
    # `offset_` is an extra attribute attached here: the value of the fitted
    # line at the lower edge of the fit range.
    ddt.offset_ = np.array([slope_at_min + intercept_val])

    print("Fitted function:")
    print("  intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err))
    print("  coef:      {:7.4f} ± {:7.4f}".format(coef_val, coef_err))

    # Persist the transform.
    saveclf(ddt, 'models/ddt/ddt.pkl.gz')

    return 0
Ejemplo n.º 2
0
def main(args):
    """Fit a k-nearest-neighbours regressor to the background profile of VAR.

    Prints the background acceptance found at a fixed signal efficiency,
    then selects the events with ``signal == 0``, builds a profile with
    ``fill_profile`` and fits a distance-weighted ``KNeighborsRegressor``
    mapping the profile's (x, y) coordinates to its z values.  The fitted
    regressor is saved under ``models/knn/``.

    Arguments:
        args: Arguments object handed to ``initialise``; the object it
            returns must provide an ``input`` attribute (file name below
            ``data/``).

    Returns:
        0, always.
    """

    # Set up the run; `cfg` is returned by the helper but not needed here.
    args, cfg = initialise(args)

    # `train=True` is intentionally not passed: the data file is used here.
    data, _, _ = load_data('data/' + args.input)

    # NOTE: An optional neural-network step used to run at this point and is
    # currently disabled: initialise the Keras backend and configuration
    # (`initialise_backend`, `initialise_config`), load
    # 'models/adversarial/classifier/full/classifier.h5' with Keras'
    # `load_model` and attach its output as 'NN' through
    # `run.adversarial.common.add_nn`.

    # Report the background acceptance at a signal efficiency of 10%.
    eff_sig = 0.10
    fpr, tpr, thresholds = roc_curve(data['signal'],
                                     data[VAR],
                                     sample_weight=data['weight'])

    # Working point whose true-positive rate is closest to the target.
    nearest = np.argmin(np.abs(tpr - eff_sig))
    summary = "Background acceptance @ {:.2f}% sig. eff.: {:.2f}% ({} > {:.2f})".format(
        eff_sig * 100., fpr[nearest] * 100., VAR, thresholds[nearest])
    print(summary)
    print("Chosen target efficiency: {:.2f}%".format(EFF))

    # Only events flagged as non-signal enter the profile.
    background = data[data['signal'] == 0]
    profile_meas, (x, y, z) = fill_profile(background)

    # One row per profile point: columns are the flattened x and y values,
    # the regression target is the flattened z value.
    X = np.vstack([x.flatten(), y.flatten()]).T
    Y = z.flatten()

    # Debug output; joined with single spaces to reproduce the spacing of a
    # comma-separated print statement.
    print(" ".join(["debugging more: x.shape = ", str(X.shape),
                    ", y.ndim = ", str(Y.ndim)]))

    # Fit the regressor, weighting neighbours by inverse distance.
    knn = KNeighborsRegressor(weights='distance')
    knn.fit(X, Y)

    # Persist the regressor; the file name encodes VAR, EFF and MODEL.
    out_path = 'models/knn/knn_{:s}_{}_{}.pkl.gz'.format(VAR, EFF, MODEL)
    saveclf(knn, out_path)

    return 0
Ejemplo n.º 3
0
def main(args):
    """Fit a straight line to a variable's profile and save it as a DDT transform.

    The variable is hard-coded below (currently ``VAR_N2``).  Its profile is
    fitted with a first-degree polynomial over the matching fit range, and
    the fitted slope is packed into a scikit-learn ``LinearRegression``
    object that is written to ``models/ddt/ddt_<variable>.pkl.gz``.

    Arguments:
        args: Arguments object handed to ``initialise``; the object it
            returns must provide an ``input`` attribute (path prefix of
            ``data.h5``).

    Returns:
        0, always -- including the early exit taken when the selected
        variable has no associated fit range.
    """

    # Set up the run; `cfg` is returned by the helper but not needed here.
    args, cfg = initialise(args)

    # Only the first element of the loader's return value is used.
    data, _, _ = load_data(args.input + 'data.h5', train=True, background=True)

    # Variable to decorrelate.  Alternatives that have a fit range below:
    # VAR_TAU21, VAR_DECDEEP, VAR_DEEP.
    variable = VAR_N2

    # Build the profile of the chosen variable.
    profile = fill_profile(data, variable)

    # Pick the fit range belonging to the variable.  The chain is evaluated
    # lazily, so only the constants that are actually reached are looked up.
    if variable == VAR_N2:
        fit_range = FIT_RANGE_N2
    elif variable == VAR_TAU21:
        fit_range = FIT_RANGE_TAU21
    elif variable == VAR_DECDEEP:
        fit_range = FIT_RANGE_DECDEEP
    elif variable == VAR_DEEP:
        fit_range = FIT_RANGE_DEEP
    else:
        # NOTE(review): this failure path also returns 0, so callers cannot
        # tell it apart from success.
        print("variable invalid")
        return 0

    # Register a 'pol1' (straight line) function called 'fit' on the range and
    # fit the profile with it, referring to the function by name.  ROOT fit
    # options: R = restrict to the function range, Q = quiet, 0 = do not draw.
    fit = ROOT.TF1('fit', 'pol1', *fit_range)
    profile.Fit('fit', 'RQ0')

    # Parameter 0 is the constant term, parameter 1 the slope.
    intercept_val = fit.GetParameter(0)
    coef_val = fit.GetParameter(1)
    intercept_err = fit.GetParError(0)
    coef_err = fit.GetParError(1)

    # Hand-build the linear model instead of calling .fit(): with these
    # attributes, predict(x) evaluates coef * (x - fit_range[0]).
    x_min = fit_range[0]
    slope_at_min = coef_val * x_min

    ddt = LinearRegression()
    ddt.coef_ = np.array([coef_val])
    ddt.intercept_ = np.array([-slope_at_min])
    # `offset_` is an extra attribute attached here: the value of the fitted
    # line at the lower edge of the fit range.
    ddt.offset_ = np.array([slope_at_min + intercept_val])

    print("Fitted function:")
    print("  intercept: {:7.4f} ± {:7.4f}".format(intercept_val, intercept_err))
    print("  coef:      {:7.4f} ± {:7.4f}".format(coef_val, coef_err))

    # Persist the transform; the file name carries the variable name.
    saveclf(ddt, 'models/ddt/ddt_{}.pkl.gz'.format(variable))
    print("got to the end of main()")
    return 0
Ejemplo n.º 4
0
def train(data, variable, bg_eff, signal_above=False):
    """Fit and save a KNN regressor on the background profile of `variable`.

    Arguments:
        data: Indexable dataset with a 'signal' column; only the entries
            where it equals 0 are used.
        variable: Name of the variable; forwarded to ``fill_profile`` and
            used (as a string) in the output file name.
        bg_eff: Background efficiency; forwarded to ``fill_profile`` and
            written into the output file name rounded to zero decimals.
        signal_above: Forwarded unchanged to ``fill_profile``.

    Returns:
        None.  The fitted regressor is written to
        ``models/knn/knn_<variable>_<bg_eff>.pkl.gz`` via ``saveclf``.
    """
    # Restrict to the events flagged as non-signal and build the profile.
    background = data[data['signal'] == 0]
    profile_meas, (x, y, z) = fill_profile(background,
                                           variable,
                                           bg_eff,
                                           signal_above=signal_above)

    # One row per profile point: columns are the flattened x and y values,
    # the regression target is the flattened z value.
    coords = np.vstack([x.flatten(), y.flatten()]).T
    targets = z.flatten()

    # Fit the regressor, weighting neighbours by inverse distance.
    regressor = KNeighborsRegressor(weights='distance')
    regressor.fit(coords, targets)

    # Persist the regressor.
    out_path = 'models/knn/knn_{:s}_{:.0f}.pkl.gz'.format(variable, bg_eff)
    saveclf(regressor, out_path)
Ejemplo n.º 5
0
def main(args):
    """Fit the 1D model selected by FIT to the background profile and save it.

    Prints the background acceptance found at a fixed signal efficiency,
    then selects the events with ``signal == 0``, builds a 1D profile with
    ``fill_profile_1D`` and fits the model selected by the module-level
    ``FIT`` string:

    * ``'knn1D'`` (exact match), ``'knn1D_v2'``, ``'knn1D_v3'``,
      ``'knn1D_v4'``: k-nearest-neighbours regressors with different
      neighbour counts and weightings;
    * ``'poly2'`` / ``'poly3'``: polynomial features followed by ``Ridge``;
    * ``'lin'``: ``LinearRegression`` weighted by the inverse profile errors;
    * ``'erf'``: ``curve_fit`` of ``func``; here the saved object is the
      array of fitted parameters rather than an estimator.

    Apart from ``'knn1D'``, the checks are substring tests evaluated in the
    order listed, so the first match wins.

    Arguments:
        args: Arguments object handed to ``initialise``; the object it
            returns must provide an ``input`` attribute (file name below
            ``data/``).

    Returns:
        0 on success.

    Raises:
        ValueError: If ``FIT`` selects none of the supported fit types.
            Previously this case failed with an UnboundLocalError because
            the fallback branch used the never-assigned model variable.
    """

    # Set up the run; `cfg` is returned by the helper but not needed here.
    args, cfg = initialise(args)

    # `train=True` is intentionally not passed: the data file is used here.
    data, _, _ = load_data('data/' + args.input)

    # NOTE: An optional neural-network step used to run at this point and is
    # currently disabled: initialise the Keras backend and configuration
    # (`initialise_backend`, `initialise_config`), load
    # 'models/adversarial/classifier/full/classifier.h5' with Keras'
    # `load_model` and attach its output as 'NN' through
    # `run.adversarial.common.add_nn`.

    # Report the background acceptance at a signal efficiency of 10%.
    eff_sig = 0.10
    fpr, tpr, thresholds = roc_curve(data['signal'],
                                     data[VAR],
                                     sample_weight=data['TotalEventWeight'])

    # Working point whose true-positive rate is closest to the target.
    idx = np.argmin(np.abs(tpr - eff_sig))
    print("Background acceptance @ {:.2f}% sig. eff.: {:.2f}% ({} > {:.2f})".format(
        eff_sig * 100., fpr[idx] * 100., VAR, thresholds[idx]))
    print("Chosen target efficiency: {:.2f}%".format(EFF))

    # Only events flagged as non-signal enter the profile.
    data = data[data['signal'] == 0]
    profile_meas, (x, y, err) = fill_profile_1D(data)

    # scikit-learn expects a 2D feature matrix: one column, one row per point.
    X = x.reshape(-1, 1)
    # Inverse-error weights, used by the 'lin' fit only.
    # NOTE(review): assumes `err` contains no zeros -- TODO confirm.
    weights = 1 / err

    print(X)

    # Select and fit the model.
    if 'knn1D' == FIT:
        model = KNeighborsRegressor(5, weights='distance')
        model.fit(X, y)

    elif 'knn1D_v2' in FIT:
        model = KNeighborsRegressor(5, weights='uniform')
        model.fit(X, y)

    elif 'knn1D_v3' in FIT:
        model = KNeighborsRegressor(2, weights='uniform')
        model.fit(X, y)

    elif 'knn1D_v4' in FIT:
        model = KNeighborsRegressor(3, weights='distance')
        model.fit(X, y)

    elif 'poly2' in FIT:
        model = make_pipeline(PolynomialFeatures(degree=2), Ridge())
        model.fit(X, y)

    elif 'poly3' in FIT:
        model = make_pipeline(PolynomialFeatures(degree=3), Ridge())
        model.fit(X, y)

    elif 'lin' in FIT:
        model = LinearRegression()
        model.fit(X, y, sample_weight=weights)

        # These diagnostics used to sit in the fallback branch, where no
        # model exists.  The linear fit is the only one whose result exposes
        # `coef_` and `intercept_` directly, so they are reported here.
        print("Fitted function:")
        print("  coef: {}".format(model.coef_))
        print("  intercept:      {}".format(model.intercept_))

    elif 'erf' in FIT:
        # `model` is the array of optimal parameters; the covariance matrix
        # returned alongside it is not used.
        model, _ = curve_fit(func, x, y, p0=[73, 0.0004, 2000])
        print(" ".join(["ERF: ", str(model)]))

    else:
        # Nothing was fitted, so there is nothing to report or save.
        raise ValueError("Weird FIT type chosen: {!r}".format(FIT))

    # Persist the fit result; the file name encodes FIT, VAR, EFF and MODEL.
    saveclf(model,
            'models/knn/{}_{:s}_{}_{}.pkl.gz'.format(FIT, VAR, EFF, MODEL))

    # NOTE: Writing the fit coefficients to a ROOT file (as a TVector3 named
    # "coefficients") was sketched here previously but is not implemented.

    return 0