Example n. 1
0
def PFN_AUC_calculation(jet_array_1, jet_array_2, train_size, test_size,
                        use_pids=True,
                        Phi_sizes=(100, 100, 128), F_sizes=(100, 100, 100),
                        num_epoch=10, batch_size=500):
    """Train a Particle Flow Network to separate two jet samples; return the AUC.

    Parameters
    ----------
    jet_array_1, jet_array_2 : np.ndarray
        Zero-padded constituent arrays of shape (n_jets, n_particles, >=4);
        only the first four channels (pt, y, phi, pid) are used.
        NOTE: modified in place by the preprocessing step.
    train_size, test_size : int
        Jets assigned to the train/test splits; the remainder is validation.
    use_pids : bool, optional
        If True, remap the particle-id channel with ``remap_pids``;
        otherwise drop it and train on (pt, y, phi) only.
    Phi_sizes, F_sizes : tuple of int, optional
        Layer widths for the per-particle Phi network and the jet-level F network.
    num_epoch, batch_size : int, optional
        Training schedule.

    Returns
    -------
    float
        ROC AUC on the held-out test set (class 1 = jet_array_1).
    """
    # Stack the two samples, keep (pt, y, phi, pid), label sample 1 as signal.
    X = np.concatenate([jet_array_1, jet_array_2])[:, :, :4]
    y = np.concatenate([np.ones(len(jet_array_1)), np.zeros(len(jet_array_2))])

    # data controls: validation gets whatever jets remain after train + test
    train, val, test = train_size, X.shape[0] - train_size - test_size, test_size

    # convert labels to one-hot for the two-class softmax output
    Y = to_categorical(y, num_classes=2)

    # Preprocess in place: center each jet in (y, phi) with a pt-weighted average
    # and normalize the pt channel; mask selects real (non-padding) particles.
    for x in X:
        mask = x[:, 0] > 0
        yphi_avg = np.average(x[mask, 1:3], weights=x[mask, 0], axis=0)
        x[mask, 1:3] -= yphi_avg
        # padding rows have pt == 0, so the full-column sum equals the masked sum
        x[mask, 0] /= x[:, 0].sum()

    # handle the particle id channel: remap raw PDG ids, or drop the channel
    if use_pids:
        remap_pids(X, pid_i=3)
    else:
        X = X[:, :, :3]

    # do train/val/test split
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)

    # Build the architecture; suppress_stdout hides the Keras model summary.
    # (No placeholder assignment is needed: a name bound inside `with` stays
    # in scope after the block exits.)
    with suppress_stdout():
        pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)

    # train silently
    pfn.fit(X_train, Y_train,
            epochs=num_epoch,
            batch_size=batch_size,
            validation_data=(X_val, Y_val),
            verbose=0)

    # get predictions on test data
    preds = pfn.predict(X_test, batch_size=1000)

    # AUC computed on the signal-class score column
    auc = roc_auc_score(Y_test[:, 1], preds[:, 1])

    return auc
Example n. 2
0
        # Tail of a helper whose definition starts above this chunk:
        # presumably remaps raw particle-ID codes in channel 3 via map_func
        # for the masked (real) particles of each jet — TODO confirm upstream.
        x[mask, 3] = map_func(x[mask, 3])
    return X


if __name__ == '__main__':
    # Widths of the per-particle (Phi) and jet-level (F) networks.
    phi_layer_sizes = (16, 32, 64, 128)
    f_layer_sizes = (128, 64, 32, 16)

    # Load the training sample, preprocess it, and one-hot encode the labels.
    X, Y = load_data(2000000, 'final_efn_train')
    X = preprocess(X)
    Y = ef.utils.to_categorical(Y)

    # Hold out one fifth each for validation and testing.
    (X_train, X_val, X_test,
     Y_train, Y_val, Y_test) = split_data(X, Y,
                                          test_prop=1.0 / 5,
                                          val_prop=1.0 / 5)

    # Build the PFN with a fixed-learning-rate Adam optimizer and train it.
    adam_opt = optimizers.Adam(lr=.0006)
    pfn = PFN(input_dim=X_train.shape[-1],
              Phi_sizes=phi_layer_sizes,
              F_sizes=f_layer_sizes,
              optimizer=adam_opt)
    pfn.fit(X_train,
            Y_train,
            epochs=NUM_EPOCHS,
            batch_size=250,
            validation_data=(X_val, Y_val),
            verbose=1)

    # Score the test set and report the area under the ROC curve.
    scores = pfn.predict(X_test, batch_size=1000)
    false_pos, true_pos, _thresholds = roc_curve(Y_test[:, 1], scores[:, 1])
    roc_auc = auc(false_pos, true_pos)
    print('AUC: ' + str(roc_auc))
Example n. 3
0
        # Shuffle loaded datasets and begin
        # NOTE(review): `range` is immutable in Python 3, so np.random.shuffle(inds)
        # raises TypeError there — this looks like Python-2-era code; it would need
        # inds = list(range(...)) or np.random.permutation(len(X_train)) to run today.
        inds = range(len(X_train))
        np.random.shuffle(inds)
        # NOTE(review): X_train/W_train come from the batch loader below, while Y and
        # MVA_epoch must have been bound earlier in the (unseen) loop — verify upstream.
        X_epoch, Y_epoch, W_epoch, MVA_epoch = X_train[inds], Y[inds], W_train[
            inds], MVA_epoch[inds]
        # NOTE(review): `== np.nan` is always False (NaN != NaN) and `== np.inf` only
        # matches exact +inf; np.isnan(...).any() / np.isinf(...).any() look intended,
        # so these skip-bad-weights guards currently never (or rarely) fire.
        if (np.min(W_train) == np.nan): continue
        if (np.min(W_train) == np.inf): continue

        # Train one epoch on the shuffled arrays, then dump them to CSV for inspection.
        model.fit(X_epoch, Y_epoch, epochs=1, batch_size=4 * 512, verbose=1)
        pd.DataFrame(X_epoch).to_csv("X_example.csv", index=False)
        pd.DataFrame(Y_epoch).to_csv("Y_example.csv", index=False)
        pd.DataFrame(MVA_epoch).to_csv("MVA_example.csv", index=False)
        # Every 10th epoch: checkpoint the model and persist predictions + MVA scores.
        if (epoch % 10 == 0):

            model.save(
                '/beegfs/desy/user/dydukhle/TauId/models/keras__deep_set__%i.model'
                % (epoch))
            preds = model.predict(X_epoch, batch_size=1 * 2048)
            # NOTE(review): mva aliases the *unshuffled* MVA_train while preds come from
            # the shuffled X_epoch — row order likely disagrees between the two CSVs.
            mva = MVA_train
            df_preds = pd.DataFrame({
                "pred": [i[0] for i in preds],
                "labels": [y for y in Y_epoch]
            })  #, 'mva':[m[0] for m in mva] })
            df_preds.to_csv(
                "./DeepSets_res/labels_and_pred_e_{0}.csv".format(epoch))
            df_mva = pd.DataFrame(mva)
            df_mva.to_csv("./DeepSets_res/mva_{0}.csv".format(epoch))
    # Get next batch from background loader res
        X_train, Y, W_train, MVA_train = utils.BuildBatch(
        )  #res.get(timeout=180)