Example #1 (score: 0)
File: test.py — Project: YohannFaure/recnn
def tester(pattern='R=1e-05_anti-kt'):
    """Evaluate the saved regression model for *pattern* on the test sample.

    Fits the tf transform on the training sample, applies it to the test
    sample, and runs ``test`` in regression mode against the pickled model.

    Parameters
    ----------
    pattern : str
        Tag identifying the preprocessed .npy files and the model pickle.
    """
    # Fit the transform on the training sample only...
    X, y = np.load('data/npyfilesregression/Background_JEC_train_ID_preprocessed_{}.npy'.format(pattern))
    # Fixed: print statements were Python-2 syntax; the rest of this file
    # already uses the print() function.
    print('train data loaded')
    tf = create_tf_transform(X)
    print('tf created')
    model = 'data/modelsregression/Model_{}.pickle'.format(pattern)
    # ...then apply the same transform to the test sample before scoring.
    X, y = np.load('data/npyfilesregression/Background_JEC_test_ID_preprocessed_{}.npy'.format(pattern))
    print('test data loaded')
    X = apply_tf_transform(X, tf)
    test(X, y, model, regression=True)
Example #2 (score: 0)
File: test.py — Project: cbernet/recnn
def test(filepath, modelpath, trainfilepath, rootfilepath, branchname, isSignal):
    """Score the events of *filepath* with the model at *modelpath* and
    append the predictions to a copy of the 'testtree' in *rootfilepath*.

    The tf transform is fitted on the training sample (*trainfilepath*) and
    then applied to the sample being scored. The output tree 'finaltree'
    carries one double branch *branchname* with the prediction and a flat
    'isSignal' branch set from the *isSignal* flag.
    """
    X_train, _ = np.load(trainfilepath)
    tf = create_tf_transform(X_train)

    X_eval, _ = np.load(filepath)
    X_eval_tf = apply_tf_transform(X_eval, tf)
    predictions = predict(X_eval_tf, modelpath, grnn_predict_gated, regression=False)

    out_file = TFile(rootfilepath, 'update')
    source_tree = out_file.Get('testtree')
    out_tree = source_tree.CloneTree(0)
    out_tree.SetName('finaltree')

    # ROOT branches are filled through one-element buffers.
    score_buf = np.zeros(1)
    out_tree.Branch(branchname, score_buf, branchname + '/D')
    label_buf = np.zeros(1)
    label_buf[0] = 1. if isSignal else 0.
    out_tree.Branch('isSignal', label_buf, 'isSignal/D')

    # NOTE(review): tree order and prediction order are assumed to stay
    # aligned ("eta still aligned" in the original) — not checked here.
    for index, _event in enumerate(source_tree):
        score_buf[0] = predictions[index]
        out_tree.Fill()
    out_tree.Write()
Example #3 (score: 0)
def train(filename_train,
          filename_model,
          regression=False,
          simple=False,
          n_features=14,
          n_hidden=40,
          n_epochs=5,
          batch_size=64,
          step_size=0.0005,
          decay=0.9,
          random_state=42,
          verbose=False,
          statlimit=-1):
    """Train a recursive NN (gated unless *simple*) and checkpoint the best
    parameters (by validation loss) to *filename_model* as a pickle.

    Parameters
    ----------
    filename_train : str
        Path to the training data; a pickle if the name ends in "e"
        (e.g. ".pickle"), otherwise a .npy file holding (X, y).
    filename_model : str
        Output path for the pickled best parameters.
    regression : bool
        Use squared error instead of log loss.
    simple : bool
        Use the simple (non-gated) RNN variant.
    n_features, n_hidden : int
        Network dimensions.
    n_epochs, batch_size, step_size, decay : training hyper-parameters;
        the step size is multiplied by *decay* after every epoch.
    random_state : int
        Seed controlling shuffling, the train/valid split and init.
    statlimit : int
        Keep only the first *statlimit* shuffled examples (-1 = all).
    """
    # Initialization
    gated = not simple
    if verbose:
        logging.info("Calling with...")
        logging.info("\tfilename_train = %s" % filename_train)
        logging.info("\tfilename_model = %s" % filename_model)
        logging.info("\tgated = %s" % gated)
        logging.info("\tn_features = %d" % n_features)
        logging.info("\tn_hidden = %d" % n_hidden)
        logging.info("\tn_epochs = %d" % n_epochs)
        logging.info("\tbatch_size = %d" % batch_size)
        logging.info("\tstep_size = %f" % step_size)
        logging.info("\tdecay = %f" % decay)
        logging.info("\trandom_state = %d" % random_state)
    rng = check_random_state(random_state)

    # Make data
    if verbose:
        logging.info("Loading data...")
    # Assumes a pickle file name ends in "e" (".pickle") — TODO confirm.
    if filename_train[-1] == "e":
        with open(filename_train, "rb") as fd:
            X, y = pickle.load(fd)
    else:
        X, y = np.load(filename_train)
    X = np.array(X).astype(dict)
    y = np.array(y).astype(float)

    # BUG FIX: the shuffle previously used the global np.random, so
    # random_state did not actually make the subsampling reproducible.
    shuffle = rng.permutation(len(X))
    X, y = X[shuffle][:statlimit], y[shuffle][:statlimit]

    ### delete single particles ###
    # BUG FIX (perf): was an O(n^2) np.delete loop; an equivalent boolean
    # mask keeps the same jets in the same order.
    keep = np.array([len(jet["content"]) != 1 for jet in X], dtype=bool)
    X, y = X[keep], y[keep]

    if regression:
        # Baseline loss obtained by predicting the raw jet pt.
        zerovalue = square_error(y, [x["pt"] for x in X]).mean()

    X = list(X)
    if verbose:
        logging.info("\tfilename = %s" % filename_train)
        logging.info("\tX size = %d" % len(X))
        logging.info("\ty size = %d" % len(y))

    # Preprocessing
    if verbose:
        logging.info("Preprocessing...")
    tf = create_tf_transform(X)

    X = apply_tf_transform(X, tf)

    # Split into train+validation
    logging.info("Splitting into train and validation...")

    X_train, X_valid, y_train, y_valid = train_test_split(X,
                                                          y,
                                                          test_size=0.1,
                                                          random_state=rng)
    # Free the full copies before training.
    del X
    del y

    # Training
    if verbose:
        logging.info("Training...")

    if gated:
        predict = grnn_predict_gated
        init = grnn_init_gated
    else:
        predict = grnn_predict_simple
        init = grnn_init_simple

    trained_params = init(n_features, n_hidden, random_state=rng)
    n_batches = int(np.ceil(len(X_train) / batch_size))
    best_score = [np.inf]  # one-element lists so the closures can mutate
    best_params = [trained_params]

    def loss(X, y, params):
        # Mean squared error (regression) or mean log loss (classification).
        y_pred = predict(params, X, regression=regression)
        if regression:
            l = square_error(y, y_pred).mean()
        else:
            l = log_loss(y, y_pred).mean()
        return l

    def objective(params, iteration):
        # Deterministic mini-batch selection per iteration slot.
        rng = check_random_state(iteration % n_batches)
        start = rng.randint(len(X_train) - batch_size)
        idx = slice(start, start + batch_size)
        return loss(X_train[idx], y_train[idx], params)

    def callback(params, iteration, gradient):
        # BUG FIX: a `regression=False` parameter here shadowed the outer
        # `regression`; adam passes only three arguments, so the regression
        # logging branch below could never run. The closure variable is
        # used instead.
        if iteration % 100 == 0:
            # Checkpoint whenever the validation loss improves.
            the_loss = loss(X_valid, y_valid, params)
            if the_loss < best_score[0]:
                best_score[0] = the_loss
                best_params[0] = copy.deepcopy(params)

                with open(filename_model, "wb") as fd:
                    pickle.dump(best_params[0], fd)

            if verbose:
                if regression:
                    logging.info(
                        "%5d\t~loss(train) = %.4f\tloss(valid) = %.4f"
                        "\tbest_loss(valid) = %.4f" %
                        (iteration, loss(X_train[:5000], y_train[:5000],
                                         params), loss(X_valid, y_valid,
                                                       params), best_score[0]))
                else:
                    roc_auc = roc_auc_score(
                        y_valid, predict(params,
                                         X_valid,
                                         regression=regression))
                    logging.info(
                        "%5d\t~loss(train) = %.4f\tloss(valid) = %.4f"
                        "\troc_auc(valid) = %.4f\tbest_loss(valid) = %.4f" %
                        (iteration, loss(X_train[:5000], y_train[:5000],
                                         params), loss(
                                             X_valid, y_valid,
                                             params), roc_auc, best_score[0]))

    for i in range(n_epochs):
        logging.info("epoch = %d" % i)
        logging.info("step_size = %.4f" % step_size)
        if regression:
            logging.info("zerovalue = %.4f" % zerovalue)

        trained_params = adam(ag.grad(objective),
                              trained_params,
                              step_size=step_size,
                              num_iters=1 * n_batches,
                              callback=callback)
        # Exponential step-size decay per epoch.
        step_size = step_size * decay
Example #4 (score: 0)
File: PlotRoc.py — Project: cbernet/recnn
# Script excerpt: build and plot a ROC curve for the subjet-oriented
# anti-kt model (the plotting continues beyond this excerpt).
# Make the parent directory importable (project helpers live one level up).
sys.path.append("..")

# In[]:
# Paths to the train/test samples and the trained model for this jet algorithm.
basepath = '/data/conda/recnn/data'
name="anti-kt"
trainfile,testfile = basepath+"/npyfiles/subjet_oriented_"+name+"_train.npy",basepath+"/npyfiles/subjet_oriented_"+name+"_test.npy"
modelpath = basepath+"/models/subjet_oriented_"+name+"_model.pickle"

# In[]:
### Load training data ###
X, y = np.load(trainfile)
X=np.array(X).astype(dict)
y = np.array(y).astype(int)

### to rescale test data ###
# NOTE(review): elsewhere in this file create_tf_transform is called with X
# only — confirm this two-argument form matches the helper's signature.
tf = create_tf_transform(X,y)

### Load test data ###
X1, y1 = np.load(testfile)
X1 = np.array(X1).astype(dict)
y1 = np.array(y1).astype(int)

# Apply the transform fitted on the training sample to the test sample.
X1,y1=prepare_test_data(tf, X1, y1) 
# In[]:
### Build the roc ###
# r: ROC AUC (printed below); f/t: false/true positive rates for plotting.
r, f, t = build_roc(X1, y1, modelpath, func=grnn_predict_gated)
print(r)

# In[]:
plt.plot(f,t,label=name)
# Reference ROC of the standard ID, loaded for comparison.
tpr,fpr = np.load('/data/conda/recnn/data/roccurves/standardID_ROC.npy')