コード例 #1
0
def solve(tX, y):
    """Fit ridge regression on an 80/20 split and report accuracies.

    Args:
        tX: feature matrix.
        y: label vector.

    Returns:
        (train_accuracy, test_accuracy) tuple.
    """
    # Hold out 20% of the data for evaluation; fixed seed keeps the split
    # reproducible across runs.
    tX_tr, y_tr, tX_te, y_te = split_data(tX, y, ratio=0.8, seed=2019)

    # Train with a fixed regularization strength.
    w, _ = ridge_regression(y_tr, tX_tr, lambda_=1)

    # Score the fitted weights on both partitions.
    acc_tr = compute_accuracy(y_tr, predict_labels(w, tX_tr))
    acc_te = compute_accuracy(y_te, predict_labels(w, tX_te))
    return acc_tr, acc_te
コード例 #2
0
def train_3models(tX, y):
    """Train one ridge-regression model per PRI_jet_num group.

    Preprocesses the data, splits 80/20, partitions rows by PRI_jet_num,
    fits one model per partition (lambda chosen by cross-validation), and
    prints overall and per-group train/test accuracies.
    """
    # Preprocess everything together so log/root feature shifts are shared.
    prep_param = {
        "bias": True,
        "fill": True,
        "standardize": False,
        "degree": 8,
        "log": True,
        "root": True
    }
    tX_prep, y_prep, _ = preprocess_data(tX, y, prep_param)

    tX_tr, y_tr, tX_te, y_te = split_data(tX_prep, y_prep, ratio=0.8,
                                          seed=2019)

    # Partition both splits according to the PRI_jet_num value.
    tX_tr_splitted, indices_tr = divide_data(tX_tr)
    tX_te_splitted, indices_te = divide_data(tX_te)
    n_models = len(tX_tr_splitted)

    # Collect the labels belonging to each training partition.
    y_tr_splitted = []
    for i, idx in enumerate(indices_tr):
        y_tr_splitted.append(y_tr[idx])
        print(tX_tr_splitted[i].shape)

    # Fit one ridge model per group with its own cross-validated lambda.
    weights = []
    for i in range(n_models):
        lambda_ = lambda_cv(tX_tr_splitted[i], y_tr_splitted[i])
        print(f"Class {i}, lambda: {lambda_}")
        w = ridge_regression(y_tr_splitted[i], tX_tr_splitted[i], lambda_)[0]
        weights.append(w)
        print(len(weights[-1]))

    # Scatter each group's predictions back into full-length vectors.
    y_pr_tr = np.zeros(y_tr.shape)
    y_pr_te = np.zeros(y_te.shape)
    for i in range(n_models):
        y_pr_tr[indices_tr[i]] = predict_labels(weights[i], tX_tr_splitted[i])
        y_pr_te[indices_te[i]] = predict_labels(weights[i], tX_te_splitted[i])

    # Overall accuracy across all groups.
    acc_tr = compute_accuracy(y_tr, y_pr_tr)
    acc_te = compute_accuracy(y_te, y_pr_te)
    print(f"Total accuracy tr: {acc_tr}, te: {acc_te}")

    # Per-group accuracy breakdown.
    for i in range(n_models):
        acc_tr = compute_accuracy(y_tr[indices_tr[i]], y_pr_tr[indices_tr[i]])
        acc_te = compute_accuracy(y_te[indices_te[i]], y_pr_te[indices_te[i]])
        print(f"Class {i}, Accuracy tr: {acc_tr}, te: {acc_te}")
コード例 #3
0
def cross_validation(y, x, k_indices, k, lambda_, degree):
    """Run one fold of k-fold cross-validation with ridge regression.

    Fold `k` of `k_indices` is used as the test set; all remaining folds
    form the training set.

    Args:
        y: full label vector.
        x: full feature matrix.
        k_indices: 2D array of row indices, one row per fold.
        k: index of the fold held out for testing.
        lambda_: ridge regularization strength.
        degree: polynomial degree for feature augmentation.

    Returns:
        (loss, accuracy): the ridge-regression training loss and the
        accuracy on the held-out fold.
    """
    # Dividing in subgroups: fold k is the test set, the remaining folds
    # are flattened into the training index vector.
    te_indice = k_indices[k]
    tr_indice = k_indices[~(np.arange(k_indices.shape[0]) == k)]
    tr_indice = tr_indice.reshape(-1)

    y_te = y[te_indice]
    y_tr = y[tr_indice]
    tx_te = x[te_indice]
    tx_tr = x[tr_indice]

    # Preprocessing data: cleaning, standardizing and adding constant column
    tx_tr, tx_te = process_data(tx_tr, tx_te, y_tr, y_te)

    # Feature augmentation through polynomials
    tx_tr = build_poly(tx_tr, degree)
    tx_te = build_poly(tx_te, degree)

    # Printing degree and lambda tested
    print("Test: d = ", degree, "; l = ", lambda_)

    # Training with ridge regression
    w, loss = ridge_regression(y_tr, tx_tr, lambda_)

    # Computing prediction vector
    y_pred = predict_labels(w, tx_te)

    # Computing accuracy on test set
    accuracy = compute_accuracy(y_te, y_pred)

    # Log informations
    print("Accuracy = ", accuracy, "; loss = ", loss, "\n")

    # BUG FIX: the original returned `loss_te`, a name that is never
    # defined in this function (NameError on every call). Return the
    # training loss computed by ridge_regression instead.
    return loss, accuracy
コード例 #4
0
def logistic_trials(y, tx, tx_sub, degree_range, partitions=2):
    """Sweep polynomial degrees with logistic regression (SGD).

    For each degree in `degree_range`, expands the features, fits logistic
    regression on an 80/20 split, and records the model, its test loss,
    test accuracy, and predictions for the submission set `tx_sub`.

    Returns:
        (models, losses, accuracies, predictions) lists, one entry per
        degree tried.
    """
    ## Split data into test and training sets
    ## If partitions > 2, use k-fold cross-validation
    glob_tx_tr, glob_tx_te, glob_y_tr, glob_y_te = split_data(tx, y, 0.8)

    # Result accumulators: one entry per degree.
    models, losses, accuracies, predictions = [], [], [], []

    for degree in range(degree_range[0], degree_range[1]):
        print("Trying degree", degree, ":")

        tx_tr, tx_te, tx_pred = expand(degree, glob_tx_tr, glob_tx_te, tx_sub)

        # Train from an all-ones starting point.
        initial_w = np.ones(tx_tr.shape[1])
        w, loss = logistic_regression(glob_y_tr, tx_tr, initial_w, MAX_ITERS,
                                      GAMMA)
        print("\tTraining Loss = ", loss)

        # Evaluate on the held-out split; (y_test + 1) / 2 maps {-1, +1}
        # predictions onto {0, 1} before comparing with glob_y_te.
        y_test = predict_labels(w, tx_te)
        test_loss = compute_loss(glob_y_te, tx_te, w, func="logistic")
        accuracy = compute_accuracy((y_test + 1) / 2, glob_y_te)
        y_pred = predict_labels(w, tx_pred)

        print("\tTest Loss = ", test_loss, " Test Accuracy = ", accuracy)
        models.append(("logistic_SGD", degree, w))
        losses.append(test_loss)
        accuracies.append(accuracy)
        predictions.append(y_pred)
    return models, losses, accuracies, predictions
コード例 #5
0
def main():
    """Train per-jet-class ridge models and write a submission CSV.

    Loads train/test data, preprocesses them jointly (so log/root feature
    shifts match across the two sets), partitions rows by PRI_jet_num,
    trains one ridge-regression model per partition with a cross-validated
    lambda, reports train accuracy, and writes test predictions to
    OUTPUT_PATH.
    """
    y_train, tX_train, ids = load_csv_data(DATA_TRAIN_PATH)
    _, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

    np.random.seed(2019)

    # Preprocess data together to have the same shifts while creating log or root features
    tX_stacked = np.vstack((tX_train, tX_test))
    prep_param = {
        "bias": True,
        "fill": True,
        "standardize": False,
        "degree": 8,
        "log": True,
        "root": True
    }
    tX_stacked_prep, *_ = preprocess_data(tX_stacked, None, prep_param)
    tX_train_prep, tX_test_prep = np.split(tX_stacked_prep, [len(tX_train)])

    # Split data according to PRI_jet_num value
    tX_tr_splitted, indices_tr = divide_data(tX_train_prep)
    tX_te_splitted, indices_te = divide_data(tX_test_prep)
    n_models = len(indices_tr)

    # Labels belonging to each training partition.
    y_tr_splitted = []
    for i in range(n_models):
        y_tr_splitted.append(y_train[indices_tr[i]])

    # Train one model per jet class, choosing lambda by cross-validation.
    weights = []
    for i in range(n_models):
        lambda_ = lambda_cv(tX_tr_splitted[i], y_tr_splitted[i])
        print(f"Class {i}, lambda: {lambda_}")
        weights.append(
            ridge_regression(y_tr_splitted[i], tX_tr_splitted[i], lambda_)[0])

    # Scatter per-group predictions back into full-length vectors.
    y_pr_tr = np.zeros(tX_train.shape[0])
    y_pr_te = np.zeros(tX_test.shape[0])
    for i in range(n_models):
        y_pr_tr[indices_tr[i]] = predict_labels(weights[i], tX_tr_splitted[i])
        y_pr_te[indices_te[i]] = predict_labels(weights[i], tX_te_splitted[i])

    acc_tr = compute_accuracy(y_train, y_pr_tr)
    print(f"Total accuracy train: {acc_tr}")
    # BUG FIX: indexing counts[0]/counts[1] assumed both classes appear in
    # the predictions and raised IndexError when only one class was
    # predicted. Map each predicted value to its count instead.
    values, counts = np.unique(y_pr_te, return_counts=True)
    dist = dict(zip(values, counts))
    print(
        f"Distribution on test data class -1: {dist.get(-1.0, 0)}, class +1: {dist.get(1.0, 0)}"
    )

    create_csv_submission(ids_test, y_pr_te, OUTPUT_PATH)
コード例 #6
0
def ridge_trials(y, tx, tx_sub, degree_range, lambda_range, partitions=2):
    """Grid-search polynomial degree and lambda for ridge regression.

    For every (degree, lambda) pair, expands the features, fits ridge
    regression on an 80/20 split, and records the model, its test loss,
    test accuracy, and predictions for the submission set `tx_sub`.

    Returns:
        (models, losses, accuracies, predictions) lists, one entry per
        (degree, lambda) combination tried.
    """
    ## Split data into test and training sets
    ## If partitions > 2, use k-fold cross-validation
    glob_tx_tr, glob_tx_te, glob_y_tr, glob_y_te = split_data(tx, y, 0.8)

    # Result accumulators: one entry per (degree, lambda) pair.
    models, losses, accuracies, predictions = [], [], [], []

    # One lambda per integer exponent across the requested range.
    lambdas = np.logspace(lambda_range[0],
                          lambda_range[1],
                          num=1 + (lambda_range[1] - lambda_range[0]))
    for degree in range(degree_range[0], degree_range[1]):
        for lambda_ in lambdas:
            print("Trying degree", degree, "with lambda =", lambda_, ":")

            tx_tr, tx_te, tx_pred = expand(degree, glob_tx_tr, glob_tx_te,
                                           tx_sub)

            w, loss = ridge_regression(glob_y_tr, tx_tr, lambda_)
            print("\tTraining Loss = ", loss)

            # (y_test + 1) / 2 maps {-1, +1} predictions onto {0, 1}
            # before comparing with glob_y_te.
            y_test = predict_labels(w, tx_te)
            test_loss = compute_loss(glob_y_te, tx_te, w)
            accuracy = compute_accuracy((y_test + 1) / 2, glob_y_te)
            y_pred = predict_labels(w, tx_pred)

            print("\tTest Loss = ", test_loss, " Test Accuracy = ", accuracy)
            models.append(("ridge_regression", degree, lambda_, w))
            losses.append(test_loss)
            accuracies.append(accuracy)
            predictions.append(y_pred)
    return models, losses, accuracies, predictions
コード例 #7
0
def train_ref(model,
              criterion,
              optimizer,
              input,
              target,
              nb_epochs=200,
              verbose=False):
    """Train `model` with mini-batch gradient descent.

    Args:
        model: callable module mapping a batch of inputs to outputs.
        criterion: loss function taking (output, target_batch).
        optimizer: optimizer whose step() applies the computed gradients.
        input: training inputs; batched along dimension 0.
        target: training targets, aligned with `input`.
        nb_epochs: number of passes over the data.
        verbose: if True, report progress after each epoch.

    Returns:
        (loss_evolution, precision_evolution): per-epoch summed loss and
        per-epoch accuracy percentage.
    """

    mini_batch_size = 100

    # Per-epoch records.
    loss_evolution = []
    precision_evolution = []

    # Actual training loop.
    for e in range(nb_epochs):
        loss_e = 0.
        for b in range(0, input.size(0), mini_batch_size):
            output = model(input.narrow(0, b, mini_batch_size))
            loss = criterion(output, target.narrow(0, b, mini_batch_size))
            # BUG FIX: accumulate the Python float, not the tensor.
            # `loss_e += loss` kept every batch's autograd graph alive for
            # the whole epoch (memory growth) and stored tensors in the
            # returned history.
            loss_e += loss.item()
            model.zero_grad()
            loss.backward()
            optimizer.step()

        # Record accuracy (as a percentage) and the epoch's summed loss.
        precision_evolution.append(
            helpers.compute_accuracy(model, input, target) / target.shape[0] *
            100)
        loss_evolution.append(loss_e)

        if verbose:
            message = "epoch {:3}, loss {:10.4}".format(e, loss_e)
            helpers.update_progress((e + 1.) / nb_epochs, message=message)

    return loss_evolution, precision_evolution
コード例 #8
0
ファイル: run.py プロジェクト: guillaumemichel/Project1-ML
# Keep prompting until the user enters a model index between 0 and 7.
model = -1
while not 0 <= model <= 7:
    try:
        model = int(input("Enter a valid number: "))
    except ValueError:
        model = -1

# Load the train and test data.
print("Loading the data...")

y_tr, input_data_train, _ = load_csv_data("data/train.csv")
y_te, input_data_test, ids_test = load_csv_data("data/test.csv")

# Preprocess train and test data.
print("Preprocessing the data...")

tx_tr = preprocess(input_data_train)
tx_te = preprocess(input_data_test)

# Compute the optimal weights with the selected model.
print("Computing the optimal weights...")

losses, optimal_weights = choose_model(y_tr, tx_tr, models[model],
                                       np.zeros(tx_tr.shape[1]), 500, 2e-6,
                                       0.0008)

print("Test accuracy: ", compute_accuracy(y_te, tx_te, optimal_weights))
print("Training accuracy: ", compute_accuracy(y_tr, tx_tr, optimal_weights))

# Build and save the submission file.
y_pred = predict_labels(optimal_weights, tx_te)
create_csv_submission(ids_test, y_pred, "submission.csv")
コード例 #9
0
ファイル: testing.py プロジェクト: Arainay/cifar10
import torch
import torchvision

from Classifier import Classifier
from classes import classes
from helpers import show_image, compute_accuracy, get_test_set_and_loader

if __name__ == '__main__':
    torch.multiprocessing.freeze_support()

    _, test_loader = get_test_set_and_loader()

    # Grab one batch from the test loader.
    dataiter = iter(test_loader)
    # BUG FIX: DataLoader iterators no longer expose a public .next()
    # method (removed in PyTorch 1.13); use the builtin next() instead.
    images, labels = next(dataiter)

    # Show the batch and the ground-truth labels of its first 4 images.
    show_image(torchvision.utils.make_grid(images))
    print('GroundTruth: ',
          ' '.join('%5s' % classes[labels[j]] for j in range(4)))

    # Restore the trained network weights from disk.
    classifier = Classifier()
    classifier.load_state_dict(torch.load('./model_storage/cifar_net.pth'))

    # Predicted class = argmax over the classifier's output scores.
    outputs = classifier(images)
    _, predicted = torch.max(outputs, 1)
    print('Predicted: ',
          ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

    compute_accuracy(test_loader, classifier)
コード例 #10
0
ファイル: training.py プロジェクト: Arainay/pytorch-intro
# Training loop: one pass over train_loader per epoch, recording running
# average loss, training accuracy, and validation accuracy at every step.
for epoch in range(num_epoch):
    nn_model.train()

    loss_acc = 0
    correct_samples = 0
    total_samples = 0
    for i_step, (x, y) in enumerate(train_loader):
        prediction = nn_model(x)
        loss_value = loss(prediction, y)

        optimizer.zero_grad()
        loss_value.backward()
        optimizer.step()

        # Running accuracy over the epoch so far.
        _, indices = torch.max(prediction, 1)
        correct_samples += torch.sum(indices == y)
        total_samples += y.shape[0]
        # BUG FIX: accumulate the Python float, not the tensor.
        # `loss_acc += loss_value` kept every step's autograd graph alive
        # for the whole epoch, growing memory with each batch.
        loss_acc += loss_value.item()
        ave_loss = loss_acc / (i_step + 1)

        train_accuracy = 100 * float(correct_samples) / total_samples
        # NOTE(review): validation accuracy is recomputed on every training
        # step, which is expensive; consider moving it to once per epoch.
        val_accuracy = compute_accuracy(nn_model, val_loader)

        loss_history.append(float(ave_loss))
        train_history.append(train_accuracy)
        val_history.append(val_accuracy)

        print("Average loss: %f, Train accuracy: %f, Val accuracy: %f" %
              (ave_loss, train_accuracy, val_accuracy))
コード例 #11
0
# Define the optimizer: plain SGD over the model's parameters.
optim = Optimizer.SGD(parameters=model.param(), lr=1e-1)

# Train the model, collecting per-epoch loss and accuracy histories.
loss, accuracy = train(model,
                       criterion,
                       optim,
                       train_input,
                       train_target,
                       nb_epochs=200,
                       verbose=True)

# Compute loss and accuracy on the test set.
output = model.forward(test_input)
loss_test = criterion.forward(output, test_target)
# NOTE(review): compute_accuracy appears to return a raw count here (it is
# divided by the number of targets below) — confirm against its definition.
accuracy_test = compute_accuracy(model.forward, test_input, test_target)

# Report final train metrics and test metrics (accuracy as a percentage).
print("")
print("TRAIN:  accuracy {:.4}%, loss {:.4}".format(accuracy[-1], loss[-1]))
print("TEST :  accuracy {:.4}%, loss {:.4}".format(
    accuracy_test / test_target.shape[0] * 100, loss_test))

# Visualization: loss and accuracy curves side by side, saved to disk.
plt.subplot(121)
plt.plot(loss, label="loss", c="orange")
plt.legend()
plt.subplot(122)
plt.plot(accuracy, label="accuracy", c="blue")
plt.legend()
plt.savefig("../results/plots/test.png", bbox_inches="tight")
#plt.show()
コード例 #12
0
        k_indices = k_fold_indices(train_data_split.shape[0], 5, SEED)
        for i, deg in enumerate(POSSIBLE_DEGREES):
            train_data, _ = preprocessing_pipeline(train_data_split,
                                                   degree=deg)
            train_set_folds = k_fold_cross_split_data(train_classes_split,
                                                      train_data, k_indices)

            for j, lambda_ in enumerate(POSSIBLE_LAMBDA_VALUES):
                folds_train_accuracy = []
                folds_validation_accuracy = []

                # Train a Ridge Regression model on each fold
                for x_train, y_train, x_test, y_test in train_set_folds:
                    w, train_loss = ridge_regression(y_train, x_train, lambda_)
                    folds_train_accuracy.append(
                        compute_accuracy(predict_labels(w, x_train), y_train))
                    folds_validation_accuracy.append(
                        compute_accuracy(predict_labels(w, x_test), y_test))
                train_accuracy_matrix[jet_num, 0, i, j] = \
                    (np.mean(folds_train_accuracy), np.std(folds_train_accuracy))
                validation_accuracy_matrix[jet_num, 0, i, j] = \
                    (np.mean(folds_validation_accuracy), np.std(folds_validation_accuracy))

            train_data_log_svm = preprocessing_pipeline(train_data_split,
                                                        degree=deg,
                                                        norm_first=False)
            train_set_folds = k_fold_cross_split_data(train_classes_split,
                                                      train_data_log_svm,
                                                      k_indices)

            for j, lambda_ in enumerate(POSSIBLE_LAMBDA_LOG):