Example 1
def main(args=defaults):
    dataset = np.loadtxt("FM_dataset.dat")
    #######################################################################
    #                       ** START OF YOUR CODE **
    #######################################################################

    trainRatio = 0.8
    testRatio = 0.1
    numEpochs = 500
    batchsize = 64

    # shuffle the dataset before splitting
    np.random.shuffle(dataset)

    # preprocess data
    prep = Preprocessor(dataset)
    dataset = prep.apply(dataset)

    # retrieve X and Y columns
    X, Y = dataset[:, 0:3], dataset[:, 3:6]

    # create loaders
    trainloader, validloader, testloader = helpers.loadTrainingData(
        X, Y, trainRatio, testRatio, batchsize)

    # instantiate the model
    model = Net(3, 3, args['l1'], args['l2'], args['l3'], args['l4'])

    # set up the loss function and optimiser
    loss_function = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())

    # train the model
    helpers.trainModel(model, optimizer, loss_function, numEpochs, trainloader,
                       validloader)
    # evaluate the model
    helpers.testModel(model, loss_function, numEpochs, testloader)

    torch.save(model.state_dict(), 'best_model_reg.pth')

    # collect every test batch into single arrays
    x_batches, y_batches = [], []
    for x, y in testloader:
        x_batches.append(np.array(x))
        y_batches.append(np.array(y))
    x_store = np.concatenate(x_batches, axis=0)
    y_store = np.concatenate(y_batches, axis=0)

    evaluate_architecture(model, torch.Tensor(x_store), torch.Tensor(y_store),
                          prep)
    #######################################################################
    #                       ** END OF YOUR CODE **
    #######################################################################
    # data is normalised in this function
    illustrate_results_FM(model, prep)
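These examples all lean on a project-specific Preprocessor whose constructor fits scaling statistics and whose apply()/revert() methods transform data and undo the transformation. A minimal sketch of the interface they assume, written here as min-max scaling (the coursework's real implementation may differ):

import numpy as np

class Preprocessor:
    """Hypothetical min-max scaler matching the apply()/revert() calls above."""

    def __init__(self, data):
        # "fit" on construction: remember each column's minimum and range
        self._min = data.min(axis=0)
        self._range = data.max(axis=0) - self._min
        self._range[self._range == 0] = 1.0  # guard against constant columns

    def apply(self, data):
        # scale every column into [0, 1] with the fitted statistics
        return (data - self._min) / self._range

    def revert(self, data):
        # map scaled values back to the original units
        return data * self._range + self._min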
Example 2
def predict_hidden(dat):
    # Preprocess data: fit the preprocessor on the input columns only,
    # so that its statistics match the columns it is applied to
    x = dat[:, :3]
    y = dat[:, 3:]
    prep_input = Preprocessor(x)
    x_pre = prep_input.apply(x)

    model = load_model()
    pred = model.predict(x_pre)
    return pred
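A usage sketch for the function above; the file name is only illustrative, the array just needs the same 6-column layout as FM_dataset.dat:

hidden = np.loadtxt("FM_hidden.dat")  # hypothetical held-out file
preds = predict_hidden(hidden)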
Example 3
def main():
    dataset = np.loadtxt("ROI_dataset.dat")
    #######################################################################
    #                       ** START OF YOUR CODE **
    #######################################################################

    input_dim = 3
    neurons = [16, 4]
    activations = ["relu", "identity"]
    network = MultiLayerNetwork(input_dim, neurons, activations)

    np.random.shuffle(dataset)

    x = dataset[:, :3]
    y = dataset[:, 3:]

    split_idx = int(0.8 * len(x))

    x_train = x[:split_idx]
    y_train = y[:split_idx]
    x_val = x[split_idx:]
    y_val = y[split_idx:]

    # fit the preprocessor on the training inputs only, so no validation
    # statistics leak into training
    prep = Preprocessor(x_train)
    x_train_pre = prep.apply(x_train)
    x_val_pre = prep.apply(x_val)

    trainer = Trainer(
        network=network,
        batch_size=8,
        nb_epoch=100,
        learning_rate=0.01,
        loss_fun="cross_entropy",
        shuffle_flag=True,
    )

    trainer.train(x_train_pre, y_train)
    print("Train loss = ", trainer.eval_loss(x_train_pre, y_train))
    print("Validation loss = ", trainer.eval_loss(x_val_pre, y_val))

    preds = network(x_val_pre).argmax(axis=1).squeeze()
    targets = y_val.argmax(axis=1).squeeze()
    accuracy = (preds == targets).mean()
    print("Validation accuracy: {}".format(accuracy))


    #######################################################################
    #                       ** END OF YOUR CODE **
    #######################################################################
    illustrate_results_ROI(network, prep)
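Several of the classification examples call an evaluate_architecture helper returning an accuracy, a confusion matrix and a per-label metric dictionary (see Example 11). One plausible implementation with scikit-learn; the dictionary layout is an assumption inferred from the call sites:

import numpy as np
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

def evaluate_architecture(targets, preds):
    # collapse one-hot rows (or class scores) to class indices
    true_labels = np.asarray(targets).argmax(axis=1)
    pred_labels = np.asarray(preds).argmax(axis=1)

    accuracy = (true_labels == pred_labels).mean()
    cm = confusion_matrix(true_labels, pred_labels)

    precision, recall, f1, _ = precision_recall_fscore_support(
        true_labels, pred_labels, zero_division=0)
    label_dict = {
        "label{}".format(i + 1): {"precision": p, "recall": r, "f1": f}
        for i, (p, r, f) in enumerate(zip(precision, recall, f1))
    }
    return accuracy, cm, label_dict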
Example 4
def predict_hidden(dataset):
    input_dim = 3
    x_data = dataset[:, :input_dim]
    y_data = dataset[:, input_dim:]
    prep_input = Preprocessor(x_data)
    x_data_pre = prep_input.apply(x_data)

    # Load the pickled network
    with open("trained_ROI.pickle", 'rb') as f:
        model = pickle.load(f)

    # Generate the output
    pred = model.predict(x_data_pre)  # use Keras to make the prediction
    oneHotEncoding = one_hot_encode(pred, pred.shape[1])
    return oneHotEncoding
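A minimal sketch of the one_hot_encode helper called above, assuming it simply argmaxes the per-class scores (the project's real helper may differ):

import numpy as np

def one_hot_encode(preds, n_classes):
    # turn each row of class scores into a one-hot row via argmax
    idx = np.asarray(preds).argmax(axis=1)
    encoded = np.zeros((len(idx), n_classes))
    encoded[np.arange(len(idx)), idx] = 1
    return encoded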
Example 5
def predict_hidden(dataset):
    # Note: the dataset is deliberately not shuffled here, so each
    # prediction stays aligned with its input row

    # Preprocess the incoming dataset
    prep_input = Preprocessor(dataset)
    test_set = prep_input.apply(dataset)

    testset = torch.from_numpy(test_set).float()

    model = torch.load('best_model_FM.pt')

    predictions = model(testset)

    print("Predictions from best model: " + str(predictions.data.numpy()))
    return predictions.data.numpy()
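At inference time it is usually worth switching the network to evaluation mode and disabling gradient tracking; a sketch of how the loading step above could do that:

import torch

model = torch.load('best_model_FM.pt')
model.eval()               # disable dropout / use running batch-norm statistics
with torch.no_grad():      # no autograd bookkeeping is needed for prediction
    predictions = model(testset)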
Example 6
def test_model(network, testset):
    # shuffle the testset
    np.random.shuffle(testset)

    x_test = testset[:, :3]
    y_test = testset[:, 3:]

    prep_input = Preprocessor(x_test)
    x_test_pre = prep_input.apply(x_test)

    x_test_torch = torch.from_numpy(x_test_pre).float()

    # get the result of testing on the best model
    mse, evs, r2 = evaluate_architecture(network, x_test_torch, y_test)

    # return the mean squared error, the explained variance score and the r2
    stats = (mse, evs, r2)
    return stats
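One plausible body for the evaluate_architecture call above, using scikit-learn's regression metrics; the signature is taken from the call site, the implementation is an assumption:

from sklearn.metrics import (mean_squared_error,
                             explained_variance_score, r2_score)

def evaluate_architecture(network, x_test, y_test):
    # run the network, then score predictions against the targets
    preds = network(x_test).detach().numpy()
    mse = mean_squared_error(y_test, preds)
    evs = explained_variance_score(y_test, preds)
    r2 = r2_score(y_test, preds)
    return mse, evs, r2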
Example 7
def predict_hidden(dataset):

    input_dim = 3

    # Pre-process data
    x_data = dataset[:, :input_dim]
    y_data = dataset[:, input_dim:]
    prep_input = Preprocessor(x_data)
    x_data_pre = prep_input.apply(x_data)

    # Load the pickled network
    with open("trained_FM.pickle", 'rb') as f:
        model = pickle.load(f)

    # Generate the output
    preds = model.predict(x_data_pre)  # use Keras to make the prediction

    return preds
Example 8
def main():

    # Load data
    dat = np.loadtxt("FM_dataset.dat")
    # Shuffle data
    np.random.shuffle(dat)

    x = dat[:, :3]
    y = dat[:, 3:]

    split_idx = int(0.8 * len(x))

    x_train = x[:split_idx]
    y_train = y[:split_idx]
    x_val = x[split_idx:]
    y_val = y[split_idx:]

    # Preprocess the input data, fitting on the training inputs only
    prep_input = Preprocessor(x_train)
    x_train_pre = prep_input.apply(x_train)
    x_val_pre = prep_input.apply(x_val)

    # Construct neural network
    neurons = [300, 3]
    activations = ["relu", "identity"]
    net = MultiLayerNetwork(3, neurons, activations)

    trainer = Trainer(network=net,
                      batch_size=50,
                      nb_epoch=200,
                      learning_rate=0.01,
                      loss_fun="mse",
                      shuffle_flag=True)

    trainer.train(x_train_pre, y_train)

    evaluate_architecture(trainer, x_val_pre, y_val)

    illustrate_results_FM(net, prep_input)
Example 9
def load_data(filepath):
    # load data
    dat = np.loadtxt(filepath)

    # Shuffle data
    np.random.shuffle(dat)
    x = dat[:, :3]
    y = dat[:, 3:]

    # Split data into training and validation set
    split_idx = int(0.8 * len(x))
    x_train = x[:split_idx]
    y_train = y[:split_idx]
    x_val = x[split_idx:]
    y_val = y[split_idx:]

    # Preprocess data, fitting the preprocessor on the training inputs only
    prep_input = Preprocessor(x_train)
    x_train_pre = prep_input.apply(x_train)
    x_val_pre = prep_input.apply(x_val)

    return x_train_pre, y_train, x_val_pre, y_val
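Typical usage, with the dataset path as the only argument:

x_train_pre, y_train, x_val_pre, y_val = load_data("FM_dataset.dat")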
Example 10
def predict_hidden(dataset_path):
    # -------  Process data -------- #
    dataset = np.loadtxt(dataset_path)
    prep = Preprocessor(dataset)
    dataset = prep.apply(dataset)
    X, Y = torch.Tensor(dataset[:, 0:3]), torch.Tensor(dataset[:, 3:6])

    # ------- Instantiate model ----- #
    model = Net(3, 3, 237, 248, 106, 115)

    # ----- Load our best model ------ #
    model.load_state_dict(torch.load('best_model_reg.pth'))

    # ----- Compute the output ------ #
    results = model(X)
    results = results.detach().numpy()

    # ----- Revert data processing ------ #
    dataset[:, 3:6] = results
    dataset = prep.revert(dataset)
    prediction = dataset[:, 3:6]

    return prediction  # Returns a numpy array of shape (n_samples, 3)
Example 11
def main(_neurons,
         _activationFunctionHidden,
         _activationFunctionOutput,
         _lossFunction,
         _batchSize,
         _learningRate,
         _numberOfEpochs,
         _writeToCSV=False,
         _hyperparameterTuning=False):
    dataset = np.loadtxt("ROI_dataset.dat")

    #######################################################################
    #                       ** START OF YOUR CODE **
    #######################################################################
    # Setup hyperparameters and neural network
    input_dim = 3  # CONSTANT: Stated in specification

    np.random.shuffle(dataset)
    # Separate data columns into x (input features) and y (output)
    x = dataset[:, :input_dim]
    y = dataset[:, input_dim:]

    split_idx = int(0.8 * len(x))

    # Split data by rows into a training set and a validation set
    x_train = x[:split_idx]
    y_train = y[:split_idx]
    # Validation dataset
    x_val = x[split_idx:]
    y_val = y[split_idx:]

    # Apply preprocessing to the inputs only; the targets are one-hot
    # class labels, so they are left unscaled
    x_prep_input = Preprocessor(x_train)

    x_train_pre = x_prep_input.apply(x_train)
    y_train_pre = y_train

    x_val_pre = x_prep_input.apply(x_val)
    y_val_pre = y_val

    # fix random seed for reproducibility
    seed = 7
    np.random.seed(seed)

    if _hyperparameterTuning:
        # create the scikit-learn wrapper around the Keras model
        model = KerasClassifier(build_fn=create_model,
                                epochs=_numberOfEpochs,
                                batch_size=_batchSize)

        # Candidate hyperparameters for the search; the full space takes a
        # long time to explore, so only a few values are kept here
        batch_size = [16, 32, 128]
        epochs = [10, 50, 250]
        learn_rate = [1e-1, 1e-3, 1e-6]
        neurons = [5, 15, 20, 50]
        hidden_layers = [3, 5, 10, 25]

        param_grid = dict(epochs=epochs,
                          batch_size=batch_size,
                          learn_rate=learn_rate,
                          neurons=neurons,
                          hidden_layers=hidden_layers)

        # perform a randomized search with 10-fold cross-validation
        grid = RandomizedSearchCV(estimator=model,
                                  param_distributions=param_grid,
                                  n_jobs=-1,
                                  cv=10)

        grid_result = grid.fit(x_train_pre, y_train_pre)

        print("Best: %f using %s" %
              (grid_result.best_score_, grid_result.best_params_))
        best_model = grid.best_estimator_.model

        # Evaluate the neural network
        preds = best_model.predict(x_val_pre)
        targets = y_val_pre
        accuracy, confusionMatrix, labelDict = evaluate_architecture(
            targets, preds)

        # Optional: Print results
        print(confusionMatrix)
        for i in range(len(labelDict)):
            key = "label" + str(i + 1)
            print(key, labelDict[key])
        print("Accuracy: ", accuracy)

        # Optional: Append x and y values, to be plotted at the end
        global xValues, yValues
        xValues.append(len(neurons) - 1)
        for i in range(len(labelDict)):
            key = "label" + str(i + 1)
            metric = "f1"
            yValues[i].append(labelDict[key][metric])
        yValues[len(yValues) - 1].append(accuracy)

        filename = 'trained_ROI.pickle'
        with open(filename, 'wb') as f:
            pickle.dump(best_model, f)

    else:
        model = create_model()
        history = model.fit(x_train_pre,
                            y_train_pre,
                            batch_size=_batchSize,
                            epochs=_numberOfEpochs,
                            verbose=1,
                            validation_data=(x_val_pre, y_val_pre))

        score = model.evaluate(x_val_pre, y_val_pre, verbose=0)
        print('Validation loss:', score[0])
        print('Validation accuracy:', score[1])

        # Evaluate the neural network
        preds = model.predict(x_val_pre)
        targets = y_val_pre
        accuracy, confusionMatrix, labelDict = evaluate_architecture(
            targets, preds)

        # Optional: Print results
        print(confusionMatrix)
        for i in range(len(labelDict)):
            key = "label" + str(i + 1)
            print(key, labelDict[key])

        print("Accuracy: ", accuracy)

    # predict the hidden dataset using the best model
    predictions = predict_hidden(dataset)
    print(predictions)
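The search above forwards learn_rate, neurons and hidden_layers to a create_model build function that is not shown. A hedged sketch of what such a builder could look like for the 4-class ROI task; the layer sizes, optimiser and a recent tf.keras API are assumptions:

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

def create_model(learn_rate=1e-3, neurons=16, hidden_layers=3):
    # stack of equally sized ReLU layers with a softmax over the 4 ROI classes
    model = Sequential()
    model.add(Dense(neurons, input_dim=3, activation='relu'))
    for _ in range(hidden_layers - 1):
        model.add(Dense(neurons, activation='relu'))
    model.add(Dense(4, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adam(learning_rate=learn_rate),
                  metrics=['accuracy'])
    return model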
Example 12
def config_and_train(training, validation, iterator):
    # unpack the hyperparameter tuple
    batchsize, neurons, epochs, learning_rate, dropout = iterator

    # Configure the datasets: shuffle both splits in place
    np.random.shuffle(training)
    np.random.shuffle(validation)

    x_training = training[:, :3]
    y_training = training[:, 3:]
    x_validation = validation[:, :3]
    y_validation = validation[:, 3:]

    prep_input = Preprocessor(x_training)

    x_train_pre = prep_input.apply(x_training)
    x_val_pre = prep_input.apply(x_validation)

    x_dim = x_training.shape[1]  # get number of input neurons
    y_dim = y_training.shape[1]  # get number of output neurons

    # Create the network
    model = torch.nn.Sequential()

    model.add_module("dense1", torch.nn.Linear(x_dim, neurons))
    model.add_module("sig1", torch.nn.Sigmoid())
    model.add_module("dropout1", torch.nn.Dropout(dropout))
    model.add_module("dense2", torch.nn.Linear(neurons, y_dim))

    # define an optimiser and learning rate
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    # Look into Adam?

    # define a loss function (MSE needed for regression problem)
    loss_function = nn.MSELoss()

    # convert numpy array into torch tensors
    x_training_pre = torch.from_numpy(x_train_pre).float()
    y_training = torch.from_numpy(y_training).float()
    x_validation_pre = torch.from_numpy(x_val_pre).float()
    y_validation = torch.from_numpy(y_validation).float()

    # Set up the dataset to use the dataloader
    tensor_training_set = TensorDataset(x_training_pre, y_training)
    train_dataloader = DataLoader(tensor_training_set,
                                  batch_size=batchsize,
                                  shuffle=True)

    for _ in range(epochs):
        # set the model to training mode
        model.train()
        for xb, yb in train_dataloader:
            pred = model(xb)
            loss = loss_function(pred, yb)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

    # report the loss on the last training batch
    print(loss_function(model(xb), yb))

    # Evaluate on the validation set
    mse, evs, r2 = evaluate_architecture(model, x_validation_pre, y_validation)

    # return the model, the mean squared error, the explained variance score and the r2
    return model, mse, evs, r2
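An example call, with a hypothetical hyperparameter tuple in the (batchsize, neurons, epochs, learning_rate, dropout) order the function unpacks:

dat = np.loadtxt("FM_dataset.dat")
np.random.shuffle(dat)
split = int(0.8 * len(dat))
config = (64, 32, 100, 0.01, 0.2)  # hypothetical values
model, mse, evs, r2 = config_and_train(dat[:split], dat[split:], config)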
Example 13
def main(_neurons,
         _activationFunctionHidden,
         _activationFunctionOutput,
         _lossFunction,
         _batchSize,
         _learningRate,
         _numberOfEpochs,
         _writeToCSV=False,
         _hyperparameterTuning=False):

    dataset = np.loadtxt("FM_dataset.dat")

    #######################################################################
    #                       ** START OF YOUR CODE **
    #######################################################################

    input_dim = 3  # CONSTANT: Stated in specification

    #shuffle the data
    np.random.shuffle(dataset)

    # Separate data columns into x (input features) and y (output)
    x = dataset[:, :input_dim]
    y = dataset[:, input_dim:]

    split_idx = int(0.8 * len(x))

    # Split data by rows into a training set and a validation set
    x_train = x[:split_idx]
    y_train = y[:split_idx]
    x_val = x[split_idx:]
    y_val = y[split_idx:]

    # Apply preprocessing to the data
    x_prep_input = Preprocessor(x_train)
    y_prep_input = Preprocessor(y_train)

    x_train_pre = x_prep_input.apply(x_train)
    y_train_pre = y_prep_input.apply(y_train)

    x_val_pre = x_prep_input.apply(x_val)
    y_val_pre = y_prep_input.apply(y_val)

    # fix random seed for reproducibility
    seed = 7
    np.random.seed(seed)

    if _hyperparameterTuning:

        # create the scikit-learn wrapper around the Keras model
        model = KerasRegressor(build_fn=create_model,
                               epochs=_numberOfEpochs,
                               batch_size=_batchSize)

        # Candidate hyperparameters for the scikit-learn grid search
        batch_size = [32]
        epochs = [100, 250, 500, 1000]
        learn_rate = [1e-3]
        neurons = [5]
        hidden_layers = [3]
        # Other axes worth exploring: activation ('relu', 'sigmoid', 'tanh'),
        # optimizer ('SGD', 'RMSprop', 'Adam'), dropout_rate (0.0 to 0.9)

        param_grid = dict(epochs=epochs,
                          batch_size=batch_size,
                          learn_rate=learn_rate,
                          neurons=neurons,
                          hidden_layers=hidden_layers)

        # perform a grid search with 5-fold cross-validation
        grid = GridSearchCV(estimator=model,
                            param_grid=param_grid,
                            n_jobs=-1,
                            cv=5)

        grid_result = grid.fit(x_train_pre, y_train_pre)

        #summarize results of hyperparameter search
        print("Best: %f using %s" %
              (grid_result.best_score_, grid_result.best_params_))
        means = grid_result.cv_results_['mean_test_score']
        stds = grid_result.cv_results_['std_test_score']
        params = grid_result.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

        #extract the best model
        best_model = grid.best_estimator_.model

        #Evaluate the best model
        preds = best_model.predict(x_val_pre)
        targets = y_val_pre
        mse = evaluate_architecture(targets, preds)
        print("Mean squared error of best model:", mse)

        # save the best model
        filename = 'trained_FM.pickle'
        with open(filename, 'wb') as f:
            pickle.dump(best_model, f)

    else:

        model = create_model()
        history = model.fit(x_train_pre,
                            y_train_pre,
                            batch_size=_batchSize,
                            epochs=_numberOfEpochs,
                            verbose=1,
                            validation_data=(x_val_pre, y_val_pre))

        score = model.evaluate(x_val_pre, y_val_pre, verbose=0)
        print('Validation loss:', score[0])

    # predict the hidden dataset using the best model
    predictions = predict_hidden(dataset)
    print(predictions)
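Pickling a Keras model, as the tuning branch above does, can break across backend versions; Keras's own save/load API is generally more robust. A sketch with an illustrative file name:

from keras.models import load_model

best_model.save('trained_FM.h5')       # architecture + weights + optimiser state
restored = load_model('trained_FM.h5')
preds = restored.predict(x_val_pre)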