Beispiel #1
0
def GeneticAlgorithm_Initialize(game, children=None):
    """Attach a neural network to every player in *game* for a new generation.

    Increments ``game.generation`` (creating it at 1 on the first call).
    When *children* is given, each player's network is trained on one
    child's recorded input/output data, consumed from the end of the list;
    otherwise every player gets a freshly initialised network.

    Args:
        game: game object exposing ``players``, ``numberOfPlayers`` and
            ``drawLoadingScreen``; gains/updates a ``generation`` attribute.
        children: optional list of dicts with "input" and "output" keys,
            one popped per player.
    """
    import neural

    # Track how many generations have been initialised so far.
    if not hasattr(game, 'generation'):
        game.generation = 1
    else:
        game.generation += 1

    # Initialize a neural network for each player.
    for count, player in enumerate(game.players, start=1):
        game.drawLoadingScreen("Training players {}/{}".format(
            count, game.numberOfPlayers))
        # Pump the event queue so the window stays responsive while training.
        core.pygame.event.get()

        if children:
            # Blank network trained on one child's recorded data.
            network = neural.NeuralNetwork(False)
            child = children.pop()
            network.training_data = child["input"]
            network.target_data = child["output"]
            network.train()
        else:
            # No training data available: random initialisation.
            network = neural.NeuralNetwork(True)

        player.neural = network
    def __init__(self, filename = DEFAULT_BIRD_TEXTURE, scale = 1, image_x = 0,
                    image_y = 0, image_width = 0, image_height = 0, center_x = 0,
                    center_y = 0, repeat_count_x = 1, repeat_count_y = 1, neural_net = None):
        """Create a bird sprite with an attached neural-network controller.

        All sprite arguments are forwarded to the parent constructor.
        *neural_net* lets the genetic algorithm inject an evolved network;
        when None, a fresh 4-input / 1-output network is created.
        """
        super().__init__(filename = filename, scale = scale, image_x = image_x, image_y = image_y,
                            image_width = image_width, image_height = image_height, center_x = center_x,
                            center_y = center_y, repeat_count_x = repeat_count_x, repeat_count_y = repeat_count_y)

        # Use identity comparison with None (PEP 8), not ``== None``.
        if neural_net is None:
            # Description on in_nodes:
            #   - y position of bird
            #   - y position of a piece of the nearest col
            #   - x position of nearest col
            #   - dy of bird
            self.neural_net = neural.NeuralNetwork(4, HIDDEN_NODES, 1)
        else:
            self.neural_net = neural_net

        # Initial conditions
        self.position = [BIRD_X_POSITION, random.randint(250, 450)]
        self.flap = False
        self.dead = False
        self.disabled = False
        self.dy = 0

        # Information for genetic algorithm
        self.score = 0
        self.fitness = 0
Beispiel #3
0
def test_mnist_nn():
    """Train a (784, 100, 10) network on MNIST and print per-epoch errors."""
    # Raw string: '\m' is an invalid escape sequence and raises a
    # SyntaxWarning on modern Python; the raw form keeps the same bytes.
    train_set, valid_set, test_set = load_data(r'..\mnist\mnist.pkl.gz')

    train_data = convert_data(train_set)
    valid_data = convert_data(valid_set)
    test_data = convert_data(test_set)

    nn = neural.NeuralNetwork( (784, 100, 10) )

    for i in range(100):
        # One stochastic-gradient-descent pass per epoch.
        nn.train_SGD(train_data, batch_size=100, eta=6.0)

        train_err, train_count = nn.evaluate( train_data )
        valid_err, valid_count = nn.evaluate( valid_data )

        print('epoch:', i, 'trial_err:', train_err, 'train_count:', train_count, 'valid_err:', valid_err, 'valid_count:', valid_count, )
Beispiel #4
0
def ManualTraining_Initialize(game):
    """Give every player a blank network plus a 'learning' flag.

    In this mode the player teaches the neural network:
    - each jump adds a training sample with the current inputs and "1" as
      the output;
    - every [MT_TRAINING_DATA_FRAMERATE] frames a sample with output "0"
      is added;
    - after the first [MT_PIPES_TO_LEARN] pipes, keyboard input is
      disabled and the network takes over.
    """
    import neural

    for player in game.players:
        player.neural = neural.NeuralNetwork()
        player.learning = True
Beispiel #5
0
    def __init__(self, net=None):
        """Wrap an existing network, or build the default-shaped one."""
        super(NeuralBrain, self).__init__()

        if net is None:
            net = neural.NeuralNetwork(config.default_neural_shape,
                                       None,
                                       config.default_symmetry_mat)
        self.net = net

        # Cache id and a zero-padded display name so the jsonpickle
        # serialisation stays human readable.
        self._id = self.net.id
        self._name = "BN" + str(self._id).zfill(3)
Beispiel #6
0
def minimizeW(iterations, x, y, alpha, inputVal):
    """Run plain gradient descent on the network weights, then predict.

    Performs *iterations* gradient-descent steps with learning rate
    *alpha* against the training pair (x, y), then returns the trained
    network's forward pass on *inputVal*.
    """
    net = neural.NeuralNetwork()
    for _ in range(iterations):
        grad_w1, grad_w2 = net.costFunctionPrime(x, y)
        net.w1 = net.w1 - alpha * grad_w1
        net.w2 = net.w2 - alpha * grad_w2
    return net.forward(inputVal)
Beispiel #7
0
 def evaluate(self, individual):
     """Score an individual's flattened weight vector.

     Splits the flat weight list into input-, hidden- and output-layer
     segments and returns the forward-propagation error.

     Args:
         individual: sequence whose first element is the flat weight list.

     Returns:
         The error reported by ``neural.NeuralNetwork.forward_propagate``.
     """
     weights = individual[0]
     # Boundary indices of the three weight segments.
     point1 = self.num_input_nodes * self.num_hidden_nodes
     point2 = point1 + (self.num_hidden_nodes *
                        (self.num_hidden_layers - 1) * self.num_hidden_nodes)
     # Renamed locals: the originals shadowed the ``input`` builtin.
     input_weights = weights[:point1]
     hidden_weights = weights[point1:point2]
     output_weights = weights[point2:]
     nn = neural.NeuralNetwork(1)
     error = nn.forward_propagate(input_weights, hidden_weights, output_weights)
     return error
Beispiel #8
0
 def final_eval(self, individual, databreak):
     """Final evaluation of an individual's flattened weight vector.

     Uses the same segmentation as ``evaluate``, but constructs the
     network with *databreak* and scores via the network's ``final_eval``.

     Args:
         individual: sequence whose first element is the flat weight list.
         databreak: forwarded to the NeuralNetwork constructor.

     Returns:
         The error reported by ``neural.NeuralNetwork.final_eval``.
     """
     weights = individual[0]
     # Boundary indices of the three weight segments.
     point1 = self.num_input_nodes * self.num_hidden_nodes
     point2 = point1 + (self.num_hidden_nodes *
                        (self.num_hidden_layers - 1) * self.num_hidden_nodes)
     # Renamed locals: the originals shadowed the ``input`` builtin.
     input_weights = weights[:point1]
     hidden_weights = weights[point1:point2]
     output_weights = weights[point2:]
     nn = neural.NeuralNetwork(databreak)
     error = nn.final_eval(input_weights, hidden_weights, output_weights)
     return error
Beispiel #9
0
    def __init__(self, players = 1, demo_mode = False, neural_network_to_load = None):
        """Set up the game window and genetic-algorithm bookkeeping.

        Args:
            players: number of birds per generation.
            demo_mode: when True, the game runs a pre-trained network
                instead of evolving new ones.
            neural_network_to_load: network used in demo mode. Defaults to
                a fresh ``neural.NeuralNetwork(4, HIDDEN_NODES, 1)``.
        """
        super().__init__(SCREEN_WIDTH, SCREEN_HEIGHT, TITLE)

        # Build the default network per instance. The previous def-time
        # default (a NeuralNetwork in the signature) was constructed once
        # at import and silently shared by every game instance — the
        # classic mutable-default-argument pitfall.
        if neural_network_to_load is None:
            neural_network_to_load = neural.NeuralNetwork(4, HIDDEN_NODES, 1)

        # These are the sprite lists that contain the bird/pipe objects
        self.bird_list = None
        self.pipes = None

        # Helpful genetic algorithm information.
        self.generation = 0
        self.max_score = 0
        self.global_max = 0
        self.global_reached = False
        self.player_count = players

        # This is if you are going to be testing your AI.
        self.demo_mode = demo_mode
        self.demo_network = neural_network_to_load

        # Data structures to keep track of dead birds for genetic algorithm.
        self.dead_birds_dict = {}
        self.dead_birds_array = []

        # Keeps track of which bird is doing the best in the current run.
        self.best_bird = None
Beispiel #10
0
# Parse each CSV row into a feature vector and a one-element label list.
# NOTE(review): `rows`, `features` and `labels` are defined above this
# excerpt — confirm their initialisation in the full file.
for row in rows:
    values = [float(x) for x in row.split(',')]
    features.append(
        values[:-2])  # Ignore last two columns (median value and bias)
    labels.append(values[-2:-1])  # Only median value

# Split data in training and testing (random ~75/25 split per row)
X_train, X_test, y_train, y_test = [], [], [], []
for i in range(len(features)):
    if random.random() > 0.25:
        X_train.append(features[i])
        y_train.append(labels[i])
    else:
        X_test.append(features[i])
        y_test.append(labels[i])
# Transpose so samples are columns (features x samples).
# NOTE(review): np.float128 is unavailable on some platforms (e.g. Windows);
# confirm extended precision is required, otherwise prefer np.float64.
X_train = np.array(X_train, dtype=np.float128).T
y_train = np.array(y_train, dtype=np.float128).T
X_test = np.array(X_test, dtype=np.float128).T
y_test = np.array(y_test, dtype=np.float128).T

print(X_train.shape)
print(y_train.shape)

# Train a 13-8-5-1 fully-connected network with sigmoid activations.
nn = neural.NeuralNetwork([13, 8, 5, 1],
                          activations=['sigmoid', 'sigmoid', 'sigmoid'])

nn.train(X_train, y_train, epochs=1000, batch_size=64, lr=10)

nn.evaluate(X_test, y_test)
Beispiel #11
0
		X_train.append(features[i])
		y_train.append(labels[i])
	else:
		X_test.append(features[i])
		y_test.append(labels[i])
# Transpose so samples are columns (features x samples).
# NOTE(review): np.float128 is unavailable on some platforms (e.g. Windows);
# confirm extended precision is required, otherwise prefer np.float64.
X_train = np.array(X_train, dtype=np.float128).T
y_train = np.array(y_train, dtype=np.float128).T
X_test = np.array(X_test, dtype=np.float128).T
y_test = np.array(y_test, dtype=np.float128).T

print(X_train.shape)
print(y_train.shape)


# Train a 7-5-3 network with sigmoid activations.
nn = neural.NeuralNetwork([7, 5, 3],activations=['sigmoid', 'sigmoid'])

nn.train(X_train, y_train, epochs=1000, batch_size=64, lr = 0.1)

# Evaluate: predicted class = index of the max output per sample.
print(y_test)
_, output = nn.feed_forward(X_test)
y_prime = [x.index(max(x)) for x in output]
percent_errors = []
# NOTE(review): `y` is not defined in this excerpt — presumably the
# pre-split label list (or y_test) was intended; verify in the full file.
for i in range(len(y)):
	percent_errors.append(abs((y_prime[i] - y[i]) / y[i]))

mean_error = sum(percent_errors) / len(percent_errors)
# NOTE(review): float((... * 100).astype(str)) routes the value through a
# string dtype — float(mean_error * 100) looks intended; confirm.
print("Mean percent error: {0:.2f}%".format(float((mean_error * 100).astype(str))))
print("Max error: {0:.2f}%".format(float((max(percent_errors) * 100).astype(str))))
print("Min error: {0:.2f}%".format(float((min(percent_errors) * 100).astype(str))))
import numpy
import neural
from tqdm import tqdm
import matplotlib.pyplot

# Hyperparameters for the MNIST network.
input_nodes = 28 * 28
hidden_layers = 3
hidden_nodes = 100
output_nodes = 10
learning_rate = 0.01
epochs = 15

# initialize network
nn = neural.NeuralNetwork(input_nodes, hidden_layers, hidden_nodes,
                          output_nodes, learning_rate)

# read training data (MNIST CSV: label in column 0, 784 pixel values after)
training_data_file = open("mnist_dataset/mnist_train.csv", 'r')
training_data_list = training_data_file.readlines()
training_data_file.close()

# train for all training data
loss = []
for e in range(epochs):
    print("epoch = ", e + 1)
    for record in tqdm(training_data_list):
        all_values = record.split(',')
        # Scale pixel values from 0..255 into the 0.01..1.0 range.
        inputs = (numpy.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
        # One-hot-style target: 0.01 everywhere, 0.99 at the label index.
        targets = numpy.zeros(output_nodes) + 0.01
        # record[0] is the first character of the CSV line — works because
        # MNIST labels are single digits 0-9.
        targets[int(record[0])] = 0.99
        # NOTE(review): the excerpt ends here; the training call
        # (presumably nn.train(inputs, targets)) lies beyond this view.
Beispiel #13
0
def test_my_nn():
    """Train the custom NeuralNetwork on module-level data and live-plot loss.

    Relies on module-level state defined outside this function:
    train_features, train_targets, val_features, val_targets, MSE, plt,
    pdb and neural. Drops into the debugger when training finishes.
    """
    import sys

    #random.seed(0)
    np.random.seed(0)

    # One (input, target) pair per sample, shaped for the network API.
    my_train_data = [ ( np.array([x]), np.asarray([[y]])) for x, y in \
                  zip(train_features.values, train_targets['cnt'].values)]

    ### Set the hyperparameters here ###
    iterations = 100000
    learning_rate = 0.1  # good: 1.0   def: 0.1
    hidden_nodes = 10  # good: 10
    output_nodes = 1
    batch_size = 128  # good: 512     def: 128

    N_i = train_features.shape[1]

    # Initialise my own neural network
    nn = neural.NeuralNetwork((N_i, hidden_nodes, 1))
    # Linear output layer (regression), zeroed biases to start.
    nn.activations[-1] = nn.fun_linear
    #nn.bias_mult = 0  # disable biases
    for l in range(nn.num_layers):
        nn.biases[l].fill(0)

    print('Starting')

    fig = plt.figure()
    ax = fig.gca()

    plt.grid()

    losses = {'nn_train': [], 'nn_validation': []}
    for ii in range(iterations):

        # Go through a random batch of 128 records from the training data set
        temp_shuffled = my_train_data[:]
        np.random.shuffle(temp_shuffled)
        data_batch = temp_shuffled[0:batch_size]

        # Train the network
        nn.train_batch(data_batch, learning_rate)
        #nn.train_batch( data_batch, learning_rate, lmbda = 50.0, n=len(my_train_data) )

        # Losses are recomputed only every 100 iterations; in between, the
        # progress line below reuses the most recent values. They are first
        # computed at ii == 0, so they are always bound before use.
        if ii % 100 == 0:
            # Printing out the training progress
            nn_train_loss = MSE(
                nn.forward(train_features).T, train_targets['cnt'].values)
            nn_val_loss = MSE(
                nn.forward(val_features).T, val_targets['cnt'].values)

        # Sum of all weights — a cheap indicator of weight drift/blow-up.
        sum_W = 0
        for l in range(nn.num_layers):
            sum_W += np.sum(nn.weights[l])

        #if ii % 100 == 0 and learning_rate > 0.001:
        #    learning_rate *= 0.997

        sys.stdout.write("\rProgress: {:2.1f}".format(100 * ii/float(iterations)) \
                         + "% ... NN Training loss: " + str(nn_train_loss)[:5] \
                         + " ... NN Validation loss: " + str(nn_val_loss)[:5] \
                         + " ... NN LR: " + str(learning_rate)[:5] \
                         + " ... NN Sum W: " + str(sum_W) )
        sys.stdout.flush()

        losses['nn_train'].append(nn_train_loss)
        losses['nn_validation'].append(nn_val_loss)

        # Refresh the live loss plot every 1000 iterations (skip warm-up).
        if True and ii % 1000 == 0 and ii > 10:

            #mean = sum(losses['nn_validation']) / len(losses['nn_validation'])

            plt.cla()
            ax.set_xticks(np.arange(0, 100000, 1000))
            ax.set_yticks(np.arange(0, 1., 0.1))
            ax.set_ylim([0, 0.4])
            plt.grid()
            plt.plot(losses['nn_train'][10:],
                     color=(0.5, 0.5, 1.0),
                     linewidth=1.0)
            plt.plot(losses['nn_validation'][10:],
                     color=(1.0, 0.5, 0.0),
                     linewidth=1.0)
            plt.pause(0.001)

    # Drop into the debugger for interactive inspection after training.
    pdb.set_trace()
    def setUp(self):
        """Create a (2, 3, 1) network for the configured IMPLEMENTATION,
        load fixed weights/biases into it, and verify that shapes and
        values round-trip through the implementation's get/set accessors.
        """
        random.seed(0)
        np.random.seed(0)

        #
        #   Define weights for testing
        #
        weights_0 = np.array( [ [ 0.1, 0.4, 0.7 ],
                                [ 0.2, 0.5, 0.8 ] ], dtype=np.float32 )

        biases_0 = np.array( [ [ 0.3, 0.6, 0.9 ] ], dtype=np.float32 )

        weights_1 = np.array( [ [ 1.0 ],
                                [ 1.1 ],
                                [ 1.2 ] ], dtype=np.float32)

        biases_1 = np.array( [ [ 1.3 ] ], dtype=np.float32 )

        #
        #   Define test data (XOR-style input/target pairs)
        #
        self.data_vec = [ (  np.array( [[0.1, 0.1]] ),  np.array( [[0]] )  ),
                          (  np.array( [[0.1, 0.9]] ),  np.array( [[1]] )  ),
                          (  np.array( [[0.9, 0.1]] ),  np.array( [[1]] )  ),
                          (  np.array( [[0.9, 0.9]] ),  np.array( [[0]] )  ) ]

        self.inputs = np.array([[0.1, 0.1],
                                [0.1, 0.9],
                                [0.9, 0.1],
                                [0.9, 0.9]])
        self.targets = np.array([[0.0],
                                 [1.0],
                                 [1.0],
                                 [0.0]])


        # Instantiate the network for the implementation under test.
        if IMPLEMENTATION == 'neural':
            self.nn = neural.NeuralNetwork( (2, 3, 1), init='norm' )
        elif IMPLEMENTATION == 'mini':
            self.nn = neural_mini.NeuralNetwork2( (2, 3, 1) )
        elif IMPLEMENTATION == 'tensor':
            neural_tf.reset_default_graph()
            self.nn = neural_tf.NeuralNetworkTF( (2, 3, 1) )
        elif IMPLEMENTATION == 'keras':
            self.nn = neural_keras.NeuralKeras( (2, 3, 1) )
        elif IMPLEMENTATION == 'reference':
            self.nn = nndl.Network( (2, 3, 1) )  # reference neural network

            # The reference implementation evidently uses the transposed
            # layout, so adapt the fixtures to match.
            weights_0 = weights_0.T
            biases_0 = biases_0.T
            weights_1 = weights_1.T
            biases_1 = biases_1.T

            self.data_vec = [ (it[0].T, it[1]) for it in self.data_vec ]

        else:
            raise ValueError('Unknown implementation: ' + IMPLEMENTATION)

        # Make sure shapes match before assigning weights into neural network
        nn_weights_0 = self.nn.get_weights(0)
        nn_biases_0 = self.nn.get_biases(0)
        nn_weights_1 = self.nn.get_weights(1)
        nn_biases_1 = self.nn.get_biases(1)

        self.assertEqual( nn_weights_0.shape, weights_0.shape )
        self.assertEqual( nn_biases_0.shape, biases_0.shape )
        self.assertEqual( nn_weights_1.shape, weights_1.shape )
        self.assertEqual( nn_biases_1.shape, biases_1.shape )

        self.nn.set_weights(0, weights_0)
        self.nn.set_biases(0, biases_0)
        self.nn.set_weights(1, weights_1)
        self.nn.set_biases(1, biases_1)

        # Read everything back and confirm the values round-tripped intact.
        nn_weights_0 = self.nn.get_weights(0)
        nn_biases_0 = self.nn.get_biases(0)
        nn_weights_1 = self.nn.get_weights(1)
        nn_biases_1 = self.nn.get_biases(1)

        self.assertEqual(nn_weights_0.tolist(), weights_0.tolist())
        self.assertEqual(nn_biases_0.tolist(), biases_0.tolist())
        self.assertEqual(nn_weights_1.tolist(), weights_1.tolist())
        self.assertEqual(nn_biases_1.tolist(), biases_1.tolist())
Beispiel #15
0
                    # NOTE(review): this excerpt starts mid-function — the
                    # enclosing loop/conditional is outside this view.
                    # Rescale the car image, keeping its centre fixed.
                    width = int(car.size[0] * x_scale_factor)
                    height = int(car.size[1] * y_scale_factor)
                    car.scaled_image = pygame.transform.scale(
                        car.scaled_image, (width, height))
                    x, y = car.rect.center
                    car.rect = car.scaled_image.get_rect()
                    car.rect.center = (x, y)

                    window.blit(car.scaled_image, car.rect)
                    pygame.display.update()
                    # Stop searching once the matching car has been handled.
                    found_car = True
                    break


# Create one neural network per car.
for i in range(NUM_CARS):
    networks.append(neural.NeuralNetwork())
    cars.append(Car())

clock = pygame.time.Clock()

running = True
first_run = True
# Main loop at 10 FPS. NOTE(review): the loop body appears to continue
# beyond this excerpt — verify against the full file.
while running:
    clock.tick(10)
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        if event.type == pygame.MOUSEMOTION:
            # Left mouse button held while moving: let the user pick a car.
            if pygame.mouse.get_pressed()[0] == 1:
                select_car()
Beispiel #16
0
    def Run(self, *args):
        # getting data for main problem
        dataproc = data_processing.DataProcessing()
        data = dataproc.GetMainData()
        # getting everything we need
        CDOM, CDOM_sorted, CDOM_diag_mesh, \
        ASV, ASV_ranged, \
        metadata, metadata_scaled, \
        X_ASV, y_CDOM = data
        #XGboost with scikitlearn - data with spatial component (BCC Bray distance by CDOM)
        X_CDOM = CDOM.loc[:, [
            "CDOM.x1", "CDOM.x2"
        ]]  #Molten meshgrid CDOM values for real data BCC Bray distances
        X_CDOM_diag_mesh = CDOM_diag_mesh.loc[:, [
            "CDOM.x1", "CDOM.x2"
        ]]  #Molten meshgrid CDOM values for generating predicted BCC Bray distances
        y_CDOM = CDOM.loc[:, "ASV.dist"]

        if self.paramData['type'] == 'ffnn_keras':
            # retrieving network data
            NNdata = self.PreProcessing()
            # passing parameter file
            print(self.paramData)
            '''
            Getting Network Architecture
            '''
            network = neural.NeuralNetwork(NNdata)
            # passing network architecture and create the model
            model = network.BuildModel()
            # training model
            model, history = network.TrainModel(
                model, self.X_train, self.X_test, self.Y_train_onehot,
                self.Y_test_onehot)  #self.X_norm, self.Y_onehot)
            test_loss, test_acc = model.evaluate(self.X_test,
                                                 self.Y_test_onehot)
            print('Test accuracy:', test_acc)

            # Plotting results
            self.funcs.PlotResultsKeras(history, self.paramData['type'],
                                        self.paramData['OutputPath'],
                                        self.paramData['epochs'],
                                        self.paramData['Optimization'],
                                        self.paramData['BatchSize'])

        elif self.paramData['type'] == 'snn_keras':
            # retrieving network data
            NNdata = self.PreProcessing()
            '''
            Getting Network Architecture
            '''
            network = neural.NeuralNetwork(NNdata)
            # passing network architecture and create the model
            model = network.BuildModel()
            # training model
            model, history = network.TrainModel(model, self.pairs_train,
                                                self.Y_train_onehot,
                                                self.pairs_test,
                                                self.Y_test_onehot)
            # Plotting results
            self.funcs.PlotResultsKeras(history, self.paramData['type'],
                                        self.paramData['OutputPath'],
                                        self.paramData['epochs'],
                                        self.paramData['Optimization'],
                                        self.paramData['BatchSize'])
        elif self.paramData['type'] == 'tnn_keras':
            # retrieving network data
            NNdata = self.PreProcessing()
            '''
            Getting Network Architecture
            '''
            network = neural.NeuralNetwork(NNdata)
            # passing network architecture and create the model
            model = network.BuildModel()
            # training model
            model, history = network.TrainModel(model, self.triplets_train,
                                                self.Y_train_onehot,
                                                self.triplets_test,
                                                self.Y_test_onehot)
            # Plotting results
            self.funcs.PlotResultsKeras(history, self.paramData['type'],
                                        self.paramData['OutputPath'],
                                        self.paramData['epochs'],
                                        self.paramData['Optimization'],
                                        self.paramData['BatchSize'])

        elif self.paramData['type'] == 'ffnn_manual':
            # Neural network with multiple layers - regression - BCC Bray distances by CDOM - original data
            X_CDOM = CDOM.loc[:, ["CDOM.x1", "CDOM.x2"]].to_numpy()
            y_CDOM = CDOM.loc[:, "ASV.dist"].to_numpy(
            )[:, np.newaxis]  #Original data
            '''
            NN_reg_original = neural.NeuralNetworkML(X_CDOM, y_CDOM,
                                                     trainingShare=0.80,
                                                     n_hidden_layers=3,
                                                     n_hidden_neurons=[2000, 1000, 500],
                                                     n_categories=1,
                                                     epochs=10, batch_size=10,
                                                     eta=1e-8,
                                                     lmbd=0, fixed_LR=False,
                                                     method="regression",
                                                     activation="sigmoid",
                                                     seed = self.paramData['RandomSeed'])
            '''
            n_hidden_neurons = []
            for layer in range(self.paramData['NHiddenLayers']):
                n_hidden_neurons.append(self.paramData['NHiddenNeurons'])
            for layer in range(1, self.paramData['NHiddenLayers'], 1):
                n_hidden_neurons[layer] = int(n_hidden_neurons[layer - 1] / 2)
            #print(n_hidden_neurons)

            NN_reg_original = neural.NeuralNetworkML(
                X_CDOM,
                y_CDOM,
                trainingShare=1 - self.paramData['TestSize'],
                n_hidden_layers=self.paramData['NHiddenLayers'],
                n_hidden_neurons=n_hidden_neurons,
                n_categories=1,
                epochs=self.paramData['epochs'],
                batch_size=self.paramData['BatchSize'],
                eta=self.paramData['alpha'],
                lmbd=0,
                fixed_LR=False,
                method="regression",
                activation="sigmoid",
                seed=self.paramData['RandomSeed'])

            NN_reg_original.train()
            # Plotting results
            self.funcs.PlotResultsManualFFNN(NN_reg_original, CDOM,
                                             self.paramData['type'],
                                             self.paramData['OutputPath'],
                                             self.paramData['epochs'],
                                             self.paramData['BatchSize'])
        elif self.paramData['type'] == 'xgb':
            X_train, X_test, y_train, y_test = train_test_split(
                X_CDOM,
                y_CDOM,
                train_size=1 - self.paramData['TestSize'],
                test_size=self.paramData['TestSize'],
                random_state=self.paramData['RandomSeed'])
            # initialising xgboosting
            xgboosting = xgb.XGBoosting()
            model = xgboosting.RunModel(X_train, X_test, y_train, y_test,
                                        X_CDOM, X_CDOM_diag_mesh, CDOM,
                                        CDOM_sorted,
                                        self.paramData['OutputPath'])
            #Get best model by test MSE
            XGboost_best_model_index = model.best_iteration
            XGboost_best_iteration = model.get_booster().best_ntree_limit
            MSE_per_epoch = model.evals_result()

            # make predictions for test data
            y_pred = model.predict(X_test, ntree_limit=XGboost_best_iteration)
            y_pred_train = model.predict(X_train)
            #predictions = [round(value) for value in y_pred]

            best_prediction = model.predict(X_CDOM,
                                            ntree_limit=XGboost_best_iteration)
            CDOM_pred = best_prediction.copy(
            )  #CDOM_pred.shape: (2556,) CDOM_pred are the predicted BCC Bray distances for CDOM value pairs
            CDOM_pred_fine_mesh = model.predict(
                X_CDOM_diag_mesh, ntree_limit=XGboost_best_iteration)
            '''
            y_pred,\
            y_pred_train,\
            MSE_per_epoch,\
            CDOM_pred, \
            CDOM_pred_fine_mesh, \
            XGboost_best_model_index = xgboosting.RunModel(X_train, X_test,
                                                        y_train, y_test,
                                                        X_CDOM, X_CDOM_diag_mesh,
                                                        CDOM, CDOM_sorted,
                                                        self.paramData['OutputPath'])
            '''
            # plotting 3d plots and mse for XGBoost
            self.funcs.PlotResultsXGBoost(CDOM, CDOM_sorted, X_CDOM_diag_mesh,
                                          CDOM_pred_fine_mesh, CDOM_pred,
                                          self.paramData['OutputPath'], y_pred,
                                          y_pred_train, MSE_per_epoch, y_train,
                                          y_test, XGboost_best_model_index)
        elif self.paramData['type'] == 'rf_main':
            rf = random_forest.RandomForest()
            # Laurent
            population_size, metadata = rf.read_data(False, False)
            predictions, test_y, ML_ = rf.prepare_data(
                population_size, metadata, self.paramData['TestSize'],
                self.paramData['RandomSeed'])
            all_predictions = rf.predict_all_metadata(population_size,
                                                      metadata, ML_)

            # we will compare the outcome with xgboost
            def MergeTable(var_list, metadata_variables):
                table = pd.DataFrame(np.concatenate((var_list), axis=1))
                table.columns = metadata_variables
                return table

            def PredictMetadata(ASV_table, metadata_variables, train_size,
                                test_size, seed):
                X_ASV = ASV_table
                X_ASV.columns = [''] * len(X_ASV.columns)
                X_ASV = X_ASV.to_numpy()
                metadata_list = []
                for i in metadata_variables:
                    #y_CDOM = metadata.loc[:, i][:, np.newaxis]

                    # split data into train and test sets
                    y_meta = metadata.loc[:, i]  #Requires 1d array
                    X_train, X_test, y_train, y_test = train_test_split(
                        X_ASV,
                        y_meta,
                        train_size=train_size,
                        test_size=test_size,
                        random_state=seed)

                    # fit model no training data
                    model = XGBRegressor(objective='reg:squarederror')
                    model.fit(X_train,
                              y_train,
                              eval_set=[(X_train, y_train), (X_test, y_test)],
                              eval_metric='rmse',
                              early_stopping_rounds=100,
                              verbose=False)

                    #Get best model by test MSE
                    XGboost_best_model_index = model.best_iteration
                    XGboost_best_iteration = model.get_booster(
                    ).best_ntree_limit

                    # make predictions for full dataset
                    y_pred = model.predict(X_ASV,
                                           ntree_limit=XGboost_best_iteration)
                    metadata_list.append(y_pred[:, np.newaxis])
                return MergeTable(metadata_list, metadata_variables)

            var_list = [
                "Latitude", "Longitude", "Altitude", "Area", "Depth",
                "Temperature", "Secchi", "O2", "CH4", "pH", "TIC", "SiO2",
                "KdPAR"
            ]
            train_size = 1 - self.paramData['TestSize']
            test_size = self.paramData['TestSize']
            seed = self.paramData['RandomSeed']
            predicted_metadata = PredictMetadata(ASV, var_list, train_size,
                                                 test_size, seed)

            with pd.option_context('display.max_rows', None,
                                   'display.max_columns',
                                   None):  # more options can be specified also
                print(predicted_metadata)

        elif self.paramData['type'] == 'rf_side':
            # retrieving network data
            NNdata = self.PreProcessing()
            rf = random_forest.RandomForest()
            seed = self.paramData['RandomSeed']
            clfs, scores_test, scores_train = rf.predict_t(
                self.X_train, self.X_test, self.y_train_l, self.y_test_l, seed)

        elif self.paramData['type'] == 'all':
            '''
            Neural Network
            '''
            # Neural network with multiple layers - regression - BCC Bray distances by CDOM - original data
            X_CDOM = CDOM.loc[:, ["CDOM.x1", "CDOM.x2"]].to_numpy()
            y_CDOM = CDOM.loc[:, "ASV.dist"].to_numpy(
            )[:, np.newaxis]  #Original data

            n_hidden_neurons = []
            for layer in range(self.paramData['NHiddenLayers']):
                n_hidden_neurons.append(self.paramData['NHiddenNeurons'])
            for layer in range(1, self.paramData['NHiddenLayers'], 1):
                n_hidden_neurons[layer] = int(n_hidden_neurons[layer - 1] / 2)
            #print(n_hidden_neurons)

            NN_reg_original = neural.NeuralNetworkML(
                X_CDOM,
                y_CDOM,
                trainingShare=1 - self.paramData['TestSize'],
                n_hidden_layers=self.paramData['NHiddenLayers'],
                n_hidden_neurons=n_hidden_neurons,
                n_categories=1,
                epochs=self.paramData['epochs'],
                batch_size=self.paramData['BatchSize'],
                eta=self.paramData['alpha'],
                lmbd=0,
                fixed_LR=False,
                method="regression",
                activation="sigmoid",
                seed=self.paramData['RandomSeed'])

            NN_reg_original.train()

            x_mesh = np.log10(
                np.arange(min(CDOM.loc[:, "CDOM.x1"]),
                          max(CDOM.loc[:, "CDOM.x2"]) + 0.01, 0.01)) + 1
            y_mesh = x_mesh.copy()
            x_mesh, y_mesh = np.meshgrid(x_mesh, y_mesh)
            X_CDOM_mesh = self.funcs.pdCat(
                x_mesh.ravel()[:, np.newaxis],
                y_mesh.ravel()[:, np.newaxis]).to_numpy()
            best_prediction = NN_reg_original.model_prediction(
                X_CDOM_mesh,
                NN_reg_original.accuracy_list.index(
                    min(NN_reg_original.accuracy_list)))

            x_mesh = np.arange(min(CDOM.loc[:, "CDOM.x1"]),
                               max(CDOM.loc[:, "CDOM.x2"]) + 0.01, 0.01)
            y_mesh = x_mesh.copy()
            x_mesh, y_mesh = np.meshgrid(x_mesh, y_mesh)

            ff_pred_original = best_prediction.copy()
            ff_pred_original = np.reshape(ff_pred_original, (363, 363))
            ff_pred_original[x_mesh - y_mesh == 0] = np.nan
            ff_pred_original[x_mesh > y_mesh] = np.nan
            '''
            XGBoost part
            '''
            X_CDOM = CDOM.loc[:, [
                "CDOM.x1", "CDOM.x2"
            ]]  #Molten meshgrid CDOM values for real data BCC Bray distances
            X_CDOM_diag_mesh = CDOM_diag_mesh.loc[:, [
                "CDOM.x1", "CDOM.x2"
            ]]  #Molten meshgrid CDOM values for generating predicted BCC Bray distances
            y_CDOM = CDOM.loc[:, "ASV.dist"]

            X_train, X_test, y_train, y_test = train_test_split(
                X_CDOM,
                y_CDOM,
                train_size=1 - self.paramData['TestSize'],
                test_size=self.paramData['TestSize'],
                random_state=self.paramData['RandomSeed'])
            # initialising xgboosting
            xgboosting = xgb.XGBoosting()
            model = xgboosting.RunModel(X_train, X_test, y_train, y_test,
                                        X_CDOM, X_CDOM_diag_mesh, CDOM,
                                        CDOM_sorted,
                                        self.paramData['OutputPath'])

            #Get best model by test MSE
            XGboost_best_model_index = model.best_iteration
            XGboost_best_iteration = model.get_booster().best_ntree_limit
            MSE_per_epoch = model.evals_result()

            # make predictions for test data
            y_pred = model.predict(X_test, ntree_limit=XGboost_best_iteration)
            y_pred_train = model.predict(X_train)
            #predictions = [round(value) for value in y_pred]

            best_prediction = model.predict(X_CDOM,
                                            ntree_limit=XGboost_best_iteration)
            CDOM_pred = best_prediction.copy(
            )  #CDOM_pred.shape: (2556,) CDOM_pred are the predicted BCC Bray distances for CDOM value pairs
            CDOM_pred_fine_mesh = model.predict(
                X_CDOM_diag_mesh, ntree_limit=XGboost_best_iteration)
            '''
            Simple OLS - generating design matrix out of data set etc.
            '''
            reg = regression.Regression()
            X_mesh = reg.GenerateMesh(
                0.21, 3.83, 0.21, 3.83, 0.01, 0.01, log_transform=True
            )  # The low number of points on the higher end of the gradient causes distortions for linear regression
            X_mesh_degree_list = reg.DesignMatrixList(X_mesh[0], X_mesh[1],
                                                      12)[1:]
            X_degree_list = reg.DesignMatrixList(CDOM.loc[:, "CDOM.x1"],
                                                 CDOM.loc[:,
                                                          "CDOM.x2"], 12)[1:]
            X_degree_list_subset = []

            z = CDOM_pred  #XGboost-predicted values
            z = CDOM.loc[:, "ASV.dist"]  #Original data
            #ebv_no_resampling = reg.generate_error_bias_variance_without_resampling(X_degree_list, 1)
            #ebv_resampling = reg.generate_error_bias_variance_with_resampling(X_degree_list, 1, 100)
            #reg.ebv_by_model_complexity(ebv_resampling)
            #reg.training_vs_test(ebv_no_resampling)

            CDOM_pred_reg = X_mesh_degree_list[8] @ reg.beta_SVD(
                X_degree_list[8], CDOM_pred)
            #print(pd.DataFrame(X_mesh_degree_list[1]))
            #print(CDOM_pred_reg)
            #with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
            #  print(pd.DataFrame(CDOM_pred_reg))

            x_mesh_reg = np.arange(min(CDOM.loc[:, "CDOM.x1"]),
                                   max(CDOM.loc[:, "CDOM.x2"]) + 0.01, 0.01)
            y_mesh_reg = x_mesh_reg.copy()
            x_mesh_reg, y_mesh_reg = np.meshgrid(x_mesh_reg, y_mesh_reg)
            X_CDOM_mesh = self.funcs.pdCat(x_mesh_reg.ravel()[:, np.newaxis],
                                           y_mesh_reg.ravel()[:, np.newaxis])
            #print(pd.DataFrame(X_CDOM_mesh))
            #print("CDOM_pred_reg.shape", CDOM_pred_reg.shape)
            z_CDOM_mesh_pred = np.reshape(
                CDOM_pred_reg, (x_mesh_reg.shape[0], x_mesh_reg.shape[0]))
            z_CDOM_mesh_pred[x_mesh_reg - y_mesh_reg == 0] = np.nan
            z_CDOM_mesh_pred[x_mesh_reg > y_mesh_reg] = np.nan
            '''
            Neural Network with data from XGBoost
            '''
            X_CDOM = CDOM.loc[:, ["CDOM.x1", "CDOM.x2"]].to_numpy()
            y_CDOM = CDOM_pred[:, np.newaxis]  #Predicted data from XGboost

            n_hidden_neurons = []
            for layer in range(self.paramData['NHiddenLayers']):
                n_hidden_neurons.append(self.paramData['NHiddenNeurons'])
            for layer in range(1, self.paramData['NHiddenLayers'], 1):
                n_hidden_neurons[layer] = int(n_hidden_neurons[layer - 1] / 2)
            #print(n_hidden_neurons)

            NN_reg = neural.NeuralNetworkML(
                X_CDOM,
                y_CDOM,
                trainingShare=1 - self.paramData['TestSize'],
                n_hidden_layers=self.paramData['NHiddenLayers'],
                n_hidden_neurons=n_hidden_neurons,
                n_categories=1,
                epochs=self.paramData['epochs'],
                batch_size=self.paramData['BatchSize'],
                eta=self.paramData['alpha'],
                lmbd=0,
                fixed_LR=False,
                method="regression",
                activation="sigmoid",
                seed=self.paramData['RandomSeed'])

            NN_reg.train()
            test_predict = NN_reg.predict(NN_reg.XTest)
            print(NN_reg.accuracy_list)

            #Use log-transformed CDOM values for creating design matrix, then plot on original values
            x_mesh = np.log10(
                np.arange(min(CDOM.loc[:, "CDOM.x1"]),
                          max(CDOM.loc[:, "CDOM.x2"]) + 0.01, 0.01)) + 1
            y_mesh = x_mesh.copy()
            x_mesh, y_mesh = np.meshgrid(x_mesh, y_mesh)
            X_CDOM_mesh = self.funcs.pdCat(
                x_mesh.ravel()[:, np.newaxis],
                y_mesh.ravel()[:, np.newaxis]).to_numpy()
            best_prediction = NN_reg.model_prediction(
                X_CDOM_mesh,
                NN_reg.accuracy_list.index(min(NN_reg.accuracy_list)))

            x_mesh = np.arange(min(CDOM.loc[:, "CDOM.x1"]),
                               max(CDOM.loc[:, "CDOM.x2"]) + 0.01, 0.01)
            y_mesh = x_mesh.copy()
            x_mesh, y_mesh = np.meshgrid(x_mesh, y_mesh)

            ff_pred = best_prediction.copy()
            ff_pred = np.reshape(ff_pred, (363, 363))
            ff_pred[x_mesh - y_mesh == 0] = np.nan
            ff_pred[x_mesh > y_mesh] = np.nan
            '''
            Plotting 3d graphs for all data
            '''
            fontsize = 6
            #Compare raw data to XGboost, neural network predicted data and XGboost predicted data smoothed with neural network
            fig = plt.figure(figsize=plt.figaspect(0.5))
            ax = fig.add_subplot(2, 3, 1, projection='3d')
            ax.set_title("BCC Bray distances by sites' DOM", fontsize=fontsize)
            #plt.subplots_adjust(left=0, bottom=0, right=2, top=2, wspace=0, hspace=0)
            ax.view_init(elev=30.0, azim=300.0)
            surf = ax.plot_trisurf(CDOM.loc[:, "CDOM.x1"],
                                   CDOM.loc[:, "CDOM.x2"],
                                   CDOM.loc[:, "ASV.dist"],
                                   cmap='viridis',
                                   edgecolor='none')
            # Customize the z axis.
            ax.set_zlim(0.3, 1)
            ax.zaxis.set_major_locator(LinearLocator(10))
            ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))
            ax.tick_params(labelsize=8)
            ax.set_zlabel(zlabel="Bray distance")
            ax.set_ylabel(ylabel="DOM site 2")
            ax.set_xlabel(xlabel="DOM site 1")

            # Set up the axes for the second plot
            ax = fig.add_subplot(2, 3, 2, projection='3d')
            #ax.set_title("XGboost-Predicted BCC Bray distances by sites' CDOM, dataset CDOM coordinates", fontsize=8)
            ax.set_title(
                "XGboost-Predicted BCC \n Bray distances by sites' DOM",
                fontsize=fontsize)
            ax.view_init(elev=30.0, azim=300.0)

            # Plot the surface.
            ax.plot_trisurf(
                CDOM.loc[:, "CDOM.x1"],
                CDOM.loc[:, "CDOM.x2"],
                CDOM_pred,  #197109 datapoints
                cmap='viridis',
                edgecolor='none')

            # Customize the z axis.
            z_range = (np.nanmax(CDOM_pred) - np.nanmin(CDOM_pred))
            ax.set_zlim(np.nanmin(CDOM_pred) - z_range, 1)
            ax.zaxis.set_major_locator(LinearLocator(10))
            ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))
            ax.tick_params(labelsize=8)
            ax.set_zlabel(zlabel="Bray distance")
            ax.set_ylabel(ylabel="DOM site 2")
            ax.set_xlabel(xlabel="DOM site 1")

            # Set up the axes for the third plot
            ax = fig.add_subplot(2, 3, 3, projection='3d')
            #ax.set_title("OLS (SVD) regression-predicted BCC Bray distances by sites' CDOM, CDOM 0.01 step meshgrid", fontsize=6)
            ax.set_title(
                "OLS (SVD) regression-predicted \n BCC Bray distances by sites' DOM",
                fontsize=fontsize)
            ax.view_init(elev=30.0, azim=300.0)

            # Plot the surface.
            ax.plot_trisurf(
                x_mesh_reg.ravel(),
                y_mesh_reg.ravel(),
                z_CDOM_mesh_pred.ravel(),
                cmap='viridis',  #197109 datapoints
                vmin=np.nanmin(z_CDOM_mesh_pred),
                vmax=np.nanmax(z_CDOM_mesh_pred),
                edgecolor='none')

            # Customize the z axis.
            z_range = (np.nanmax(z_CDOM_mesh_pred) -
                       np.nanmin(z_CDOM_mesh_pred))
            ax.set_zlim(np.nanmin(z_CDOM_mesh_pred) - z_range, 1)
            ax.zaxis.set_major_locator(LinearLocator(10))
            ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))
            ax.tick_params(labelsize=8)
            ax.set_zlabel(zlabel="Bray distance")
            ax.set_ylabel(ylabel="DOM site 2")
            ax.set_xlabel(xlabel="DOM site 1")

            # Set up the axes for the fourth plot
            ax = fig.add_subplot(2, 3, 4, projection='3d')
            #ax.set_title("NN-smoothed XGboost-predicted BCC Bray distances by sites' CDOM, CDOM 0.01 step meshgrid", fontsize=6)
            ax.set_title(
                "NN-smoothed XGboost-predicted \n BCC Bray distances by sites' DOM",
                fontsize=fontsize)
            ax.view_init(elev=30.0, azim=300.0)

            # Plot the surface.
            ax.plot_trisurf(
                x_mesh.ravel(),
                y_mesh.ravel(),
                ff_pred.ravel(),  #197109 datapoints
                cmap='viridis',
                edgecolor='none',
                vmin=np.nanmin(ff_pred),
                vmax=np.nanmax(ff_pred))

            # Customize the z axis.
            z_range = (np.nanmax(ff_pred) - np.nanmin(ff_pred))
            ax.set_zlim(np.nanmin(ff_pred) - z_range, 1)
            ax.zaxis.set_major_locator(LinearLocator(10))
            ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))
            ax.tick_params(labelsize=8)
            ax.set_zlabel(zlabel="Bray distance")
            ax.set_ylabel(ylabel="DOM site 2")
            ax.set_xlabel(xlabel="DOM site 1")

            # Set up the axes for the fifth plot
            ax = fig.add_subplot(2, 3, 5, projection='3d')
            #ax.set_title("NN-predicted BCC Bray distances by sites' CDOM, CDOM 0.01 step meshgrid", fontsize=8)
            ax.set_title("NN-predicted BCC Bray \n distances by sites' DOM",
                         fontsize=fontsize)
            ax.view_init(elev=30.0, azim=300.0)

            # Plot the surface.
            ax.plot_trisurf(
                x_mesh.ravel(),
                y_mesh.ravel(),
                ff_pred_original.ravel(),  #197109 datapoints
                cmap='viridis',
                edgecolor='none',
                vmin=np.nanmin(ff_pred_original),
                vmax=np.nanmax(ff_pred_original))

            # Customize the z axis.
            z_range = (np.nanmax(ff_pred_original) -
                       np.nanmin(ff_pred_original))
            ax.set_zlim(np.nanmin(ff_pred_original) - z_range, 1)
            ax.zaxis.set_major_locator(LinearLocator(10))
            ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))
            ax.tick_params(labelsize=8)
            ax.set_zlabel(zlabel="Bray distance")
            ax.set_ylabel(ylabel="DOM site 2")
            ax.set_xlabel(xlabel="DOM site 1")

            # Set up the axes for the sixth plot
            ax = fig.add_subplot(2, 3, 6, projection='3d')
            #ax.set_title("XGboost-predicted BCC Bray distances by sites' CDOM, CDOM 0.01 step meshgrid", fontsize=8)
            ax.set_title(
                "XGboost-predicted BCC Bray \n distances by sites' DOM",
                fontsize=fontsize)
            ax.view_init(elev=30.0, azim=300.0)

            # Plot the surface.
            ax.plot_trisurf(
                X_CDOM_diag_mesh.loc[:, "CDOM.x1"],
                X_CDOM_diag_mesh.loc[:, "CDOM.x2"],
                CDOM_pred_fine_mesh,  #197109 datapoints
                cmap='viridis',
                edgecolor='none')

            # Customize the z axis.
            z_range = (np.nanmax(CDOM_pred_fine_mesh) -
                       np.nanmin(CDOM_pred_fine_mesh))
            ax.set_zlim(np.nanmin(CDOM_pred_fine_mesh) - z_range, 1)
            ax.zaxis.set_major_locator(LinearLocator(10))
            ax.zaxis.set_major_formatter(FormatStrFormatter("%.02f"))
            ax.tick_params(labelsize=8)
            ax.set_zlabel(zlabel="Bray distance")
            ax.set_ylabel(ylabel="DOM site 2")
            ax.set_xlabel(xlabel="DOM site 1")

            #filename = self.paramData['OutputPath']
            filename = self.paramData[
                'OutputPath'] + '/' + 'everything_3d' + '.png'
            fig.savefig(filename)

            plt.show()
Beispiel #17
0
#my_seed = 95
#random.seed(my_seed)
#np.random.seed(my_seed)


# Load data
def load_data(path):
    """Load a label-first CSV (MNIST-style) and return (features, labels).

    Each row is ``label, value1, value2, ...``.  Only the first 200 rows
    are read, which keeps this demo's training time short.

    Args:
        path: path to the CSV file.

    Returns:
        Tuple of two extended-precision arrays, transposed so samples are
        columns: features is (n_features, n_samples) and labels is
        (1, n_samples).
    """
    features = []
    labels = []
    # 'with' guarantees the file handle is closed (the original leaked it).
    with open(path, 'r') as f:
        rows = f.readlines()
    for row in rows[0:200]:
        values = [float(x) for x in row.split(',')]
        features.append(values[1:])  # Ignore first column (the label)
        labels.append(values[0:1])  # Only label
    # np.longdouble is the portable spelling of float128; np.float128 does
    # not exist on all platforms (e.g. Windows, some ARM builds).
    return np.array(features, dtype=np.longdouble).T, \
        np.array(labels, dtype=np.longdouble).T


# Split data in training and testing
# (train/test come from separate CSV files; load_data caps each at 200 rows)
X_train, y_train = load_data('mnist_train.csv')
X_test, y_test = load_data('mnist_test.csv')

# First train
# 784 inputs (28x28 pixels), one hidden layer of 100 units, 1 output unit.
# NOTE(review): `neural` is a project module — the exact semantics of
# NeuralNetwork/train/evaluate are not visible in this file; confirm there.
nn = neural.NeuralNetwork([784, 100, 1], activations=['sigmoid', 'sigmoid'])

nn.train(X_train, y_train, epochs=100, batch_size=200, lr=0.1)

# Evaluate on the held-out test set, then on the training set itself
# (comparing the two gauges over/under-fitting).
nn.evaluate(X_test, y_test)
nn.evaluate(X_train, y_train)
 '''
     
 if (nLayers > 2):
     
     '''
     Switching to Neural Network
     '''
     
     
     
     #m = np.size(Y_train)
     print('Neural Network')
     # passing configuration
     neuralNet = neural.NeuralNetwork(NNType, NNArch, \
                                      nLayers, nFeatures, \
                                      nHidden, nOutput, \
                                      epochs, alpha, \
                                      lmbd, nInput, seed, BatchSize, optimization)
     
     
     '''
     1. Add Parallelisation
     2. Add plots
     3. Save all the values for scores etc. to table or whatever
     '''
     #myResult = Parallel(n_jobs=nproc, verbose=10)(delayed(DoGriSearchClassification)\
     # (logisticData, i, j, alpha, lmbd) for i, alpha in enumerate(alphas) for j, lmbd in enumerate(lambdas))
     
     
     
     #print(type(BatchSize))
    def test_speed(self):
        """Benchmark forward-pass and batch-training throughput of one backend.

        The backend is chosen by the module-level IMPLEMENTATION flag and the
        constructed network is stored on ``self.nn``.  One million random
        (input, label) samples are pushed through in chunks of 1000, and the
        elapsed wall time for prediction and for training is printed.
        """
        inputs = 2
        hidden = 256
        outputs = 3

        dims = (inputs, hidden, outputs)

        if IMPLEMENTATION == 'neural':
            self.nn = neural.NeuralNetwork(dims, init='norm')
        elif IMPLEMENTATION == 'mini':
            self.nn = neural_mini.NeuralNetwork2(dims)
        elif IMPLEMENTATION == 'tensor':
            neural_tf.reset_default_graph()
            self.nn = neural_tf.NeuralNetworkTF(dims)
        elif IMPLEMENTATION == 'keras':
            self.nn = neural_keras.NeuralKeras(dims)
        elif IMPLEMENTATION == 'reference':
            self.nn = nndl.Network(dims)  # reference neural network

            # NOTE(review): assumes self.data_vec was populated earlier; the
            # reference implementation expects transposed inputs — confirm.
            self.data_vec = [(it[0].T, it[1]) for it in self.data_vec]
        else:
            raise ValueError('Unknown implementation: ' + IMPLEMENTATION)

        dtype = np.float32

        data = np.random.uniform(-1.0, 1.0, size=(1000000, 2)).astype(dtype)
        labels = np.random.randint(0, 2, size=(1000000, 3)).astype(dtype)

        print(data.dtype)
        print(labels.dtype)

        chunk_size = 1000

        # time.perf_counter() is monotonic and high-resolution — the correct
        # clock for benchmarking (time.time() tracks the wall clock and can
        # jump backwards/forwards with system clock adjustments).
        ts = time.perf_counter()

        for i in range(1000):
            chunk_start = i * chunk_size
            self.nn.forward(data[chunk_start:chunk_start + chunk_size])

        span = time.perf_counter() - ts

        print('Predict time:', span)

        ts = time.perf_counter()

        for i in range(1000):
            chunk_start = i * chunk_size
            self.nn.train_batch(data[chunk_start:chunk_start + chunk_size],
                                labels[chunk_start:chunk_start + chunk_size],
                                eta=0.001)

        span = time.perf_counter() - ts

        print('Training time:', span)
Beispiel #20
0
# NOTE(review): fragment — `f`, `features` and `labels` are defined earlier
# in the file, outside this excerpt.  Here the CSV is label-LAST (unlike the
# MNIST loader above in format, per the slicing below).
rows = f.readlines()
for row in rows:
	values = [float(x) for x in row.split(',')]
	features.append(values[:-1]) # Ignore last column
	labels.append(values[-1:]) # Only label

# Randomly partition the samples ~75/25 into training and testing sets.
X_train, y_train = [], []
X_test, y_test = [], []
for feat, lab in zip(features, labels):
    # One coin flip per sample: ~75% land in the training split.
    if random.random() > 0.25:
        dest_X, dest_y = X_train, y_train
    else:
        dest_X, dest_y = X_test, y_test
    dest_X.append(feat)
    dest_y.append(lab)

# Convert each split to an extended-precision array with samples as columns.
X_train = np.array(X_train, dtype=np.float128).T
y_train = np.array(y_train, dtype=np.float128).T
X_test = np.array(X_test, dtype=np.float128).T
y_test = np.array(y_test, dtype=np.float128).T

print(X_train.shape)
print(y_train.shape)


# First train
# 7 input features, one hidden layer of 5 units, a single output unit.
# NOTE(review): `neural` is a project module — the exact semantics of
# NeuralNetwork/train/evaluate are not visible in this file; confirm there.
nn = neural.NeuralNetwork([7, 5, 1],activations=['sigmoid', 'relu'])

nn.train(X_train, y_train, epochs=1000, batch_size=64, lr = 0.1)

# Report performance on the held-out test split.
nn.evaluate(X_test, y_test)