def calculate_loss(self, X, y):
    mulGate = MultiplyGate()
    addGate = AddGate()
    layer = Tanh()
    softmaxOutput = Softmax()

    # forward pass through every layer: X*W, + b, then tanh
    input = X
    for i in range(len(self.W)):
        mul = mulGate.forward(self.W[i], input)
        add = addGate.forward(mul, self.b[i])
        input = layer.forward(add)

    # cross-entropy loss of the softmax over the last layer's output
    return softmaxOutput.loss(input, y)
def predict(self, X):
    mulGate = MultiplyGate()
    addGate = AddGate()
    layer = Tanh()
    softmaxOutput = Softmax()

    # forward pass, identical to calculate_loss
    input = X
    for i in range(len(self.W)):
        mul = mulGate.forward(self.W[i], input)
        add = addGate.forward(mul, self.b[i])
        input = layer.forward(add)

    # return the index of the most probable class for each sample
    probs = softmaxOutput.predict(input)
    return np.argmax(probs, axis=1)
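# The two methods above rely on small computation-graph "gate" and layer
# classes (MultiplyGate, AddGate, Tanh, Softmax) that are defined elsewhere
# in the original source.  The sketch below is inferred from their call
# sites only -- shapes and method bodies are assumptions, not the original
# implementation.  Assumed shapes: X is (num_samples, n_in), W is
# (n_in, n_out), b is (1, n_out); y holds integer class labels.
import numpy as np


class MultiplyGate:
    def forward(self, W, X):
        # X*W
        return np.dot(X, W)

    def backward(self, W, X, dZ):
        # gradients of X*W with respect to W and X
        dW = np.dot(X.T, dZ)
        dX = np.dot(dZ, W.T)
        return dW, dX


class AddGate:
    def forward(self, X, b):
        # broadcast the bias row over all samples
        return X + b

    def backward(self, X, b, dZ):
        db = np.sum(dZ, axis=0, keepdims=True)
        dX = dZ * np.ones_like(X)
        return db, dX


class Tanh:
    def forward(self, X):
        return np.tanh(X)

    def backward(self, X, top_diff):
        output = self.forward(X)
        return (1.0 - np.square(output)) * top_diff


class Softmax:
    def predict(self, X):
        # numerically stable softmax over each row
        exp_scores = np.exp(X - np.max(X, axis=1, keepdims=True))
        return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    def loss(self, X, y):
        # average cross-entropy of the correct classes
        num_examples = X.shape[0]
        probs = self.predict(X)
        log_probs = -np.log(probs[range(num_examples), y])
        return np.sum(log_probs) / num_examples

    def diff(self, X, y):
        # gradient of the cross-entropy loss w.r.t. the pre-softmax scores
        # (used by the last train variant further below)
        num_examples = X.shape[0]
        probs = self.predict(X)
        probs[range(num_examples), y] -= 1
        return probs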
def train(self, X, y, num_passes=1000, lr=0.01, regularization=0.01, to_print=True):
    # add gates
    m_Gate = MultiplyGate()
    a_Gate = AddGate()

    # activate nonlinear layer
    if self.activation_func == 'sigmoid':
        layer = Sigmoid()
    elif self.activation_func == 'tanh':
        layer = Tanh()

    # activate output layer
    if self.output_func == 'softmax':
        output = Softmax()
    elif self.output_func == 'lse':
        output = LSE()

    # for each epoch
    for epoch in range(num_passes):
        # Forward propagation
        input = X
        forward = [(None, None, input)]
        # for each layer except the last one
        for i in range(len(self.W)):
            mul = m_Gate.forward(self.W[i], input)
            add = a_Gate.forward(mul, self.b[i])
            input = layer.forward(add)
            forward.append((mul, add, input))
        # last output of forward propagation is an array: num_samples * num_neurons_last_layer

        # Back propagation
        # derivative of cumulative error from output layer
        dfunc = output.calc_diff(forward[len(forward) - 1][2], y)
        for i in range(len(forward) - 1, 0, -1):
            # 1 layer consists of mul, add and layer
            dadd = layer.backward(forward[i][1], dfunc)
            # dLdb and dLdmul are functions of dLdadd
            db, dmul = a_Gate.backward(forward[i][0], self.b[i - 1], dadd)
            dW, dfunc = m_Gate.backward(self.W[i - 1], forward[i - 1][2], dmul)
            # Add regularization terms (b1 and b2 don't have regularization terms)
            dW += regularization * self.W[i - 1]
            # Gradient descent parameter update
            self.b[i - 1] += -lr * db
            self.W[i - 1] += -lr * dW

        if to_print and epoch % 100 == 0:
            print("Loss after iteration %i: %f" % (epoch, self.calculate_loss(X, y)))
def calculate_loss(self, X, y):
    m_Gate = MultiplyGate()
    a_Gate = AddGate()

    if self.activation_func == 'sigmoid':
        layer = Sigmoid()
    elif self.activation_func == 'tanh':
        layer = Tanh()

    if self.output_func == 'softmax':
        output = Softmax()
    elif self.output_func == 'lse':
        output = LSE()

    input = X
    # loop through each layer
    for i in range(len(self.W)):
        # X*W
        mul = m_Gate.forward(self.W[i], input)
        # X*W + b
        add = a_Gate.forward(mul, self.b[i])
        # nonlinear activation
        input = layer.forward(add)

    return output.eval_error(input, y)
def predict(self, X):
    m_Gate = MultiplyGate()
    a_Gate = AddGate()

    if self.activation_func == 'sigmoid':
        layer = Sigmoid()
    elif self.activation_func == 'tanh':
        layer = Tanh()

    if self.output_func == 'softmax':
        output = Softmax()
    elif self.output_func == 'lse':
        output = LSE()

    input = X
    for i in range(len(self.W)):
        mul = m_Gate.forward(self.W[i], input)
        add = a_Gate.forward(mul, self.b[i])
        input = layer.forward(add)

    if self.output_func == 'softmax':
        probs = output.eval(input)
        return np.argmax(probs, axis=1)
    elif self.output_func == 'lse':
        return (np.greater(input, 0.5)) * 1
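# This second variant selects its nonlinearity ('sigmoid'/'tanh') and output
# function ('softmax'/'lse') by name.  The Sigmoid and LSE classes it relies
# on are not shown in the original source; the sketch below is a guess at
# their interface, matching the eval_error/calc_diff calls above and
# assuming a least-squares-error output for 'lse'.  The Softmax used in this
# variant would expose eval/eval_error/calc_diff analogously.
class Sigmoid:
    def forward(self, X):
        return 1.0 / (1.0 + np.exp(-X))

    def backward(self, X, top_diff):
        output = self.forward(X)
        return output * (1.0 - output) * top_diff


class LSE:
    def eval_error(self, X, y):
        # mean squared error between network output and targets
        return np.mean(np.square(X - y))

    def calc_diff(self, X, y):
        # derivative of the squared error w.r.t. the network output
        return 2.0 * (X - y) / X.shape[0]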
def train(self, X, y, num_passes=20000, epsilon=0.01, reg_lambda=0.01, print_loss=False):
    mulGate = MultiplyGate()
    addGate = AddGate()
    layer = Tanh()
    softmaxOutput = Softmax()

    for epoch in range(num_passes):
        # Forward propagation
        input = X
        forward = [(None, None, input)]
        for i in range(len(self.W)):
            mul = mulGate.forward(self.W[i], input)
            add = addGate.forward(mul, self.b[i])
            input = layer.forward(add)
            forward.append((mul, add, input))

        # Back propagation
        dtanh = softmaxOutput.diff(forward[len(forward) - 1][2], y)
        for i in range(len(forward) - 1, 0, -1):
            dadd = layer.backward(forward[i][1], dtanh)
            db, dmul = addGate.backward(forward[i][0], self.b[i - 1], dadd)
            dW, dtanh = mulGate.backward(self.W[i - 1], forward[i - 1][2], dmul)
            # Add regularization terms (b1 and b2 don't have regularization terms)
            dW += reg_lambda * self.W[i - 1]
            # Gradient descent parameter update
            self.b[i - 1] += -epsilon * db
            self.W[i - 1] += -epsilon * dW

        # write log
        nn_log_instance.w = self.W
        nn_log_instance.b = self.b
        nn_log_instance.forward = forward
        nn_log_instance.write_log()

        if print_loss and epoch % 1000 == 0:
            print("Loss after iteration %i: %f" % (epoch, self.calculate_loss(X, y)))
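# All of the train/predict/calculate_loss methods above operate on the lists
# self.W and self.b: one weight matrix and one bias row per layer.  A
# hypothetical constructor that sets them up (the class name and the
# layers_dim argument are illustrative assumptions, not the original code)
# might look like this:
class Model:
    def __init__(self, layers_dim):
        self.W = []
        self.b = []
        for i in range(len(layers_dim) - 1):
            # small random weights scaled by the fan-in, zero biases
            self.W.append(np.random.randn(layers_dim[i], layers_dim[i + 1])
                          / np.sqrt(layers_dim[i]))
            self.b.append(np.zeros((1, layers_dim[i + 1])))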
from tensor import Tensor
from optimizer import SGD
from layer import MSELoss, Linear, Tanh, Sigmoid
from model import Sequential
import numpy as np

# Toy example of using the Tensor class
np.random.seed(0)

data = Tensor(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]), requires_grad=True)
target = Tensor(np.array([[0], [1], [0], [1]]), requires_grad=True)

# every parameter of the model is a Tensor object holding a weight matrix
model = Sequential(
    Linear(2, 3),
    Tanh(),
    Linear(3, 3),
    Tanh(),
    Linear(3, 1),
)

optim = SGD(parameters=model.get_parameters(), lr=0.1)
criterion = MSELoss()

for i in range(10):
    pred = model(data)
    loss = criterion(pred, target)
    # seed backpropagation with an all-ones gradient of the loss tensor
    loss.backward(Tensor(np.ones_like(loss.data), is_grad=True))
    optim.step()
    print(loss.data)

print("------------------------------------------------------------------------")
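# Note that the loop above never calls an explicit zero_grad().  A minimal
# SGD that would fit this usage (a sketch only -- the real optimizer.py and
# Tensor class may differ) applies the update in step() and then clears each
# accumulated gradient:
class MinimalSGD:
    def __init__(self, parameters, lr=0.1):
        self.parameters = parameters
        self.lr = lr

    def step(self):
        for p in self.parameters:
            # vanilla gradient-descent update
            p.data -= self.lr * p.grad.data
            # reset the accumulated gradient for the next iteration
            p.grad.data *= 0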
def build_network(self, *d, **types):
    """ Method to build a neural network structure """
    # check number of layers
    Nlayers = len(d)
    if Nlayers < 2:
        print("ERROR: A neural network needs at least an input and an output layer!")
        exit(1)

    if "verbose" in types:
        self.verbose = types.get("verbose")

    # check if the user specified the scope of the network
    # if not, set it to classification
    if "scope" in types:
        self.scope = types.get("scope")
    else:
        self.scope = "classification"

    # check if the user specified the types of layers
    # if not, set them to default types
    if "out_type" in types:
        self.out_type = types.get("out_type")
    else:
        if d[Nlayers - 1] == 1:
            self.out_type = "linear"
        else:
            self.out_type = "softmax"

    if "hidden_type" in types:
        self.hidden_type = types.get("hidden_type")
    else:
        if Nlayers > 2:
            self.hidden_type = "tanh"

    # add layers to the neural network
    # add input layer
    self.layers.append(Layer(d[0]))

    # if present, add hidden layers
    if Nlayers > 2:
        if self.hidden_type == "tanh":
            for i in range(1, Nlayers - 1):
                self.layers.append(Tanh(d[i], d[i - 1]))
                self.layers[i].xavier_init_weights()
        elif self.hidden_type == "sigmoid":
            for i in range(1, Nlayers - 1):
                self.layers.append(Sigmoid(d[i], d[i - 1]))
                self.layers[i].xavier_init_weights()
        elif self.hidden_type == "linear":
            for i in range(1, Nlayers - 1):
                self.layers.append(Linear(d[i], d[i - 1]))
                self.layers[i].xavier_init_weights()
        elif self.hidden_type == "softmax":
            for i in range(1, Nlayers - 1):
                self.layers.append(Softmax(d[i], d[i - 1]))
                self.layers[i].xavier_init_weights()
        elif self.hidden_type == "softsign":
            for i in range(1, Nlayers - 1):
                self.layers.append(SoftSign(d[i], d[i - 1]))
                self.layers[i].xavier_init_weights()
        elif self.hidden_type == "relu":
            for i in range(1, Nlayers - 1):
                self.layers.append(ReLU(d[i], d[i - 1]))
                self.layers[i].xavier_init_weights()
        else:
            print("ERROR: no layer of type " + str(self.hidden_type) + " exists!")
            exit(1)

    # add output layer
    if self.out_type == "softmax":
        self.layers.append(Softmax(d[Nlayers - 1], d[Nlayers - 2]))
        self.layers[Nlayers - 1].xavier_init_weights()
    elif self.out_type == "sigmoid":
        self.layers.append(Sigmoid(d[Nlayers - 1], d[Nlayers - 2]))
        self.layers[Nlayers - 1].xavier_init_weights()
    elif self.out_type == "linear":
        self.layers.append(Linear(d[Nlayers - 1], d[Nlayers - 2]))
        self.layers[Nlayers - 1].xavier_init_weights()
    elif self.out_type == "tanh":
        self.layers.append(Tanh(d[Nlayers - 1], d[Nlayers - 2]))
        self.layers[Nlayers - 1].xavier_init_weights()
    elif self.out_type == "softsign":
        self.layers.append(SoftSign(d[Nlayers - 1], d[Nlayers - 2]))
        self.layers[Nlayers - 1].xavier_init_weights()
    elif self.out_type == "relu":
        self.layers.append(ReLU(d[Nlayers - 1], d[Nlayers - 2]))
        self.layers[Nlayers - 1].xavier_init_weights()
    else:
        print("ERROR: no layer of type " + str(self.out_type) + " exists!")
        exit(1)

    # save number of layers
    self.Nlayers = Nlayers
    if self.verbose:
        self.print_network_structure()
def add_layer(self, type, dim):
    """ Method that adds to the network a layer of dimension dim and type type """
    if type == "input":
        if self.Nlayers == 0:
            self.layers.append(Layer(dim))
            self.Nlayers = len(self.layers)
        else:
            print("ERROR: the network already has an input layer!")
            exit(1)
    elif type == "linear":
        if self.Nlayers == 0:
            print("ERROR: the network needs an input layer first!")
            exit(1)
        else:
            self.layers.append(Linear(dim, self.layers[self.Nlayers - 1].n))
            self.Nlayers = len(self.layers)
            self.layers[self.Nlayers - 1].xavier_init_weights()
    elif type == "tanh":
        if self.Nlayers == 0:
            print("ERROR: the network needs an input layer first!")
            exit(1)
        else:
            self.layers.append(Tanh(dim, self.layers[self.Nlayers - 1].n))
            self.Nlayers = len(self.layers)
            self.layers[self.Nlayers - 1].xavier_init_weights()
    elif type == "relu":
        if self.Nlayers == 0:
            print("ERROR: the network needs an input layer first!")
            exit(1)
        else:
            self.layers.append(ReLU(dim, self.layers[self.Nlayers - 1].n))
            self.Nlayers = len(self.layers)
            self.layers[self.Nlayers - 1].xavier_init_weights()
    elif type == "softsign":
        if self.Nlayers == 0:
            print("ERROR: the network needs an input layer first!")
            exit(1)
        else:
            self.layers.append(SoftSign(dim, self.layers[self.Nlayers - 1].n))
            self.Nlayers = len(self.layers)
            self.layers[self.Nlayers - 1].xavier_init_weights()
    elif type == "sigmoid":
        if self.Nlayers == 0:
            print("ERROR: the network needs an input layer first!")
            exit(1)
        else:
            self.layers.append(Sigmoid(dim, self.layers[self.Nlayers - 1].n))
            self.Nlayers = len(self.layers)
            self.layers[self.Nlayers - 1].xavier_init_weights()
    elif type == "softmax":
        if self.Nlayers == 0:
            print("ERROR: the network needs an input layer first!")
            exit(1)
        else:
            self.layers.append(Softmax(dim, self.layers[self.Nlayers - 1].n))
            self.Nlayers = len(self.layers)
            self.layers[self.Nlayers - 1].xavier_init_weights()
    else:
        print("ERROR: no such layer available!")
        exit(1)
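# Hypothetical usage of the two construction paths above (the NeuralNetwork
# container name and the 64-32-10 layer sizes are illustrative assumptions):
# either build the whole structure in one call, or add layers one at a time.
net = NeuralNetwork()
net.build_network(64, 32, 10, hidden_type="relu", out_type="softmax",
                  scope="classification", verbose=True)

net2 = NeuralNetwork()
net2.add_layer("input", 64)
net2.add_layer("relu", 32)
net2.add_layer("softmax", 10)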
# now what to do with input
# can't train on 1 to 100 (the actual set under consideration),
# so we'll train on numbers bigger than 100
def binary_encode(x: int) -> List[int]:
    """10 digit binary encoding of x"""
    return [x >> i & 1 for i in range(10)]


# train on numbers bigger than 100
inputs = np.array([binary_encode(x) for x in range(101, 1024)])
targets = np.array([fizz_buzz_encode(x) for x in range(101, 1024)])

net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4),
])

train(net, inputs, targets, num_epochs=5000)

for x in range(1, 101):
    predicted = net.forward(binary_encode(x))
    predicted_idx = np.argmax(predicted)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
    print(x, labels[predicted_idx], labels[actual_idx])
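# For completeness: the fizz_buzz_encode helper referenced above is not shown
# in this excerpt.  Consistent with the labels list [str(x), "fizz", "buzz",
# "fizzbuzz"], it one-hot encodes the four possible outputs, e.g.:
def fizz_buzz_encode(x: int) -> List[int]:
    if x % 15 == 0:
        return [0, 0, 0, 1]   # "fizzbuzz"
    elif x % 5 == 0:
        return [0, 0, 1, 0]   # "buzz"
    elif x % 3 == 0:
        return [0, 1, 0, 0]   # "fizz"
    else:
        return [1, 0, 0, 0]   # the number itself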
    Sigmoid(16, 16),
    Sigmoid(16, 10),
])
# train
net.train(x_train, y_train, learning_rate=0.2, epochs=500)
# test
print('Accuracy in test set: ', net.get_accuracy(x_test, y_test))
# save model
net.save_model('model_sigmoid8.json')

# build neural network
net = NeuralNetwork([
    Tanh(8 * 8, 16),
    Tanh(16, 16),
    Tanh(16, 10),
])
# train
net.train(x_train, y_train, learning_rate=0.2, epochs=500)
# test
print('Accuracy in test set: ', net.get_accuracy(x_test, y_test))
# save model
net.save_model('model_tanh8.json')

# load MNIST data
(x_train, y_train), (x_test, y_test) = load_data()