def run_test_model():
    basic_NN = JTDNN()
    input = basic_NN.input(input_dims=(2, None))

    Z1 = layers.Linear(output_dims=(10, None), initialiser="glorot", name="linear")(input)
    A1 = activations.Relu(Z1, name='relu')
    Z2 = layers.Linear(output_dims=(5, None), initialiser="glorot", name="Henry")(A1)
    A2 = activations.Relu(Z2, name='relu')
    Z3 = layers.Linear(output_dims=(1, None), initialiser="glorot")(A2)  # name should be automatically set to "Henry2"
    output = activations.Sigmoid(Z3, name='sigmoid')

    optimiser = optimisers.GradientDesc(learning_rate=0.001)
    basic_NN.compile(input=input,
                     output=output,
                     lambd=0.01,
                     loss="BinaryCrossEntropy",
                     optimiser=optimiser)  # BGD stands for Batch Gradient Descent

    # basic_NN.fit(X, Y, num_iterations=10000, verbose=1)
    num_iterations = 10000
    for _ in range(num_iterations):
        basic_NN.forward_prop(X)
        basic_NN.compute_cost(Y)
        basic_NN.back_prop()
        basic_NN.update_weights()
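# run_test_model() reads X and Y from the enclosing scope. A minimal sketch of
# toy data in the (features, examples) layout the other tests use -- X of shape
# (2, m) to match input_dims=(2, None), Y of shape (1, m) -- is given below.
# The labelling rule is arbitrary and purely illustrative.
import numpy as np

m = 200
X = np.random.randn(2, m)
Y = (X[0:1, :] * X[1:2, :] > 0).astype(np.uint8)  # hypothetical binary labels, shape (1, m)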
def __init__(self, num_in, num_out, args):
    # number of in and out neurons
    self.num_in = num_in
    self.num_out = num_out

    # set activation function
    if 'activation' in args:
        self.activation = args['activation']()
    else:
        self.activation = activations.Sigmoid()

    # set learning rate
    if 'eta' in args:
        self.eta = args['eta']
    else:
        # some default value
        self.eta = 1 / num_in

    # When initialising the weights it makes sense to narrow the Gaussian
    # distribution so the output of the first layer does not saturate,
    # which typically happens for a large number of input neurons.
    self.W = np.random.randn(num_out, num_in)
    self.b = np.random.randn(num_out, 1)

    self.delta_W = np.zeros_like(self.W)
    self.delta_b = np.zeros_like(self.b)
    self.da = np.zeros((self.b.size, self.b.size))
    return
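# The comment above alludes to shrinking the variance of the initial weights;
# the constructor further down does exactly that in its "norm_adapt" branch.
# A standalone sketch of that scaled initialisation (the function name is a
# placeholder, not part of the project):
import numpy as np

def init_weights_scaled(num_in, num_out, rng=np.random):
    """Gaussian init scaled by 1/sqrt(num_in); keeps pre-activations of
    sigmoid/tanh units away from saturation when num_in is large."""
    W = rng.randn(num_out, num_in) / np.sqrt(num_in)
    b = rng.randn(num_out, 1)
    return W, b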
def test_fully_connected_NN():
    basic_NN = JTDNN()
    input = basic_NN.input(input_dims=(2, None))

    Z1 = layers.Linear(output_dims=(10, None), initialiser="glorot", name="linear")(input)
    A1 = activations.Sigmoid(Z1, name='sigmoid')
    Z2 = layers.Linear(output_dims=(5, None), initialiser="glorot", name="linear")(A1)
    A2 = activations.Sigmoid(Z2, name='sigmoid')
    Z3 = layers.Linear(output_dims=(1, None), initialiser="glorot", name="linear")(A2)
    output = activations.Sigmoid(Z3, name='sigmoid')

    print(f'basic_NN.graph_lis {basic_NN.graph_lis}')    # e.g. ['linear1', ...]
    print(f'basic_NN.graph_dict {basic_NN.graph_dict}')  # e.g. {'linear1': <layers.Linear object>, ...}
    print(f'output.jtdnn_obj {output.jtdnn_obj}')
    print(f'output.output_size {output.output_size}')
def __init__(self, num_in, num_out, args):
    # number of in and out neurons
    self.num_in = num_in
    self.num_out = num_out

    # set activation function
    if 'activation' in args:
        self.activation = args['activation']()
    else:
        self.activation = activations.Sigmoid()

    # set learning rate
    if 'eta' in args:
        self.eta = args['eta']
    else:
        # some default value
        self.eta = 0.1

    # set parameter weight function
    if 'weight' in args:
        self.params_weight_funct = args['weight']()
    else:
        # use L2 weight as default
        self.params_weight_funct = weights.L2()

    # set matrix learning rate
    if 'omega' in args:
        self.params_omega = args['omega']
    else:
        # some default value
        self.params_omega = 0.1

    # initialize weights to zero
    self.W = np.zeros((num_out, num_in))
    self.b = np.zeros((num_out, 1))
    # or according to the chosen scheme
    if 'init' in args:
        if args['init'] == "norm":
            # initialize weights with normal random numbers
            self.W = np.random.randn(num_out, num_in)
            self.b = np.random.randn(num_out, 1)
        elif args['init'] == "norm_adapt":
            # initialize weights with normal random numbers but reduce the
            # variance in order not to saturate the neurons
            self.W = np.random.randn(num_out, num_in) / np.sqrt(num_in)
            self.b = np.random.randn(num_out, 1)

    self.delta_W = np.zeros_like(self.W)
    self.delta_b = np.zeros_like(self.b)
    self.da = np.zeros((self.b.size, self.b.size))
    return
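# A quick usage sketch of the args-dict interface above. 'Layer' is a
# placeholder for whatever class owns this constructor; the keys and the
# modules (activations, weights) are the ones referenced in the code.
layer = Layer(num_in=784, num_out=64, args={
    'activation': activations.Tanh,  # passed as a class, instantiated inside
    'eta': 0.05,                     # per-layer learning rate
    'omega': 0.2,                    # matrix learning rate
    'init': 'norm_adapt',            # scaled Gaussian initialisation
})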
def __init__(self, f=''):
    # Forward computations
    self.x = None  # layer input signal
    self.z = None  # weighted sum (forward)
    self.a = None  # activation (forward)
    self.f = None

    # Set activation
    if f == 'sigmoid':
        self.f = activations.Sigmoid()
    elif f == 'tanh':
        self.f = activations.Tanh()
    elif f == 'softmax':
        self.f = activations.Softmax()
    else:
        raise Exception('Currently need to specify activation.')

    # Back computations
    self.delta = None  # delta for this layer
def activation_forward(self, input, W, b, activation_type):
    '''
    :param input: the input of the current layer
    :param W: the weights of the current layer
    :param b: the biases of the current layer
    :param activation_type: type of activation function used in the forward propagation
    :return:
        - A --> the output of the activation function
        - packet_of_packets --> tuple of 2 elements which will be used in backward propagation:
            1- linear packet: contains (input, weights, biases) of the current layer
            2- activation packet: contains (Z), the input to the activation function
    '''
    if activation_type == "sigmoid":
        Z, linear_packet = self.identity_forward(input, W, b)  # Z = input * W + b
        temp = activations.Sigmoid()
        A, activation_packet = temp.forward(Z)  # A = sigmoid(Z)
    elif activation_type == "relu":
        Z, linear_packet = self.identity_forward(input, W, b)
        temp = activations.relu()
        A, activation_packet = temp.forward(Z)
    elif activation_type == "leaky_relu":
        Z, linear_packet = self.identity_forward(input, W, b)
        temp = activations.leaky_relu()
        A, activation_packet = temp.forward(Z)
    elif activation_type == "tanh":
        Z, linear_packet = self.identity_forward(input, W, b)
        temp = activations.tanh()
        A, activation_packet = temp.forward(Z)
    elif activation_type == "softmax":
        Z, linear_packet = self.identity_forward(input, W, b)
        A, activation_packet = activations.Softmax().forward(Z)
    elif activation_type == "linear":
        Z, linear_packet = self.identity_forward(input, W, b)
        A, activation_packet = Z, Z
    else:
        raise ValueError("ERROR: activation function is not determined")

    packet_of_packets = (linear_packet, activation_packet)
    return A, packet_of_packets
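# identity_forward is called above but not shown in this file. Given that the
# linear packet must hold (input, weights, biases) and that W maps num_in
# inputs to num_out outputs, a plausible sketch (written as a free function
# for brevity; the real method may differ) is:
import numpy as np

def identity_forward(input, W, b):
    """Affine step of a layer: Z = W @ input + b.
    Returns Z together with the 'linear packet' cached for backprop."""
    Z = np.dot(W, input) + b
    linear_packet = (input, W, b)
    return Z, linear_packet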
def __init__(self, input_size=784, output_size=10, hiddens=[64, 32],
             activations=[ac.Sigmoid(), ac.Sigmoid(), ac.Sigmoid()],
             criterion=ac.SoftmaxCrossEntropy(),
             lr=0.008, momentum=0.856, num_bn_layers=1):
    self.train_mode = True
    self.num_bn_layers = num_bn_layers
    self.bn = num_bn_layers > 0
    self.nlayers = len(hiddens) + 1
    self.input_size = input_size
    self.output_size = output_size
    self.activations = activations
    self.criterion = criterion
    self.lr = lr
    self.momentum = momentum
    self.W = None
    self.b = None
    # self.weight_init_fn = weight_init_fn
    # self.bias_init_fn = bias_init_fn

    if self.bn:
        self.bn_layers = [bn.BatchNorm(hiddens[t]) for t in range(0, num_bn_layers)]

    self.loss = None
    self.firstinit = True
    self.hiddens = hiddens
    self.zerosW = None
    self.zerosb = None
    self.batch_size = 10
    self.epochs = 40
    self.training_loss = []
    self.validation_acc = []
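# The lr, momentum, zerosW and zerosb attributes above suggest SGD with
# momentum, with zerosW/zerosb acting as velocity buffers. The actual update
# step is not shown here; the sketch below is a standard classical-momentum
# formulation, an assumption rather than the repository's exact code.
def momentum_step(W, dW, vW, lr, momentum):
    """One SGD-with-momentum update for a single weight matrix."""
    vW_new = momentum * vW - lr * dW  # accumulate a decaying velocity
    W_new = W + vW_new                # apply it to the weights
    return W_new, vW_new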
def activation_backward(self, delta_A, packet_of_packets, activation_type, lambd):
    '''
    :param delta_A: the derivative of the loss function w.r.t. the activation output
    :param packet_of_packets: tuple of 2 elements produced in forward propagation:
        1- linear packet: contains (input, weights, biases) of the current layer
        2- activation packet: contains (Z), the input to the activation function
    :param activation_type: the type of the activation function used in this layer
    :param lambd: regularization parameter
    :return:
        - dA_prev: the gradient of the previous layer's activation
        - dW: the gradient of the weights of the current layer
        - db: the gradient of the biases of the current layer
    '''
    linear_packet, act_packet = packet_of_packets

    if activation_type == "relu":
        temp = activations.relu()
        dZ = temp.backward(delta_A, act_packet)  # we have to implement this relu backward function
        dA_prev, dW, db = self.identity_backward(dZ, linear_packet, lambd)
    elif activation_type == "sigmoid":
        temp = activations.Sigmoid()
        dZ = temp.backward(delta_A, act_packet)
        dA_prev, dW, db = self.identity_backward(dZ, linear_packet, lambd)
        # we will start from here tomorrow: we have to deal with Y_hat and y_true
        # while creating an instance of the cost class
        # temp = Losses.square_difference()
        # dA = temp.backprop_cost(self.linear_packet)
    elif activation_type == "softmax":
        temp = activations.Softmax()
        dZ = temp.diff(delta_A)
        dA_prev, dW, db = self.identity_backward(dZ, linear_packet, lambd)

    return dA_prev, dW, db
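# identity_backward is likewise not shown. Given the (input, W, b) linear
# packet and the L2 regularization coefficient lambd used at compile time
# (lambd=0.01), a plausible sketch (free function for brevity; an assumption,
# not necessarily the project's implementation) is:
import numpy as np

def identity_backward(dZ, linear_packet, lambd):
    """Backward pass of the affine step Z = W @ A_prev + b, with L2 term."""
    A_prev, W, b = linear_packet
    m = A_prev.shape[1]                              # number of examples
    dW = (np.dot(dZ, A_prev.T) + lambd * W) / m      # weight gradient plus L2 penalty
    db = np.sum(dZ, axis=1, keepdims=True) / m       # bias gradient
    dA_prev = np.dot(W.T, dZ)                        # gradient w.r.t. previous activation
    return dA_prev, dW, db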
def test_forward_prop():
    file_name = 'cubic_model.plk'
    basic_NN = JTDNN()
    input = basic_NN.input(input_dims=(2, None))

    Z1 = layers.Linear(output_dims=(10, None), initialiser="glorot", name="linear")(input)
    A1 = activations.Sigmoid(Z1, name='sigmoid')
    Z2 = layers.Linear(output_dims=(5, None), initialiser="glorot", name="linear")(A1)
    A2 = activations.Sigmoid(Z2, name='sigmoid')
    Z3 = layers.Linear(output_dims=(1, None), initialiser="glorot", name="linear")(A2)
    output = activations.Sigmoid(Z3, name='sigmoid')

    optimiser = optimisers.GradientDesc(learning_rate=0.01)
    fig_num_cost = 2
    loss = losses.BinaryCrossEntropy(basic_NN, store_cost=True, fig_num=fig_num_cost)
    basic_NN.compile(input=input,
                     output=output,
                     lambd=0.01,
                     loss=loss,
                     metrics="Accuracy",
                     optimiser=optimiser)  # BGD stands for Batch Gradient Descent

    csv_file = r'C:\Users\josia\Desktop\Josiah_Folder\UNI\Semester_1\PEP1\robotics_club\YOLOv3_tiny\labelled_data2D_3.csv'
    plots = pd.read_csv(csv_file)
    plots = plots.to_numpy()
    X = plots[:, [0, 1]].T
    X, mu, sigma = feature_norm(X)
    Y = plots[:, -1].astype(np.uint8).reshape(1, -1)

    fig_num_dec = 1
    for itera in range(1000000):
        AL = basic_NN.forward_prop(X)
        if itera % 10000 == 0:
            loss = basic_NN.compute_cost(Y)
            print(loss)
            # print('accuracy after iteration %d: %4.2f' % (itera, np.mean((AL >= 0.5) == Y) * 100))
        basic_NN.back_prop(Y)
        basic_NN.update_weights()

    basic_NN.plot_cost(title="Cost per Iteration",
                       xlabel="Number of iterations (10000s)",
                       ylabel="Cost")
    print(basic_NN.get_costs())
    plot_decision_boundary(X, Y, basic_NN, fig_num_dec)
    plt.show()
    # joblib.dump(basic_NN, file_name)  # testing whether we can dump the object in a file
    # print(A1, A2, output, Z1, Z2, Z3)  # prints out all the objects
    # sequence generated from the print statements:
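# feature_norm is used above with the signature X, mu, sigma = feature_norm(X)
# but is not shown. A standard per-feature standardisation consistent with
# that signature (an assumption, not necessarily the project's code) is:
import numpy as np

def feature_norm(X):
    """Standardise each feature (row) of X, shape (n_features, m).
    Returns the normalised data plus the statistics so the same transform
    can be reapplied to new data."""
    mu = np.mean(X, axis=1, keepdims=True)
    sigma = np.std(X, axis=1, keepdims=True)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma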
def test_mini_batches():
    file_name = 'cubic_model.plk'
    basic_NN = JTDNN()
    input = basic_NN.input(input_dims=(2, None))

    Z1 = layers.Linear(output_dims=(10, None), initialiser="glorot", name="linear")(input)
    A1 = activations.Sigmoid(Z1, name='sigmoid')
    Z2 = layers.Linear(output_dims=(5, None), initialiser="glorot", name="linear")(A1)
    A2 = activations.Sigmoid(Z2, name='sigmoid')
    Z3 = layers.Linear(output_dims=(1, None), initialiser="glorot", name="linear")(A2)
    output = activations.Sigmoid(Z3, name='sigmoid')

    optimiser = optimisers.GradientDesc(learning_rate=0.001)
    fig_num_cost = 2
    loss = losses.BinaryCrossEntropy(basic_NN, store_cost=True, fig_num=fig_num_cost)
    basic_NN.compile(input=input,
                     output=output,
                     lambd=0.01,
                     loss=loss,
                     metrics="Accuracy",
                     optimiser=optimiser)  # BGD stands for Batch Gradient Descent

    csv_file = r'C:\Users\josia\Desktop\Josiah_Folder\UNI\Semester_1\PEP1\robotics_club\YOLOv3_tiny\labelled_data2D_3.csv'
    plots = pd.read_csv(csv_file)
    plots = plots.to_numpy()
    X = plots[:, [0, 1]].T
    X, mu, sigma = feature_norm(X)
    Y = plots[:, -1].astype(np.uint8).reshape(1, -1)

    fig_num_dec = 1
    mini_batch_size = 64
    num_epoches = 10

    """
    for epoch in range(num_epoches):
        for mini_batch in mini_batch_generator(X, Y, mini_batch_size):
            print("shape of mini_batch_X: " + str(mini_batch[0].shape))
            print("shape of mini_batch_Y: " + str(mini_batch[1].shape))
    """
    """
    shape of mini_batch_X: (2, 64)
    shape of mini_batch_Y: (1, 64)
    shape of mini_batch_X: (2, 64)
    shape of mini_batch_Y: (1, 64)
    shape of mini_batch_X: (2, 64)
    shape of mini_batch_Y: (1, 64)
    shape of mini_batch_X: (2, 7)
    shape of mini_batch_Y: (1, 7)
    """

    for epoch in range(num_epoches):
        mini_batch_num = 1
        for mini_batch_X, mini_batch_Y in mini_batch_generator(X, Y, mini_batch_size):
            """
            # random experiment: skip the last, smaller mini-batch
            if mini_batch_X.shape[-1] != mini_batch_size:
                print(mini_batch_X.shape)
                continue
            """
            AL = basic_NN.forward_prop(mini_batch_X)
            cost = basic_NN.compute_cost(mini_batch_Y)
            print('epoch %d accuracy after iteration %d: %4.2f'
                  % (epoch, mini_batch_num, np.mean((AL >= 0.5) == mini_batch_Y) * 100))
            basic_NN.back_prop(mini_batch_Y)
            basic_NN.update_weights()
            mini_batch_num += 1

    """
    for itera in range(1000000):
        AL = basic_NN.forward_prop(X)
        if itera % 10000 == 0:
            loss = basic_NN.compute_cost(Y)
            print(loss)
            # print('accuracy after iteration %d: %4.2f' % (itera, np.mean((AL >= 0.5) == Y) * 100))
        basic_NN.back_prop(Y)
        basic_NN.update_weights()
    """

    basic_NN.plot_cost(title="Cost per Iteration",
                       xlabel="Number of iterations",
                       ylabel="Cost")
    plot_decision_boundary(X, Y, basic_NN, fig_num_dec)
    plt.show()
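# mini_batch_generator is used above but not shown. A generator that matches
# the printed shapes (full batches of mini_batch_size columns plus a smaller
# final batch, e.g. (2, 64) ... (2, 7)) could look like the sketch below;
# shuffling before slicing is an assumption.
import numpy as np

def mini_batch_generator(X, Y, mini_batch_size):
    """Yield (mini_batch_X, mini_batch_Y) column slices of X (n_x, m) and Y (1, m)."""
    m = X.shape[1]
    permutation = np.random.permutation(m)  # shuffle the examples
    X_shuffled = X[:, permutation]
    Y_shuffled = Y[:, permutation]
    for start in range(0, m, mini_batch_size):
        end = start + mini_batch_size
        yield X_shuffled[:, start:end], Y_shuffled[:, start:end]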
import mlp
import activations as ac
import mnist
import numpy as np

np.random.seed(11785)

# initialize neural parameters
learning_rate = 0.004
momentum = 0.996  # 0.956
num_bn_layers = 1
mini_batch_size = 10
epochs = 40

train, val, test = mnist.load_mnist()

net = mlp.MLP(784, 10, [64, 32],
              [ac.Sigmoid(), ac.Sigmoid(), ac.Sigmoid()],
              ac.SoftmaxCrossEntropy(),
              learning_rate, momentum, num_bn_layers)

net.fit(train, val, epochs, mini_batch_size)

test_acc = net.validate(test) * 100.0
net.save(str(test_acc) + "_acc_nn_model.pkl")
print("Test Accuracy: " + str(test_acc) + "%")