def tanhexp(xTrain, xTest, yTrain, yTest, hots):
    print('EXPERIMENT WITH TANH ACTIVATION FUNCTION')
    # First neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.6, 0.6, 0.6])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=60, encoding_dict=hots)

    # Second neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.3, 0.3, 0.3])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=60, encoding_dict=hots)

    # Third neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.2, 0.2, 0.2])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=60, encoding_dict=hots)
def test_set_activation(self):
    self.assertEqual(Sigmoid().__class__, self.nn.layers[0].get_activation().__class__)
    self.assertEqual(Sigmoid().__class__, self.nn.layers[1].get_activation().__class__)
    self.assertEqual(Sigmoid().__class__, self.nn.layers[2].get_activation().__class__)
    self.nn.set_activation([Tanh(), Step(), Tanh()])
    self.assertEqual(Tanh().__class__, self.nn.layers[0].get_activation().__class__)
    self.assertEqual(Step().__class__, self.nn.layers[1].get_activation().__class__)
    self.assertEqual(Tanh().__class__, self.nn.layers[2].get_activation().__class__)
def twohl(xTrain, xTest, yTrain, yTest, hots):
    print('EXPERIMENT WITH TWO HIDDEN LAYERS')
    # First neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.6, 0.6, 0.6])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=50, encoding_dict=hots)

    # Second neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Tanh(), Tanh(), Tanh()])
    nn.set_learning_rate([0.6, 0.6, 0.6])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=50, encoding_dict=hots)

    # Third neural network
    nn = NeuralNetwork(7, [5, 4], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.3, 0.3, 0.3])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=50, encoding_dict=hots)

    # Fourth neural network
    nn = NeuralNetwork(7, [10, 10], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.6, 0.6, 0.6])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=50, encoding_dict=hots)

    # Fifth neural network
    nn = NeuralNetwork(7, [1, 2], 2, 3)
    nn.set_activation([Sigmoid(), Sigmoid(), Sigmoid()])
    nn.set_learning_rate([0.6, 0.6, 0.6])
    train_plot_cm(nn, xTrain, xTest, yTrain, yTest, epochs=50, encoding_dict=hots)
def run_all_model(train_input, train_target, test_input, test_target, Sample_number, save_plot=False):
    # Define constants shared across the tests
    hidden_nb = 25
    std = 0.1
    eta = 3e-1
    batch_size = 200
    epochs_number = 1000

    # Model 1. No dropout; constant learning rate (SGD)
    print('\nModel 1: Optimizer: SGD; No dropout; ReLU; CrossEntropy')
    # Define model name for plots
    mname = 'Model1'
    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)
    model_1 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=CrossEntropy())
    # Initialize weights
    model_1.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)
    # Train model
    my_loss_1 = train_model(model_1, train_input, train_target, optimizer, epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_1_perf = evaluate_model(model_1, train_input, train_target, test_input, test_target, my_loss_1, save_plot, mname=mname)

    # Model 2. No dropout; decreasing learning rate (DecreaseSGD)
    print('\nModel 2: Optimizer: DecreaseSGD; No dropout; ReLU; CrossEntropy')
    # Define model name for plots
    mname = 'Model2'
    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)
    model_2 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=CrossEntropy())
    # Initialize weights
    model_2.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = DecreaseSGD(eta)
    # Train model
    my_loss_2 = train_model(model_2, train_input, train_target, optimizer, epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_2_perf = evaluate_model(model_2, train_input, train_target, test_input, test_target, my_loss_2, save_plot, mname=mname)

    # Model 3. No dropout; Adam optimizer
    print('\nModel 3: Optimizer: Adam; No dropout; ReLU; CrossEntropy')
    # Define model name for plots
    mname = 'Model3'
    # Custom hyperparameters
    eta_adam = 1e-3
    epochs_number_adam = 500
    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)
    model_3 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=CrossEntropy())
    # Initialize weights
    model_3.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Adam(eta_adam, 0.9, 0.99, 1e-8)
    # Train model
    my_loss_3 = train_model(model_3, train_input, train_target, optimizer, epochs_number_adam, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_3_perf = evaluate_model(model_3, train_input, train_target, test_input, test_target, my_loss_3, save_plot, mname=mname)

    # PLOT TO COMPARE OPTIMIZERS
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=1)
        plt.plot(range(0, epochs_number), my_loss_2, linewidth=1)
        plt.plot(range(0, epochs_number_adam), my_loss_3, linewidth=1)
        plt.legend(["SGD", "Decreasing SGD", "Adam"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_optimizers.pdf', bbox_inches='tight')
        plt.close(fig)

    # Model 4. Dropout; SGD
    print('\nModel 4: Optimizer: SGD; Dropout; ReLU; CrossEntropy')
    # Define model name for plots
    mname = 'Model4'
    # Define structure of the network
    dropout = 0.15
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb, dropout=dropout)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb, dropout=dropout)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)
    model_4 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=CrossEntropy())
    # Initialize weights
    model_4.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)
    # Train model
    my_loss_4 = train_model(model_4, train_input, train_target, optimizer, epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_4_perf = evaluate_model(model_4, train_input, train_target, test_input, test_target, my_loss_4, save_plot, mname=mname)

    # PLOT TO COMPARE DROPOUT AND NO DROPOUT
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=1)
        plt.plot(range(0, epochs_number), my_loss_4, linewidth=1)
        plt.legend(["Without Dropout", "With Dropout"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_dropout.pdf', bbox_inches='tight')
        plt.close(fig)

    print('\nEvaluation of different activation functions\n')

    # Model 5. No dropout; SGD; Tanh
    print('\nModel 5: Optimizer: SGD; No dropout; Tanh; CrossEntropy')
    # Define model name for plots
    mname = 'Model5'
    # Define structure of the network
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Tanh()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Tanh()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Tanh()
    linear_4 = Linear(hidden_nb, 2)
    model_5 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=CrossEntropy())
    # Initialize weights
    model_5.normalize_parameters(mean=0, std=std)
    # Define optimizer
    optimizer = Sgd(eta)
    # Train model
    my_loss_5 = train_model(model_5, train_input, train_target, optimizer, epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_5_perf = evaluate_model(model_5, train_input, train_target, test_input, test_target, my_loss_5, save_plot, mname=mname)

    # Model 6. Xavier initialization
    print('\nModel 6: Optimizer: SGD; No dropout; Tanh; Xavier initialization; CrossEntropy')
    # Define model name for plots
    mname = 'Model6'
    # Define network structure
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Tanh()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Tanh()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Tanh()
    linear_4 = Linear(hidden_nb, 2)
    model_6 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=CrossEntropy())
    model_6.xavier_parameters()
    optimizer = Sgd()
    # Train model
    my_loss_6 = train_model(model_6, train_input, train_target, optimizer, epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_6_perf = evaluate_model(model_6, train_input, train_target, test_input, test_target, my_loss_6, save_plot, mname=mname)

    # Model 7. Sigmoid
    print('\nModel 7: Optimizer: SGD; No dropout; Sigmoid; CrossEntropy')
    # Define model name for plots
    mname = 'Model7'
    # Define parameter for sigmoid activation
    p_lambda = 0.1
    # Define network structure
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Sigmoid(p_lambda)
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Sigmoid(p_lambda)
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Sigmoid(p_lambda)
    linear_4 = Linear(hidden_nb, 2)
    model_7 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=CrossEntropy())
    model_7.normalize_parameters(mean=0.5, std=1)
    optimizer = Sgd(eta=0.5)
    # Train model
    my_loss_7 = train_model(model_7, train_input, train_target, optimizer, epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_7_perf = evaluate_model(model_7, train_input, train_target, test_input, test_target, my_loss_7, save_plot, mname=mname)

    # PLOT TO COMPARE EFFECT OF DIFFERENT ACTIVATIONS
    if save_plot:
        fig = plt.figure(figsize=(10, 4))
        plt.plot(range(0, epochs_number), my_loss_1, linewidth=0.5)
        plt.plot(range(0, epochs_number), my_loss_5, linewidth=0.5, alpha=0.8)
        plt.plot(range(0, epochs_number), my_loss_6, linewidth=0.5, alpha=0.8)
        plt.plot(range(0, epochs_number), my_loss_7, linewidth=0.5)
        plt.legend(["Relu", "Tanh", "Tanh (Xavier)", "Sigmoid"])
        plt.title("Loss")
        plt.xlabel("Epochs")
        plt.savefig('output/compare_activations.pdf', bbox_inches='tight')
        plt.close(fig)

    print('\nEvaluation of base model with MSE loss\n')

    # Model 8. MSE loss
    print('\nModel 8: Optimizer: SGD; No dropout; Relu; MSE')
    # Define model name for plots
    mname = 'Model8'
    linear_1 = Linear(2, hidden_nb)
    relu_1 = Relu()
    linear_2 = Linear(hidden_nb, hidden_nb)
    relu_2 = Relu()
    linear_3 = Linear(hidden_nb, hidden_nb)
    relu_3 = Relu()
    linear_4 = Linear(hidden_nb, 2)
    loss = LossMSE()
    model_8 = Sequential(linear_1, relu_1, linear_2, relu_2, linear_3, relu_3, linear_4, loss=loss)
    model_8.normalize_parameters(mean=0, std=std)
    optimizer = Sgd(eta)
    # Train model
    my_loss_8 = train_model(model_8, train_input, train_target, optimizer, epochs_number, Sample_number, batch_size)
    # Evaluate model and produce plots
    model_8_perf = evaluate_model(model_8, train_input, train_target, test_input, test_target, my_loss_8, save_plot, mname=mname)

    print('Evaluation done!')

    # Collect per-model metrics: index 0 = train loss, 1 = train error, 2 = test loss, 3 = test error
    train_loss = torch.tensor([
        model_1_perf[0], model_2_perf[0], model_3_perf[0], model_4_perf[0],
        model_5_perf[0], model_6_perf[0], model_7_perf[0], model_8_perf[0]
    ])
    train_error = torch.tensor([
        model_1_perf[1], model_2_perf[1], model_3_perf[1], model_4_perf[1],
        model_5_perf[1], model_6_perf[1], model_7_perf[1], model_8_perf[1]
    ])
    test_loss = torch.tensor([
        model_1_perf[2], model_2_perf[2], model_3_perf[2], model_4_perf[2],
        model_5_perf[2], model_6_perf[2], model_7_perf[2], model_8_perf[2]
    ])
    test_error = torch.tensor([
        model_1_perf[3], model_2_perf[3], model_3_perf[3], model_4_perf[3],
        model_5_perf[3], model_6_perf[3], model_7_perf[3], model_8_perf[3]
    ])

    return train_loss, train_error, test_loss, test_error
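For context, here is a hedged sketch of how `run_all_model` might be driven. Only the function signature is taken from the code above; the toy dataset (uniform points in the unit square labelled by whether they fall inside a disc of radius 1/sqrt(2*pi)), the sample count, and the use of integer class labels are assumptions made for illustration.

import math
import torch

# Hypothetical driver; the data generation below is assumed, not taken from the source.
Sample_number = 1000

def generate_disc_data(n):
    # Points uniform in [0, 1]^2; label 1 iff inside the disc of radius 1/sqrt(2*pi)
    # centred at (0.5, 0.5), label 0 otherwise.
    points = torch.rand(n, 2)
    labels = ((points - 0.5).pow(2).sum(dim=1) <= 1.0 / (2.0 * math.pi)).long()
    return points, labels

train_input, train_target = generate_disc_data(Sample_number)
test_input, test_target = generate_disc_data(Sample_number)

train_loss, train_error, test_loss, test_error = run_all_model(
    train_input, train_target, test_input, test_target, Sample_number, save_plot=False)
print(test_error)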
from Activation import Tanh
from Gates import AddGate, MultiplyGate

mulGate = MultiplyGate()
addGate = AddGate()
activation = Tanh()


class Layer:
    def forward(self, x, prev, U, W, V):
        # One step of a vanilla RNN: hidden state s = tanh(U*x + W*prev),
        # output (pre-softmax) mulv = V*s.
        self.mulu = mulGate.forward(U, x)
        self.mulw = mulGate.forward(W, prev)
        self.add = addGate.forward(self.mulw, self.mulu)
        self.s = activation.forward(self.add)
        self.mulv = mulGate.forward(V, self.s)

    def backward(self, x, prev, U, W, V, diff, dmulv):
        # Recompute the forward pass, then backpropagate through each gate in reverse order.
        self.forward(x, prev, U, W, V)
        dV, dsv = mulGate.backward(V, self.s, dmulv)
        ds = dsv + diff
        dadd = activation.backward(self.add, ds)
        dmulw, dmulu = addGate.backward(self.mulw, self.mulu, dadd)
        dW, dprev = mulGate.backward(W, prev, dmulw)
        dU, dx = mulGate.backward(U, x, dmulu)
        return (dprev, dU, dW, dV)
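A minimal sketch of how this `Layer` might be unrolled over a short input sequence follows. It assumes the gates operate on NumPy arrays (so `MultiplyGate.forward(W, x)` is a matrix product and `AddGate.forward` is elementwise addition); the dimensions, initialisation, and the `word_dim`/`hidden_dim` names are illustrative only.

import numpy as np

# Hypothetical sizes, purely for illustration
word_dim, hidden_dim = 8, 4

# Parameters shared across time steps (small random values, assumed shapes)
U = np.random.randn(hidden_dim, word_dim) * 0.01    # input  -> hidden
W = np.random.randn(hidden_dim, hidden_dim) * 0.01  # hidden -> hidden
V = np.random.randn(word_dim, hidden_dim) * 0.01    # hidden -> output

# A toy sequence of three one-hot column vectors
xs = [np.eye(word_dim)[:, [t]] for t in (1, 5, 2)]

prev = np.zeros((hidden_dim, 1))  # initial hidden state
layers = []
for x in xs:
    layer = Layer()
    layer.forward(x, prev, U, W, V)  # fills layer.s (hidden state) and layer.mulv (output)
    prev = layer.s                   # carry the hidden state to the next time step
    layers.append(layer)

print([layer.mulv.shape for layer in layers])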
###############################################################

if args.load:
    train_conv = False
    train_fc = True
    weights_conv = args.load
    weights_fc = args.load
else:
    train_conv = False
    train_fc = True
    weights_conv = '../vgg_weights/vgg_weights.npy'
    weights_fc = None

if args.act == 'tanh':
    act = Tanh()
elif args.act == 'relu':
    act = Relu()
else:
    assert False

###############################################################

dropout_rate = tf.placeholder(tf.float32, shape=())
learning_rate = tf.placeholder(tf.float32, shape=())

l0 = Convolution(input_sizes=[batch_size, 224, 224, 3],
                 filter_sizes=[3, 3, 3, 64],
                 num_classes=num_classes,
                 init_filters=args.init,
                 strides=[1, 1, 1, 1],
def test_set_activation(self):
    self.assertEqual(Sigmoid().__class__, self.layer1.get_activation().__class__)
    self.layer1.set_activation(Tanh())
    self.assertEqual(Tanh().__class__, self.layer1.get_activation().__class__)
##############################################

tf.set_random_seed(0)
tf.reset_default_graph()

batch_size = tf.placeholder(tf.int32, shape=())

XTRAIN = tf.placeholder(tf.float32, [None, 32, 32, 3])
YTRAIN = tf.placeholder(tf.float32, [None, 100])
XTRAIN = tf.map_fn(lambda frame: tf.image.per_image_standardization(frame), XTRAIN)

XTEST = tf.placeholder(tf.float32, [None, 32, 32, 3])
YTEST = tf.placeholder(tf.float32, [None, 100])
XTEST = tf.map_fn(lambda frame1: tf.image.per_image_standardization(frame1), XTEST)

l0 = Convolution(input_sizes=[batch_size, 32, 32, 3], filter_sizes=[5, 5, 3, 96], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l1 = MaxPool(size=[batch_size, 32, 32, 96], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l2 = FeedbackConv(size=[batch_size, 16, 16, 96], num_classes=100, sparse=sparse, rank=rank)
l3 = Convolution(input_sizes=[batch_size, 16, 16, 96], filter_sizes=[5, 5, 96, 128], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l4 = MaxPool(size=[batch_size, 16, 16, 128], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l5 = FeedbackConv(size=[batch_size, 8, 8, 128], num_classes=100, sparse=sparse, rank=rank)
l6 = Convolution(input_sizes=[batch_size, 8, 8, 128], filter_sizes=[5, 5, 128, 256], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l7 = MaxPool(size=[batch_size, 8, 8, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l8 = FeedbackConv(size=[batch_size, 4, 4, 256], num_classes=100, sparse=sparse, rank=rank)
l9 = ConvToFullyConnected(shape=[4, 4, 256])
l10 = FullyConnected(size=[4*4*256, 2048], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Tanh(), last_layer=False)
l11 = FeedbackFC(size=[4*4*256, 2048], num_classes=100, sparse=sparse, rank=rank)
    get_conv1_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv1.name)[0] + '/bias:0')
    get_conv2_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv2.name)[0] + '/kernel:0')
    get_conv2_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv2.name)[0] + '/bias:0')
    get_conv3_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv3.name)[0] + '/kernel:0')
    get_conv3_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv3.name)[0] + '/bias:0')
    get_conv4_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv4.name)[0] + '/kernel:0')
    get_conv4_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv4.name)[0] + '/bias:0')
    get_conv5_weights = tf.get_default_graph().get_tensor_by_name(os.path.split(conv5.name)[0] + '/kernel:0')
    get_conv5_bias = tf.get_default_graph().get_tensor_by_name(os.path.split(conv5.name)[0] + '/bias:0')
else:
    l0 = Convolution(input_sizes=[batch_size, 227, 227, 3], filter_sizes=[11, 11, 3, 96], num_classes=num_classes, init_filters=args.init, strides=[1, 4, 4, 1], padding="VALID", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l1 = MaxPool(size=[batch_size, 55, 55, 96], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
    l2 = Convolution(input_sizes=[batch_size, 27, 27, 96], filter_sizes=[5, 5, 96, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l3 = MaxPool(size=[batch_size, 27, 27, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
    l4 = Convolution(input_sizes=[batch_size, 13, 13, 256], filter_sizes=[3, 3, 256, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l5 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l6 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l7 = MaxPool(size=[batch_size, 13, 13, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")
    l8 = ConvToFullyConnected(shape=[6, 6, 256])
    l9 = FullyConnected(size=[6*6*256, 4096], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)