def test_bigger_batch_batchnorm(self):
    multi_layer_perceptron = MultiLayerPerceptron(
        typeSupervised="multiclass",
        numberInputFeatures=784,
        regularization="L2",
        reg_parameter=0.01)
    multi_layer_perceptron.add_layer(num_neurons=100,
                                     activation_function=ReLU(),
                                     layer=DenseBatchNormLayer)
    multi_layer_perceptron.add_layer(num_neurons=100,
                                     activation_function=ReLU(),
                                     layer=DenseBatchNormLayer)
    multi_layer_perceptron.add_layer(num_neurons=10,
                                     activation_function=Softmax(),
                                     isSoftmax=True)
    train_loss, valid_loss, train_acc, valid_acc = multi_layer_perceptron.fit(
        self.x_mini_train,
        self.y_mini_train,
        xvalid=self.x_mini_valid,
        yvalid=self.y_mini_valid,
        num_epochs=500,
        ret_train_loss=True,
        optim=GradientDescentMomentum(),
        learn_rate=0.1)
    print(train_loss)
    print("\n")
    print(valid_loss)
    print("\n")
    print(train_acc)
    print("\n")
    print(valid_acc)

def test_binaryclassification_unregularized(self):
    """
    The learning rate is KEY: if it's set too high, you will diverge;
    if it's set too low, your network won't learn anything. The more
    layers you have, the higher the learning rate should be. Example
    seen here: with 1 layer only, a learn rate of 0.1 suffices; with
    two layers, ~0.7; with 3 layers, ~2.5.
    """
    MLP = MultiLayerPerceptron(typeSupervised="binary",
                               numberInputFeatures=self.x_train.shape[0])
    # Just add dense layers
    # 10 features learnt in first hidden layer w/ ReLU
    MLP.add_layer(10, activation_function=ReLU())
    # 5 features learnt in second hidden layer w/ ReLU
    MLP.add_layer(5, activation_function=ReLU())
    # Output layer w/ sigmoid activation
    MLP.add_layer(1, activation_function=Sigmoid())
    MLP.fit(self.x_train,
            self.y_train,
            self.x_valid,
            self.y_valid,
            ret_train_loss=True,
            num_epochs=100,
            learn_rate=2.6)
    preds = MLP.predict_multi_layer_perceptron(self.x_test, 0.5)
    acc = accuracy(self.y_test, preds)
    self.assertGreaterEqual(acc, 0.95)

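# A minimal sketch of the learning-rate sweep hinted at in the docstring
# above: train the same two-hidden-layer architecture at a few candidate
# rates and keep whichever scores best on the test set. The candidate
# values here are illustrative assumptions, not tuned constants.
def _sweep_learn_rates(self, candidate_rates=(0.1, 0.7, 2.6)):
    results = {}
    for rate in candidate_rates:
        mlp = MultiLayerPerceptron(
            typeSupervised="binary",
            numberInputFeatures=self.x_train.shape[0])
        mlp.add_layer(10, activation_function=ReLU())
        mlp.add_layer(5, activation_function=ReLU())
        mlp.add_layer(1, activation_function=Sigmoid())
        mlp.fit(self.x_train, self.y_train, self.x_valid, self.y_valid,
                num_epochs=100, learn_rate=rate)
        preds = mlp.predict_multi_layer_perceptron(self.x_test, 0.5)
        results[rate] = accuracy(self.y_test, preds)
    # Rate with the highest accuracy, plus the full sweep for inspection
    best_rate = max(results, key=results.get)
    return best_rate, results
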
def test_binaryclassification_regularized2(self):
    """
    You have to be really careful with the reg parameter. If it's too
    high (past 0.1), your network won't learn anything. With softmax
    regression and logistic regression, there was leeway to make the
    reg parameter pretty high, but that absolutely won't work with
    neural networks multiple layers deep.
    """
    # Normal (mild) regularization
    multi_layer_perceptron3 = MultiLayerPerceptron(
        typeSupervised="binary",
        numberInputFeatures=self.x_train.shape[0],
        regularization="L1",
        reg_parameter=0.01)
    multi_layer_perceptron4 = MultiLayerPerceptron(
        typeSupervised="binary",
        numberInputFeatures=self.x_train.shape[0],
        regularization="L2",
        reg_parameter=0.01)
    # 10 features learnt in first hidden layer w/ ReLU
    multi_layer_perceptron3.add_layer(10, activation_function=ReLU())
    # 5 features learnt in second hidden layer w/ ReLU
    multi_layer_perceptron3.add_layer(5, activation_function=ReLU())
    # Output layer w/ sigmoid activation
    multi_layer_perceptron3.add_layer(1, activation_function=Sigmoid())
    # Same architecture for the L2-regularized network
    multi_layer_perceptron4.add_layer(10, activation_function=ReLU())
    multi_layer_perceptron4.add_layer(5, activation_function=ReLU())
    multi_layer_perceptron4.add_layer(1, activation_function=Sigmoid())
    multi_layer_perceptron3.fit(self.x_train,
                                self.y_train,
                                self.x_valid,
                                self.y_valid,
                                ret_train_loss=True,
                                num_epochs=100,
                                learn_rate=2.8)
    preds3 = multi_layer_perceptron3.predict_multi_layer_perceptron(
        self.x_test, 0.5)
    multi_layer_perceptron4.fit(self.x_train,
                                self.y_train,
                                self.x_valid,
                                self.y_valid,
                                ret_train_loss=True,
                                num_epochs=100,
                                learn_rate=2.6)
    preds4 = multi_layer_perceptron4.predict_multi_layer_perceptron(
        self.x_test, 0.5)
    acc3 = accuracy(self.y_test, preds3)
    acc4 = accuracy(self.y_test, preds4)
    self.assertGreaterEqual(acc3, 0.95)
    self.assertGreaterEqual(acc4, 0.95)

def test_binaryclassification_regularized(self):
    # Sanity check: high regularization leads to very high losses.
    multi_layer_perceptron1 = MultiLayerPerceptron(
        typeSupervised="binary",
        numberInputFeatures=self.x_train.shape[0],
        regularization="L1",
        reg_parameter=500)
    multi_layer_perceptron2 = MultiLayerPerceptron(
        typeSupervised="binary",
        numberInputFeatures=self.x_train.shape[0],
        regularization="L2",
        reg_parameter=500)
    # 10 features learnt in first hidden layer w/ ReLU
    multi_layer_perceptron1.add_layer(10, activation_function=ReLU())
    # 5 features learnt in second hidden layer w/ ReLU
    multi_layer_perceptron1.add_layer(5, activation_function=ReLU())
    # Output layer w/ sigmoid activation
    multi_layer_perceptron1.add_layer(1, activation_function=Sigmoid())
    # Same architecture for the L2-regularized network
    multi_layer_perceptron2.add_layer(10, activation_function=ReLU())
    multi_layer_perceptron2.add_layer(5, activation_function=ReLU())
    multi_layer_perceptron2.add_layer(1, activation_function=Sigmoid())
    multi_layer_perceptron1.fit(self.x_train,
                                self.y_train,
                                self.x_valid,
                                self.y_valid,
                                ret_train_loss=True,
                                num_epochs=10,
                                learn_rate=2.6)
    preds1 = multi_layer_perceptron1.predict_multi_layer_perceptron(
        self.x_test, 0.5)
    multi_layer_perceptron2.fit(self.x_train,
                                self.y_train,
                                self.x_valid,
                                self.y_valid,
                                ret_train_loss=True,
                                num_epochs=10,
                                learn_rate=3)
    preds2 = multi_layer_perceptron2.predict_multi_layer_perceptron(
        self.x_test, 0.5)
    acc1 = accuracy(self.y_test, preds1)
    acc2 = accuracy(self.y_test, preds2)
    self.assertLessEqual(acc1, 0.69)
    self.assertLessEqual(acc2, 0.69)

def test_overfit_small_batch(self):
    multi_layer_perceptron = MultiLayerPerceptron(
        typeSupervised="multiclass", numberInputFeatures=784)
    multi_layer_perceptron.add_layer(num_neurons=100,
                                     activation_function=ReLU(),
                                     layer=DenseBatchNormLayer)
    multi_layer_perceptron.add_layer(num_neurons=10,
                                     activation_function=Softmax(),
                                     isSoftmax=True)
    train_loss1, train_acc1 = multi_layer_perceptron.fit(
        self.x_train[:, :100].reshape(784, -1),
        self.y_train[:, :100],
        num_epochs=150,
        ret_train_loss=True,
        optim=RMSProp(),
        learn_rate=0.001)
    predictions1 = multi_layer_perceptron.predict_multi_layer_perceptron(
        self.x_train[:, :100].reshape(784, -1))
    acc = accuracy(self.saved_y[:, :100].reshape(1, -1), predictions1)
    print(train_loss1)
    print(acc)
    self.assertLessEqual(train_loss1[-1], 0.09)
    self.assertEqual(acc, 1)

    multi_layer_perceptron2 = MultiLayerPerceptron(
        typeSupervised="multiclass", numberInputFeatures=784)
    multi_layer_perceptron2.add_layer(num_neurons=100,
                                      activation_function=ReLU(),
                                      layer=DenseBatchNormLayer)
    multi_layer_perceptron2.add_layer(num_neurons=10,
                                      activation_function=Softmax(),
                                      isSoftmax=True)
    train_loss2, train_acc2 = multi_layer_perceptron2.fit(
        self.x_train[:, :100].reshape(784, -1),
        self.y_train[:, :100],
        num_epochs=150,
        ret_train_loss=True,
        optim=GradientDescentMomentum(),
        learn_rate=0.1)
    predictions2 = multi_layer_perceptron2.predict_multi_layer_perceptron(
        self.x_train[:, :100].reshape(784, -1))
    acc2 = accuracy(self.saved_y[:, :100].reshape(1, -1), predictions2)
    print(train_loss2)
    print(acc2)
    self.assertLessEqual(train_loss2[-1], 0.09)
    self.assertEqual(acc2, 1)

def _build_encoder(self):
    encoder = []
    encoder.append(
        DenseBatchNormLayer(num_in=784,
                            num_layer=300,
                            activation_function=ReLU()))
    encoder.append(
        DenseBatchNormLayer(num_in=300,
                            num_layer=150,
                            activation_function=ReLU()))
    # Fully encoded units
    encoder.append(
        DenseBatchNormLayer(num_in=150,
                            num_layer=self.size_encoding,
                            activation_function=ReLU()))
    return encoder

def test_2(self):
    """
    With a reasonable dropout probability, we can overfit to a small
    batch of data, so it looks like everything is wired correctly.
    """
    multi_layer_perceptron = MultiLayerPerceptron(
        typeSupervised="multiclass", numberInputFeatures=784)
    # With keep_prob=0.6, enough activations survive dropout that the
    # network should still be able to fit this small batch well.
    multi_layer_perceptron.add_layer(num_neurons=100,
                                     activation_function=ReLU(),
                                     layer=DenseDropOutLayer,
                                     keep_prob=0.6)
    multi_layer_perceptron.add_layer(num_neurons=10,
                                     activation_function=Softmax(),
                                     isSoftmax=True)
    train_loss1, train_acc1 = multi_layer_perceptron.fit(
        self.x_train[:, :100].reshape(784, -1),
        self.y_train[:, :100],
        num_epochs=500,
        ret_train_loss=True,
        optim=RMSProp(),
        learn_rate=0.001)
    train_acc1 = np.average(train_acc1)
    self.assertGreaterEqual(train_acc1, 0.89)

def _build_decoder(self):
    decoder = []
    decoder.append(
        DenseBatchNormLayer(num_in=self.size_encoding,
                            num_layer=150,
                            activation_function=ReLU()))
    decoder.append(
        DenseBatchNormLayer(num_in=150,
                            num_layer=300,
                            activation_function=ReLU()))
    # Decoded outputs use a sigmoid activation: the inputs lie in the
    # range 0-1, so the reconstructions should as well.
    decoder.append(
        DenseLayer(num_in=300,
                   num_layer=self.num_input_features,
                   activation_function=Sigmoid()))
    return decoder

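# A minimal sketch of how the two stacks compose into a reconstruction
# pass. It assumes the lists built above are stored on self.encoder and
# self.decoder, and that the dense layers expose the same
# compute_forward(x) method that Conv2D does elsewhere in this suite; if
# the dense layers use a different method name, substitute it here.
def _reconstruct(self, x):
    activations = x
    for layer in self.encoder + self.decoder:
        activations = layer.compute_forward(activations)
    # Same shape as the input, squashed into (0, 1) by the final sigmoid
    return activations
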
def test_padding(self):
    # 10 pictures, each of depth 3 with 5 x 3 spatial dimensions
    noise_images = np.random.randn(10, 3, 5, 3)
    conv_layer = Conv2D(filter_size=3,
                        input_depth=3,
                        num_filters=3,
                        activation_function=ReLU(),
                        padding="same",
                        stride=1)
    # The forward pass should run without any shape errors
    conv_layer.compute_forward(noise_images)

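# A hedged extension of the padding test above: with padding="same" and
# stride=1, the output's spatial dimensions should equal the input's. The
# (num_examples, num_filters, height, width) output layout is an assumption
# about Conv2D.compute_forward, not something asserted elsewhere in this
# suite - adjust the expected shape if the layout differs.
def test_same_padding_preserves_shape(self):
    noise_images = np.random.randn(10, 3, 5, 3)
    conv_layer = Conv2D(filter_size=3,
                        input_depth=3,
                        num_filters=4,
                        activation_function=ReLU(),
                        padding="same",
                        stride=1)
    out = conv_layer.compute_forward(noise_images)
    # Batch size and spatial dims preserved; depth becomes num_filters
    self.assertEqual(out.shape, (10, 4, 5, 3))
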
def test_3(self):
    """
    The architecture goes from overfitting the training set when
    keep_prob is high to underfitting when keep_prob is low (here,
    keep_prob=0.09 drops ~91% of activations, so both accuracies stay
    low). Overall, we could remedy the overfitting by training on more
    examples and adding L2 regularization, but the dropout layer itself
    seems to be implemented fine.
    """
    multi_layer_perceptron = MultiLayerPerceptron(
        typeSupervised="multiclass", numberInputFeatures=784)
    multi_layer_perceptron.add_layer(num_neurons=25,
                                     activation_function=ReLU(),
                                     layer=DenseDropOutLayer,
                                     keep_prob=0.09)
    multi_layer_perceptron.add_layer(num_neurons=25,
                                     activation_function=ReLU(),
                                     layer=DenseDropOutLayer,
                                     keep_prob=0.09)
    multi_layer_perceptron.add_layer(num_neurons=10,
                                     activation_function=Softmax(),
                                     isSoftmax=True)
    _, _, train_acc, valid_acc = multi_layer_perceptron.fit(
        self.x_mini_train,
        self.y_mini_train,
        self.x_mini_valid,
        self.y_mini_valid,
        num_epochs=800,
        ret_train_loss=True,
        optim=RMSProp(),
        learn_rate=0.001)
    train_acc = np.average(train_acc)
    valid_acc = np.average(valid_acc)
    self.assertLessEqual(train_acc, 0.23)
    self.assertLessEqual(valid_acc, 0.17)

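# A minimal sketch of sweeping keep_prob to watch the transition described
# in the docstring above: low keep_prob drops most activations and
# underfits, while keep_prob near 1 behaves like a plain dense net and
# overfits. The candidate values are illustrative assumptions.
def _sweep_keep_prob(self, candidates=(0.09, 0.5, 0.95)):
    accuracies = {}
    for keep_prob in candidates:
        mlp = MultiLayerPerceptron(typeSupervised="multiclass",
                                   numberInputFeatures=784)
        mlp.add_layer(num_neurons=25, activation_function=ReLU(),
                      layer=DenseDropOutLayer, keep_prob=keep_prob)
        mlp.add_layer(num_neurons=10, activation_function=Softmax(),
                      isSoftmax=True)
        _, _, train_acc, valid_acc = mlp.fit(
            self.x_mini_train, self.y_mini_train,
            self.x_mini_valid, self.y_mini_valid,
            num_epochs=800, ret_train_loss=True,
            optim=RMSProp(), learn_rate=0.001)
        accuracies[keep_prob] = (np.average(train_acc),
                                 np.average(valid_acc))
    return accuracies
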
def test_multi_class2(self):
    x, y, not_encoded_y = create_spiral_dataset()
    multi_layer_perceptron = MultiLayerPerceptron(
        typeSupervised="multiclass", numberInputFeatures=2)
    multi_layer_perceptron.add_layer(100, activation_function=ReLU())
    multi_layer_perceptron.add_layer(3,
                                     activation_function=Softmax(),
                                     isSoftmax=1)
    train_loss6, _ = multi_layer_perceptron.fit(xtrain=x,
                                                ytrain=y,
                                                num_epochs=1000,
                                                learn_rate=1,
                                                ret_train_loss=True)
    preds = multi_layer_perceptron.predict_multi_layer_perceptron(x)
    acc_6 = accuracy(not_encoded_y, preds)
    # Performance without regularization should be above 90%
    self.assertLessEqual(train_loss6[-1], 0.245)
    self.assertGreaterEqual(acc_6, 0.90)

def test_overfit_smallbatch_avgpool(self):
    """
    Testing conv layer followed by avg pool layer followed by
    classifier.
    """
    obj3 = ConvolutionalNeuralNetwork(typeSupervised="multiclass",
                                      input_depth=1)
    params_layer1 = {
        "filter_size": 3,
        "input_depth": None,
        "num_filters": 5,
        "activation_function": ReLU(),
        "padding": "same",
        "stride": 1,
        "final_conv_layer": False
    }
    obj3.addConvNetLayer(Conv2D, **params_layer1)

    params_layer2 = {
        "filter_size": 3,
        "stride": 2,
        "final_conv_layer": True,
        "poolType": "avg",
        "padding": "valid"
    }
    obj3.addConvNetLayer(Pool, **params_layer2)

    params_layer6 = {
        "num_neurons": 10,
        "activation_function": Softmax(),
        "regularization": None,
        "reg_parameter": None,
        "isSoftmax": 1
    }
    obj3.addConvNetLayer(DenseLayer, **params_layer6)

    train_loss, train_acc = obj3.fit(self.x_mini_train[:32],
                                     self.y_mini_train[:, :32],
                                     num_epochs=500,
                                     ret_train_loss=True,
                                     verbose=True,
                                     learn_rate=0.4,
                                     optim=GradientDescentMomentum())
    print(train_loss, train_acc)
    self.assertGreaterEqual(train_acc, 0.90)

def test_multi_class3(self):
    # Performance with L2 regularization should be much better
    x, y, not_encoded_y = create_spiral_dataset()
    multi_layer_perceptron = MultiLayerPerceptron(
        typeSupervised="multiclass",
        numberInputFeatures=2,
        regularization="L2",
        reg_parameter=1e-3)
    # Learn 100 features in the first hidden layer
    multi_layer_perceptron.add_layer(100, activation_function=ReLU())
    # Output layer learns 3 features for the softmax
    multi_layer_perceptron.add_layer(3,
                                     activation_function=Softmax(),
                                     isSoftmax=1)
    train_loss7, _ = multi_layer_perceptron.fit(xtrain=x,
                                                ytrain=y,
                                                num_epochs=5000,
                                                learn_rate=1,
                                                ret_train_loss=True)
    preds = multi_layer_perceptron.predict_multi_layer_perceptron(x)
    acc_7 = accuracy(not_encoded_y, preds)
    self.assertLessEqual(train_loss7[-1], 0.40)
    self.assertGreaterEqual(acc_7, 0.98)

def test_overfit_smallbatch(self):
    """
    Test whether a small conv net can overfit a small batch with just
    one conv layer followed by a softmax classifier. If it can't,
    something is wrong with the backprop for the conv layer.
    """
    obj2 = ConvolutionalNeuralNetwork(typeSupervised="multiclass",
                                      input_depth=1)
    params_layer1 = {
        "filter_size": 3,
        "input_depth": None,
        "num_filters": 5,
        "activation_function": ReLU(),
        "padding": "same",
        "stride": 1,
        "final_conv_layer": True
    }
    obj2.addConvNetLayer(Conv2D, **params_layer1)

    params_layer6 = {
        "num_neurons": 10,
        "activation_function": Softmax(),
        "regularization": None,
        "reg_parameter": None,
        "isSoftmax": 1
    }
    obj2.addConvNetLayer(DenseLayer, **params_layer6)

    train_loss, train_acc = obj2.fit(self.x_mini_train[:32],
                                     self.y_mini_train[:, :32],
                                     num_epochs=350,
                                     ret_train_loss=True,
                                     verbose=True,
                                     learn_rate=0.4,
                                     optim=GradientDescentMomentum())
    print(train_loss, train_acc)
    self.assertGreaterEqual(train_acc, 0.90)

def test_1(self):
    multi_layer_perceptron = MultiLayerPerceptron(
        typeSupervised="multiclass", numberInputFeatures=784)
    # Make sure we get high training loss and low accuracy when we drop
    # out 99% of activations. Just sanity checking the implementation
    # of the dropout layer.
    multi_layer_perceptron.add_layer(num_neurons=100,
                                     activation_function=ReLU(),
                                     layer=DenseDropOutLayer,
                                     keep_prob=0.01)
    multi_layer_perceptron.add_layer(num_neurons=10,
                                     activation_function=Softmax(),
                                     isSoftmax=True)
    train_loss1, train_acc1 = multi_layer_perceptron.fit(
        self.x_train[:, :100].reshape(784, -1),
        self.y_train[:, :100],
        num_epochs=500,
        ret_train_loss=True,
        optim=RMSProp(),
        learn_rate=0.001)
    train_acc1 = np.average(train_acc1)
    self.assertLessEqual(train_acc1, 0.40)

def test_full_network(self):
    """
    Train the full net on 60k images - takes a long time to train but
    gets great performance!
    """
    obj1 = ConvolutionalNeuralNetwork(typeSupervised="multiclass",
                                      input_depth=1)
    params_layer1 = {
        "filter_size": 3,
        "input_depth": None,
        "num_filters": 5,
        "activation_function": ReLU(),
        "padding": "same",
        "stride": 1,
        "final_conv_layer": False
    }
    obj1.addConvNetLayer(Conv2D, **params_layer1)

    params_layer2 = {
        "filter_size": 3,
        "stride": 2,
        "final_conv_layer": False,
        "poolType": "avg",
        "padding": "valid"
    }
    obj1.addConvNetLayer(Pool, **params_layer2)

    params_layer3 = {
        "filter_size": 3,
        "input_depth": None,
        "num_filters": 5,
        "activation_function": ReLU(),
        "padding": "same",
        "stride": 1,
        "final_conv_layer": False
    }
    obj1.addConvNetLayer(Conv2D, **params_layer3)

    params_layer4 = {
        "filter_size": 3,
        "final_conv_layer": True,
        "stride": 2,
        "poolType": "avg",
        "padding": "valid"
    }
    obj1.addConvNetLayer(Pool, **params_layer4)

    params_layer5 = {
        "num_neurons": 75,
        "activation_function": ReLU(),
        "regularization": None,
        "reg_parameter": None
    }
    obj1.addConvNetLayer(DenseLayer, **params_layer5)

    params_layer6 = {
        "num_neurons": 10,
        "activation_function": Softmax(),
        "regularization": None,
        "reg_parameter": None,
        "isSoftmax": 1
    }
    obj1.addConvNetLayer(DenseLayer, **params_layer6)

    train_loss, train_acc = obj1.fit(self.x_train,
                                     self.y_train,
                                     xvalid=self.x_test,
                                     yvalid=self.y_test,
                                     num_epochs=150,
                                     ret_train_loss=True,
                                     verbose=True,
                                     learn_rate=0.01,
                                     batch_size=128,
                                     optim=AdaGrad())
    self.assertGreaterEqual(train_acc, 0.90)