def test_forward_backward(self):
    l = SoftMaxLayer()
    y = l.forward(np.array([5, 5, 6]))
    self.assertEqual(y.shape, (3,))
    assert_almost_equal(y, np.array([0.2119416, 0.2119416, 0.5761169]), decimal=5)
    assert_array_equal(1, np.sum(y))
    d = l.backward(np.array([2, 3, 6]))
    self.assertEqual(d.shape, (3,))
    # assert_almost_equal(d, np.array([-1.792177, -1.5802354, 1.2406412]))
    return
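# --- Illustrative sketch (not part of the original tests): the expected values in
# test_forward_backward can be reproduced with a plain NumPy softmax; this assumes
# the usual definition y_i = exp(x_i) / sum_j exp(x_j), with a max-shift for stability.
import numpy as np

def reference_softmax(x):
    e = np.exp(x - np.max(x))  # subtract the max for numerical stability
    return e / np.sum(e)

print(reference_softmax(np.array([5.0, 5.0, 6.0])))
# -> approximately [0.2119416, 0.2119416, 0.5761169], matching the assertion above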
def test_calc_delta(self):
    l1 = SoftMaxLayer()
    n = Sequential([l1])
    x = np.array([15.0, 10.0, 2.0])
    y = n.forward(x)
    self.assertEqual(y.shape, (3,))
    nll = NegativeLogLikelihoodLoss()
    t = np.array([0.0, 0.0, 1.0])
    self.assertEqual(y.shape, t.shape)
    J1 = nll.loss(y, t)
    self.assertEqual(J1.shape, (3,))
    assert_almost_equal(J1, [0.0, 0.0, 13.0067176], decimal=5)

    cel = CrossEntropyLoss()
    t = np.array([0.0, 0.0, 1.0])
    J2 = cel.loss(x, t)
    self.assertEqual(J2.shape, (3,))
    assert_almost_equal(J2, [0.0, 0.0, 13.0067176], decimal=5)

    delta_in = -nll.dJdy_gradient(y, t)
    assert_almost_equal(delta_in, [0.0, 0.0, 445395.349996])
    delta_out1 = n.backward(delta_in)
    assert_almost_equal(delta_out1, [-0.9933049, -0.0066928, 0.9999978], decimal=5)
    #
    delta_out2 = -cel.dJdy_gradient(x, t)
    assert_almost_equal(delta_out2, [-0.9933049, -0.0066928, 0.9999978], decimal=5)
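# --- Illustrative sketch (not from the original tests): the expected delta_out values
# are just t - softmax(x), i.e. the standard result that the gradient of cross-entropy
# with respect to the logits is softmax(x) - t (the leading minus sign in the test
# flips it), and the expected delta_in is t / y, the negated NLL gradient.
import numpy as np

x = np.array([15.0, 10.0, 2.0])
t = np.array([0.0, 0.0, 1.0])
y = np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x)))

print(t - y)  # -> approximately [-0.9933049, -0.0066928, 0.9999978]
print(t / y)  # -> approximately [0, 0, 445395.35]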
def test_calc_loss(self):
    l1 = SoftMaxLayer()
    n = Sequential([l1])
    x = np.array([15.0, 10.0, 2.0])
    y = n.forward(x)
    self.assertEqual(y.shape, (3,))
    nll = NegativeLogLikelihoodLoss()
    t = np.array([0.0, 0.0, 1.0])
    self.assertEqual(y.shape, t.shape)
    J = nll.loss(y, t)
    self.assertEqual(J.shape, (3,))
    assert_almost_equal(J, [0.0, 0.0, 13.0067176], decimal=5)
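# --- Illustrative sketch (not from the original tests): the expected loss 13.0067176
# is simply -log of the softmax probability of the target class, i.e. the
# element-wise negative log-likelihood -t * log(y).
import numpy as np

x = np.array([15.0, 10.0, 2.0])
t = np.array([0.0, 0.0, 1.0])
y = np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x)))

print(-t * np.log(y))  # -> approximately [0.0, 0.0, 13.0067176]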
class simple_cnn_model(object):
    def __init__(self, epochs, batch_size, lr):
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr

    def load_data(self):
        # load data from the cifar100 folder
        (x_train, y_train), (x_test, y_test) = cifar100(1211506319)
        return x_train, y_train, x_test, y_test

    def train_model(self, layers, loss_metrics, x_train, y_train):
        # build the model
        self.model = Sequential(layers, loss_metrics)
        # train the model
        loss = self.model.fit(x_train, y_train, self.epochs, self.lr,
                              self.batch_size, print_output=True)
        avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
        return avg_loss

    def test_model(self, x_test, y_test):
        # make a prediction
        pred_result = self.model.predict(x_test)
        accuracy = np.mean(pred_result == y_test)
        return accuracy


if __name__ == '__main__':
    # define model parameters
    epochs = 15
    batch_size = 128
    lr = 0.1

    # define layers
    layers = (ConvLayer(3, 16, 3),
              ReluLayer(),
              MaxPoolLayer(),
              ConvLayer(16, 32, 3),
              ReluLayer(),
              MaxPoolLayer(),
              FlattenLayer(),
              FullLayer(2048, 4),
              SoftMaxLayer())
    loss_metrics = CrossEntropyLayer()

    # build and train the model
    model = simple_cnn_model(epochs, batch_size, lr)
    x_train, y_train, x_test, y_test = model.load_data()
    loss = model.train_model(layers, loss_metrics, x_train, y_train)
    accuracy = model.test_model(x_test, y_test)
    print("loss: %s" % loss)
    print("The accuracy of the model is %s" % accuracy)
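# --- Illustrative check (assumption: CIFAR-100 images are 32x32x3 and each
# MaxPoolLayer halves the spatial size): after the two conv/pool stages the
# feature map is 8x8 with 32 channels, which is where FullLayer's input size
# of 2048 comes from.
spatial = 32 // 2 // 2           # two 2x2 max-pool layers: 32 -> 16 -> 8
flat_dim = spatial * spatial * 32
print(flat_dim)                  # -> 2048, matching FullLayer(2048, 4)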
def test_numeric_gradient(self):
    l = SoftMaxLayer()
    x = np.random.rand(3)
    in_delta = np.random.rand(3)
    for i, d in enumerate(in_delta):
        aux_delta = np.zeros(in_delta.size)
        aux_delta[i] = in_delta[i]
        l.forward(x)
        delta = l.backward(aux_delta)
        gradient = l.numeric_gradient(x)
        assert_almost_equal(in_delta[i] * gradient[i, :], delta, decimal=5)
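# --- Illustrative sketch (assumption: numeric_gradient returns the Jacobian
# dy/dx estimated by finite differences; the real layer method may differ).
# A central-difference Jacobian like this is what the test compares against
# the analytic backward pass.
import numpy as np

def numeric_jacobian(f, x, eps=1e-5):
    """Estimate J[i, j] = d f(x)[i] / d x[j] with central differences."""
    y = f(x)
    J = np.zeros((y.size, x.size))
    for j in range(x.size):
        xp, xm = x.copy(), x.copy()
        xp[j] += eps
        xm[j] -= eps
        J[:, j] = (f(xp) - f(xm)) / (2 * eps)
    return J

def softmax(x):
    e = np.exp(x - np.max(x))
    return e / np.sum(e)

print(numeric_jacobian(softmax, np.random.rand(3)))  # rows: outputs, columns: inputs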
def build_model(self):
    layers = []
    input_shape = np.array(
        [self.batch_size, self.x_dim, self.x_dim, self.c_dim])
    # layer_1: input_layer ==> [n, 28, 28, 1]
    x = InputLayer(input_shape)
    layers.append(x)
    # layer_2: conv_layer [n, 28, 28, 1] ==> [n, 28, 28, 20]
    x = ConvLayer(x, output_nums=20, kernel=5, strides=1,
                  padding='SAME', name='conv1')
    layers.append(x)
    # layer_4: maxpool_layer [n, 28, 28, 20] ==> [n, 14, 14, 20]
    x = MaxPoolLayer(x, kernel=2, strides=2, padding='SAME', name='pool1')
    layers.append(x)
    # layer_5: conv_layer [n, 14, 14, 20] ==> [n, 14, 14, 50]
    x = ConvLayer(x, output_nums=50, kernel=5, strides=1,
                  padding='SAME', name='conv2')
    layers.append(x)
    # layer_7: maxpool_layer [n, 14, 14, 50] ==> [n, 7, 7, 50]
    x = MaxPoolLayer(x, kernel=2, strides=2, padding='SAME', name='pool2')
    layers.append(x)
    # layer_8: flatten_layer [n, 7, 7, 50] ==> [n, 7*7*50]
    x = FlattenLayer(x, name='flatten')
    layers.append(x)
    # layer_9: fullconnected_layer [n, 2450] ==> [n, 500]
    x = DenseLayer(x, output_nums=500, name='dense1')
    layers.append(x)
    # layer_10: relu_layer [n, 500] ==> [n, 500]
    x = ReLULayer(x, name='relu1')
    layers.append(x)
    # layer_11: fullconnected_layer [n, 500] ==> [n, 10]
    x = DenseLayer(x, output_nums=10, name='dense2')
    layers.append(x)
    # layer_12: softmax_layer [n, 10] ==> [n, 10]
    x = SoftMaxLayer(x, name='softmax')
    layers.append(x)

    self.layers = layers
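# --- Illustrative check (assumption: 'SAME' padding keeps the spatial size and
# each 2x2/stride-2 max pool halves it, as the shape comments above indicate,
# with a 28x28 single-channel input).
h = 28
h = h // 2         # pool1: 28 -> 14
h = h // 2         # pool2: 14 -> 7
print(h * h * 50)  # -> 2450 inputs to dense1 after FlattenLayer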
    return train, test

train, test = gen_data()

model = Sequential([
    LinearLayer(2, 20, weights='random'),
    TanhLayer(),
    # SigmoidLayer(),
    # HeavisideLayer(),
    # LinearLayer(10, 20, weights='random'),
    # SigmoidLayer(),
    LinearLayer(20, num_classes, weights='random', L1=0.001),
    # ReluLayer(),
    # SigmoidLayer()
    SoftMaxLayer()
])

# model = Sequential([
#     LinearLayer(2, 5, weights='random'),
#     SigmoidLayer(),
#     # LinearLayer(3, 3, weights='random'),
#     # SigmoidLayer(),
#     LinearLayer(5, 4, weights='random'),
#     # Parallel([
#     #     LinearLayer(5, 1, weights='random'),
#     #     LinearLayer(5, 1, weights='random'),
#     #     LinearLayer(5, 1, weights='random'),
#     #     LinearLayer(5, 1, weights='random'),
#     # ]),
#     # SigmoidLayer(),
bh = np.zeros((hidden_size, 1))  # hidden bias
by = np.zeros((vocab_size, 1))   # output bias

van = Vanilla(vocab_size, vocab_size, hidden_size, seq_length,
              Wxh=SharedWeights(Wxh.copy()),
              Whh=Whh.copy(),
              Why=Why.copy(),
              bh=bh.copy(),
              by=by.copy())

# negLog = NegativeLogLikelihoodLoss()
cross = CrossEntropyLoss()
opt = AdaGrad(learning_rate=learning_rate, clip=5)
soft = SoftMaxLayer()

vantr = Vanilla(vocab_size, vocab_size, hidden_size, seq_length,
                Wxh=Wxh.copy(),
                Whh=Whh.copy(),
                Why=Why.copy(),
                bh=bh.copy(),
                by=by.copy())
crosstr = CrossEntropyLoss()
opttr = AdaGrad(learning_rate=learning_rate, clip=5)

trainer = Trainer()
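# --- Illustrative sketch (assumption: the AdaGrad optimizer used above follows the
# standard rule of accumulating squared gradients per parameter, with the clip
# argument bounding the raw gradients; the real class may be implemented differently).
import numpy as np

class AdaGradSketch(object):
    def __init__(self, learning_rate=0.1, clip=5, eps=1e-8):
        self.lr, self.clip, self.eps = learning_rate, clip, eps
        self.cache = {}

    def update(self, name, param, grad):
        grad = np.clip(grad, -self.clip, self.clip)              # gradient clipping
        self.cache[name] = self.cache.get(name, 0.0) + grad ** 2  # running sum of squares
        param -= self.lr * grad / (np.sqrt(self.cache[name]) + self.eps)
        return param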
# # Whh = Whh,
# # Why = Why,
# # bh = bh,
# # by = by
# ),
# Vanilla(
#     vocab_size, vocab_size, hidden_size, window_size  # ,
# # Wxh = Wxh,
# # Whh = Whh,
# # Why = Why,
# # bh = bh,
# # by = by
# )
# )

sm = SoftMaxLayer()

# x = to_one_hot_vect(char_to_ix['b'], vocab_size)
# print len(x)
# print v.forward(x)
# print v.backward(x)

epochs = 50

# opt = GradientDescent(learning_rate=0.01),
# opt = GradientDescentMomentum(learning_rate=0.01, momentum=0.5),
opt = AdaGrad(learning_rate=0.1)  # , clip=100.0),

display = ShowTraining(epochs_num=epochs)
# , weights_list = {'Wx': v.Wxh, 'Whh': v.Whh, 'Why': v.Why, 'by': v.by, 'bh': v.bh})
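# --- Illustrative sketch (assumption: to_one_hot_vect, referenced in the commented-out
# lines above, builds a one-hot vector of length vocab_size for a character index;
# the real helper may differ in dtype or shape).
import numpy as np

def to_one_hot_vect_sketch(idx, vocab_size):
    vect = np.zeros(vocab_size)
    vect[idx] = 1.0
    return vect

print(to_one_hot_vect_sketch(2, 5))  # -> [0. 0. 1. 0. 0.]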
Created on Sun Mar 25 19:52:43 2018

@author: kaushik
"""
import time
import numpy as np
import matplotlib.pyplot as plt
from layers.dataset import cifar100
from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer,
                    ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)

(x_train, y_train), (x_test, y_test) = cifar100(1337)

model = Sequential(layers=(ConvLayer(3, 16, 3),
                           ReluLayer(),
                           MaxPoolLayer(),
                           ConvLayer(16, 32, 3),
                           ReluLayer(),
                           MaxPoolLayer(),
                           FlattenLayer(),
                           FullLayer(8 * 8 * 32, 4),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())

start_time = time.clock()
lr_vals = [0.1]
losses_train = list()
losses_test = list()
test_acc = np.zeros(len(lr_vals))
for j in range(len(lr_vals)):
    train_loss, test_loss = model.fit(x_train, y_train, x_test, y_test,
                                      epochs=8, lr=lr_vals[j], batch_size=128)
    losses_train.append(train_loss)
import numpy as np
from layers.dataset import cifar100

# Please make sure that cifar-100-python is present in the same folder as dataset.py
(x_train, y_train), (x_test, y_test) = cifar100(1212356299)

from layers import (FullLayer, ReluLayer, SoftMaxLayer, CrossEntropyLayer,
                    Sequential_better)

model = Sequential_better(layers=(FullLayer(3072, 1500),
                                  ReluLayer(),
                                  FullLayer(1500, 500),
                                  ReluLayer(),
                                  FullLayer(500, 4),
                                  SoftMaxLayer()),
                          loss=CrossEntropyLayer())

loss2 = model.fit(x_train, y_train, lr=0.1, epochs=15)
y_predict = model.predict(x_test)

count = 0
for i in range(np.size(y_test)):
    if y_predict[i] == y_test[i]:
        count += 1
accuracy = (100.0 * count) / np.shape(y_predict)[0]
print("Accuracy of better CIFAR = ", accuracy, "%")
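# --- Equivalent vectorized form (illustrative, assuming y_predict and y_test are
# 1-D arrays of class labels): the counting loop above can be replaced by a single
# NumPy comparison.
accuracy = 100.0 * np.mean(np.asarray(y_predict) == np.asarray(y_test))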