def example_GCN(name, adj, weights, layer_config):
    model = Network()
    model.add(Aggregate('A1', adj))
    model.add(Linear('W1', layer_config[0], layer_config[1], 'kaiming'))
    model.add(Relu('Relu1'))
    model.add(Aggregate('A2', adj))
    model.add(Linear('W2', layer_config[1], layer_config[1], 'kaiming'))
    model.add(Relu('Relu2'))
    model.add(Aggregate('A3', adj))
    model.add(Linear('W3', layer_config[1], layer_config[2], 'kaiming'))
    loss = SoftmaxCrossEntropyLoss(name='loss')

    print("Model " + name)
    for layer in model.layer_list:
        print(":\t" + repr(layer))
    print(':\t' + repr(loss))
    print('Forward Computation: ', model.str_forward('X'))
    print('Backward Computation:', model.str_backward('Z-Y'))
    print()
    model.str_update()
    print()
    return model, loss
def test3layergradients(samples=1, dimensions=3072):
    print("\n\nTesting 3-layer gradients using a batch size of {}".format(samples))

    trainingData, trainingLabels, encodedTrainingLabels = loadData(
        "Datasets/cifar-10-batches-mat/data_batch_1.mat")

    trainingData = trainingData[0:dimensions, 0:samples]
    # The label matrices have one row per class, not per input dimension,
    # so only the sample axis is sliced here.
    trainingLabels = trainingLabels[:, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[:, 0:samples]

    network = Model()
    linear = Linear(dimensions, 50, regularization=0.00, initializer="he")
    network.addLayer(linear)
    network.addLayer(Relu())

    linear2 = Linear(50, 30, regularization=0.00, initializer="he")
    network.addLayer(linear2)
    network.addLayer(Relu())

    linear3 = Linear(30, 10, regularization=0.00, initializer="he")
    network.addLayer(linear3)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData, encodedTrainingLabels, network)
    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData, encodedTrainingLabels, network)
    numerical_gradW3 = compute_grads_w_BN(1e-4, linear3.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb3 = compute_grads_w_BN(1e-4, linear3.b, trainingData, encodedTrainingLabels, network)

    print("W1")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb1)
    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)
    print("W3")
    relative_errorW3 = grad_difference(linear3.gradW, numerical_gradW3)
    print("b3")
    relative_errorb3 = grad_difference(linear3.gradb, numerical_gradb3)
    print("\n")
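# A minimal sketch of the relative-error metric that grad_difference is
# assumed to compute above. This reference version is hypothetical (the real
# implementation lives elsewhere in the codebase); it uses the standard
# max-normalized difference between analytical and numerical gradients.
def _reference_grad_difference(analytical, numerical, eps=1e-9):
    diff = np.abs(analytical - numerical)
    scale = np.maximum(eps, np.abs(analytical) + np.abs(numerical))
    rel_err = np.max(diff / scale)
    print("max relative error: {}".format(rel_err))
    return rel_err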
def test_numerical_grad(self):
    layer = Relu()
    x = np.random.rand(5)
    layer.forward(x)
    grad = layer.backward(np.array([1.]))
    num_grad = numerical_gradient.calc(layer.forward, x)
    # Relu acts elementwise, so its full Jacobian is diagonal; the diagonal
    # is the only part the analytical backward pass produces.
    num_grad = num_grad.diagonal()
    numerical_gradient.assert_are_similar(grad, num_grad)
def Model_Linear_Relu_2_EuclideanLoss():
    name = '2_Relu_EuclideanLoss'
    model = Network()
    model.add(Linear('fc1', 784, 441, 0.01))
    model.add(Relu('a1'))
    model.add(Linear('fc2', 441, 196, 0.01))
    model.add(Relu('a2'))
    model.add(Linear('fc3', 196, 10, 0.01))
    loss = EuclideanLoss(name='loss')
    return name, model, loss
def Model_Linear_Relu_2_SoftmaxCrossEntropyLoss():
    name = '2_Relu_SoftmaxCrossEntropyLoss'
    model = Network()
    model.add(Linear('fc1', 784, 441, 0.01))
    model.add(Relu('a1'))
    model.add(Linear('fc2', 441, 196, 0.01))
    model.add(Relu('a2'))
    model.add(Linear('fc3', 196, 10, 0.01))
    loss = SoftmaxCrossEntropyLoss(name='loss')
    return name, model, loss
def main():
    (train_x, train_t), (test_x, test_t) = load_mnist(
        flatten=True, normalize=True, one_hot_label=True)

    weight_init_std = 0.01
    W1 = Value(weight_init_std * np.random.randn(784, 50))
    b1 = Value(np.zeros(50))
    W2 = Value(weight_init_std * np.random.randn(50, 10))
    b2 = Value(np.zeros(10))

    train_data_size = train_x.shape[0]
    batch_size = 100
    epoch_num = 10000
    train_num_per_epoch = max(int(train_data_size / batch_size), 1)

    for epoch in range(epoch_num):
        print('---------epoch {}------------'.format(epoch))
        train_accuracy = 0

        # Training
        for i in range(train_num_per_epoch):
            # Fetch a mini-batch.
            batch_x, batch_t = make_minibatch(train_x, train_t, batch_size)
            network = SoftMaxWithCrossEntropyError(
                Add(Mul(Relu(Add(Mul(Value(batch_x), W1), b1)), W2), b2),
                Value(batch_t))
            loss, acc = network.forward()
            train_accuracy += acc
            network.backward()

        # Testing: rebuild the graph on the test set, sharing the weights.
        network = SoftMaxWithCrossEntropyError(
            Add(Mul(Relu(Add(Mul(Value(test_x), W1), b1)), W2), b2),
            Value(test_t))
        train_accuracy /= batch_size * train_num_per_epoch
        _, test_accuracy = network.forward()
        test_accuracy /= test_x.shape[0]
        print('epoch {} : train_accuracy = {}, test_accuracy = {}'.format(
            epoch, train_accuracy, test_accuracy))
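# The loop above relies on network.backward() to propagate gradients into the
# shared Value nodes; no explicit optimizer step appears in this file.
# Assuming a Value exposes .data and .grad attributes (an assumption, not
# confirmed by the source), a plain SGD update per mini-batch would look like:
#
# lr = 0.1
# for param in (W1, b1, W2, b2):
#     param.data -= lr * param.grad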
def basicConv2Layer():
    model = Network()
    model.add(Conv2D('conv1', 1, 4, 3, 1, 1))
    model.add(Relu('relu1'))
    model.add(AvgPool2D('pool1', 2, 0))  # output shape: N x 4 x 14 x 14
    model.add(Conv2D('conv2', 4, 4, 3, 1, 1))
    model.add(Relu('relu2'))
    model.add(AvgPool2D('pool2', 2, 0))  # output shape: N x 4 x 7 x 7
    model.add(Reshape('flatten', (-1, 196)))
    model.add(Linear('fc3', 196, 10, 0.1))
    loss = SoftmaxCrossEntropyLoss(name='loss')
    return model, loss
def bn_2_layer_test(epochs=2, reg=0.0, lr=0.01, momentum=0.7):
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData(
            "Datasets/cifar-10-batches-mat/", valsplit=0.20)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    network = Model(name="2-layer(NO BN)")
    network.addLayer(Linear(32 * 32 * 3, 50, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(50, 10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    network.compile(sgd, "cce")
    network.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64,
                validationData=(validationData, validationLabels))

    networkBN = Model(name="2-layer(WITH BN)")
    networkBN.addLayer(Linear(32 * 32 * 3, 50, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(50, trainable=True, alpha=0.90))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(50, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(Softmax())

    sgd2 = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    networkBN.compile(sgd2, "cce")
    networkBN.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64,
                  validationData=(validationData, validationLabels))

    #plotAccuracy(network, "plots/", timestamp)
    #plotLoss(network, "plots/", timestamp)
    #loss, acc = network.evaluate(testingData, testingLabels)
    #print("Test loss: {} , Test acc: {}".format(loss, acc))
    #plotAccuracy(network, "plots/", timestamp, title="2-layer(NO BN) accuracy over epochs", fileName="nobnacc")
    #plotLoss(network, "plots/", timestamp, title="2-layer(NO BN) loss over epochs", fileName="nobnloss")
    #plotAccuracy(networkBN, "plots/", timestamp, title="2-layer(WITH BN) accuracy over epochs", fileName="bnacc")
    #plotLoss(networkBN, "plots/", timestamp, title="2-layer(WITH BN) loss over epochs", fileName="bnloss")

    multiPlotLoss((network, networkBN), "plots/", timestamp,
                  title="2-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))
    multiPlotAccuracy((network, networkBN), "plots/", timestamp,
                      title="2-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))
def two_layer_relu():
    model = Network()
    model.add(Linear('fc1', 784, 256, 0.001))
    model.add(Relu('rl1'))
    model.add(Linear('fc2', 256, 10, 0.001))
    model.add(Relu('rl2'))
    config = {
        'learning_rate': 0.0001,
        'weight_decay': 0.005,
        'momentum': 0.9,
        'batch_size': 200,
        'max_epoch': 40,
        'disp_freq': 50,
        'test_epoch': 5
    }
    return model, config
def build_model(config):
    model = Network()
    layer_num = 0
    for layer in config['use_layer']:
        if layer['type'] == "Linear":
            in_num = layer['in_num']
            out_num = layer['out_num']
            if "init_std" in layer.keys():
                model.add(Linear(layer['type'] + str(layer_num), in_num,
                                 out_num, init_std=layer['init_std']))
            else:
                model.add(Linear(layer['type'] + str(layer_num), in_num, out_num))
            layer_num += 1
        elif layer['type'] == 'Relu':
            model.add(Relu(layer['type'] + str(layer_num)))
            layer_num += 1
        elif layer['type'] == 'Sigmoid':
            model.add(Sigmoid(layer['type'] + str(layer_num)))
            layer_num += 1
        else:
            raise ValueError("Unsupported layer type: {}".format(layer['type']))

    loss_name = config['use_loss']
    if loss_name == 'EuclideanLoss':
        loss = EuclideanLoss(loss_name)
    elif loss_name == 'SoftmaxCrossEntropyLoss':
        loss = SoftmaxCrossEntropyLoss(loss_name)
    else:
        raise ValueError("Unsupported loss: {}".format(loss_name))
    return model, loss
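# Hypothetical example config for build_model. The field names follow the
# parser above; the layer sizes are illustrative only (MNIST-like MLP).
example_config = {
    'use_layer': [
        {'type': 'Linear', 'in_num': 784, 'out_num': 256, 'init_std': 0.01},
        {'type': 'Relu'},
        {'type': 'Linear', 'in_num': 256, 'out_num': 10},
    ],
    'use_loss': 'SoftmaxCrossEntropyLoss',
}
# model, loss = build_model(example_config)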
def GCN(name, adj, weights, layer_config):
    num_layer = len(layer_config)
    model = Network()
    for i in range(num_layer - 2):
        model.add(Aggregate('A{}'.format(i), adj))
        model.add(Linear('W{}'.format(i), layer_config[i], layer_config[i + 1], 'kaiming'))
        model.add(Relu('Relu{}'.format(i)))
    model.add(Aggregate('A{}'.format(num_layer - 2), adj))
    model.add(Linear('W{}'.format(num_layer - 2), layer_config[-2], layer_config[-1], 'kaiming'))
    loss = SoftmaxCrossEntropyLoss(name='loss')
    # loss = EuclideanLoss(name='loss')

    print("Model " + name)
    for layer in model.layer_list:
        print(":\t" + repr(layer))
    print(':\t' + repr(loss))
    print('Forward Computation: ', model.str_forward('X'))
    print('Backward Computation:', model.str_backward('Z-Y'))
    print()
    model.str_update()
    print()
    return model, loss
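# Minimal usage sketch for GCN. `adj` is assumed to be the (normalized)
# adjacency matrix that Aggregate consumes, and the layer sizes are
# illustrative (e.g. Cora: 1433 input features, 16 hidden units, 7 classes).
# The `weights` argument is unused by GCN above, so None is passed here.
#
# model, loss = GCN('gcn_2_layer', adj, None, [1433, 16, 7])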
def getNetwork():
    '''Obtain network structures from the specified file.'''
    file_name = "models/structure.json"
    if len(sys.argv) > 1:
        file_name = sys.argv[1]
    # Use the built-in open() (the Python 2 file() constructor no longer
    # exists) and close the handle automatically.
    with open(file_name, "r") as f:
        networks = json.loads(f.read())

    for network in networks:
        config = network['config']
        dis_model = network['model']
        model = Network()
        for layer in dis_model:
            if layer['type'] == 'Linear':
                model.add(Linear(layer['name'], layer['in_num'],
                                 layer['out_num'], layer['std']))
            elif layer['type'] == 'Relu':
                model.add(Relu(layer['name']))
            elif layer['type'] == 'Sigmoid':
                model.add(Sigmoid(layer['name']))
            elif layer['type'] == 'Softmax':
                model.add(Softmax(layer['name']))

        loss = EuclideanLoss('loss')
        if 'loss' in config and config['loss'] == 'CrossEntropyLoss':
            loss = CrossEntropyLoss('loss')
        yield network['name'], model, config, loss
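# Hypothetical models/structure.json consumed by getNetwork; the keys are
# inferred from the parser above and the numbers are illustrative:
#
# [
#   {
#     "name": "mlp_784_256_10",
#     "config": {"loss": "CrossEntropyLoss"},
#     "model": [
#       {"type": "Linear", "name": "fc1", "in_num": 784, "out_num": 256, "std": 0.01},
#       {"type": "Relu", "name": "a1"},
#       {"type": "Linear", "name": "fc2", "in_num": 256, "out_num": 10, "std": 0.01}
#     ]
#   }
# ]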
def regularizationSearch():
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData(
            "Datasets/cifar-10-batches-mat/", valsplit=0.10)

    bestLambda = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0
    for lambdaValue in np.arange(0, 0.2, 0.005):
        network = Model()
        network.addLayer(Linear(32 * 32 * 3, 50, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(50, trainable=True))
        network.addLayer(Relu())
        network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(30, trainable=True))
        network.addLayer(Relu())
        network.addLayer(Linear(30, 10, regularization=lambdaValue, initializer="he"))
        network.addLayer(Softmax())

        sgd = SGD(lr=0.01, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)
        network.compile(sgd, "cce")

        timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
        network.fit(trainingData, trainingLabels, epochs=20,
                    validationData=(validationData, validationLabels), batch_size=64)

        #plotAccuracy(network, "plots/", timestamp)
        #plotLoss(network, "plots/", timestamp)

        print("Lambda:{}".format(lambdaValue))
        loss, acc = network.evaluate(validationData, validationLabels)
        print("Val loss: {} , Val acc: {}".format(loss, acc))
        print("\n\n")
        if acc > bestValAcc:
            bestLambda = lambdaValue
            bestValAcc = acc
            bestLoss = loss

    return bestLambda, bestValAcc, bestLoss
def __init__(self,
             input_dim=(1, 28, 28),
             conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
             hidden_size=100,
             output_size=10,
             weight_init_std=0.01):
    filter_num = conv_param['filter_num']
    filter_size = conv_param['filter_size']
    filter_pad = conv_param['pad']
    filter_stride = conv_param['stride']
    input_size = input_dim[1]
    conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
    pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

    # Initialize weights.
    self.params = {}
    self.params['W1'] = weight_init_std * \
        np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
    self.params['b1'] = np.zeros(filter_num)
    self.params['W2'] = weight_init_std * \
        np.random.randn(pool_output_size, hidden_size)
    self.params['b2'] = np.zeros(hidden_size)
    self.params['W3'] = weight_init_std * \
        np.random.randn(hidden_size, output_size)
    self.params['b3'] = np.zeros(output_size)

    # Build layers.
    self.layers = OrderedDict()
    self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
                                       conv_param['stride'], conv_param['pad'])
    self.layers['Relu1'] = Relu()
    self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
    self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
    self.layers['Relu2'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

    self.last_layer = SoftmaxWithLoss()
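# Worked shape arithmetic for the defaults above (28x28 input, 5x5 filters,
# no padding, stride 1):
#   conv_output_size = (28 - 5 + 2*0) / 1 + 1 = 24
#   after the 2x2 stride-2 pool:  24 / 2 = 12
#   pool_output_size = 30 * 12 * 12 = 4320  (the fan-in of W2)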
def Model_Linear_Relu_1_SoftmaxCrossEntropyLoss():
    name = '1_Relu_SoftmaxCrossEntropyLoss'
    model = Network()
    model.add(Linear('fc1', 784, 256, 0.01))
    model.add(Relu('a1'))
    model.add(Linear('fc2', 256, 10, 0.01))
    loss = SoftmaxCrossEntropyLoss(name='loss')
    return name, model, loss
def LeNet():
    model = Network()
    model.add(Conv2D('conv1', 1, 6, 5, 2, 1))
    model.add(Relu('relu1'))
    model.add(AvgPool2D('pool1', 2, 0))  # output shape: N x 6 x 14 x 14
    model.add(Conv2D('conv2', 6, 16, 5, 0, 1))
    model.add(Relu('relu2'))
    model.add(AvgPool2D('pool2', 2, 0))  # output shape: N x 16 x 5 x 5
    model.add(Reshape('flatten', (-1, 400)))
    model.add(Linear('fc1', 400, 120, 0.1))
    model.add(Relu('relu3'))
    model.add(Linear('fc2', 120, 84, 0.1))
    model.add(Relu('relu4'))
    model.add(Linear('fc3', 84, 10, 0.1))
    loss = SoftmaxCrossEntropyLoss(name='loss')
    return model, loss
def Model_Linear_Relu_1_EuclideanLoss():
    name = '1_Relu_EuclideanLoss'
    model = Network()
    model.add(Linear('fc1', 784, 256, 0.01))
    model.add(Relu('a1'))
    model.add(Linear('fc2', 256, 10, 0.01))
    loss = EuclideanLoss(name='loss')
    return name, model, loss
def _make_layers(self):
    self.layers = []
    for i in range(self.layer_num):
        self.layers.append(Affine(self.weights[i], self.bias[i]))
        # No activation after the final Affine; SoftmaxWithLoss follows it.
        if i != self.layer_num - 1:
            self.layers.append(Relu())
    self.lastLayer = SoftmaxWithLoss()
def test_TwoDifferentModelsShouldHaveDifferentGradients(self):
    x = np.random.rand(5)
    real_model = Seq([
        Linear(5, 3, initialize='ones'),
        Tanh(),
        Linear(3, 5, initialize='ones'),
        Tanh()
    ])
    y = real_model.forward(x)
    real_grad = real_model.backward(np.ones(5))

    # Same architecture but with Relu activations on purpose: the numerical
    # gradient of this model should NOT match the Tanh model's gradient.
    num_model = Seq([
        Linear(5, 3, initialize='ones'),
        Relu(),
        Linear(3, 5, initialize='ones'),
        Relu()
    ])
    num_grad = numerical_gradient.calc(num_model.forward, x)
    num_grad = np.sum(num_grad, axis=1)

    self.assertFalse(numerical_gradient.are_similar(real_grad, num_grad))
def inner_model(trainable, x):
    layers_list = [
        Reshape([-1, 28, 28, 1]),
        Conv(32),
        BatchNormalization(),
        Relu(),
        MaxPool(),
        Conv(64),
        BatchNormalization(),
        Relu(),
        MaxPool(),
        Reshape([-1, 7 * 7 * 64]),
        FullyConnected(1024),
        Relu(),
        FullyConnected(10)
    ]

    variable_saver = VariableSaver()
    signal = x
    print('shape', signal.get_shape())
    for idx, layer in enumerate(layers_list):
        signal = layer.contribute(signal, idx, trainable, variable_saver.save_variable)
        print('shape', signal.get_shape())
    return signal, variable_saver.var_list
def __init__(self, hidden_size, weight_init_std=0.01):
    super().__init__()
    self.params = dict()
    self.params['W1'] = weight_init_std * np.random.randn(
        self.x_train.shape[1], hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * np.random.randn(
        hidden_size, self.t_train.shape[1])
    self.params['b2'] = np.zeros(self.t_train.shape[1])

    # Build layers.
    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.last_layer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    # Initialize weights.
    self.params = {}
    self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
    # self.params['W1'] = np.ones((input_size, hidden_size))
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
    # self.params['W2'] = np.ones((hidden_size, output_size))
    self.params['b2'] = np.zeros(output_size)

    # Build layers.
    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.lastLayer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    self.params = {}
    # Small random weights scaled by weight_init_std.
    self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)

    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['w1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = Affine(self.params['w2'], self.params['b2'])
    self.lastlayer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    # Initialize weights.
    self.params = {}
    self.params['W1'] = weight_init_std * \
        np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * \
        np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)

    # Generate layers.
    self.layers = OrderedDict()  # Ordered dictionary
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.lastLayer = SoftmaxWithLoss()
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    self.params = {}
    self.params["W1"] = weight_init_std * np.random.randn(input_size, hidden_size)
    self.params["W2"] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params["b1"] = np.zeros(hidden_size)
    self.params["b2"] = np.zeros(output_size)

    self.layers = OrderedDict()
    self.layers["Affine1"] = Affine(self.params["W1"], self.params["b1"])
    self.layers["Relu1"] = Relu()
    self.layers["Affine2"] = Affine(self.params["W2"], self.params["b2"])
    self.lastLayer = SoftmaxWithLoss()
def build_model_from_string(def_str):
    model = Network()
    def_str = def_str.strip()  # strip() returns a new string; rebind it
    layer_strs = def_str.split(';')
    for layer_str in layer_strs:
        tokens = layer_str.split(',')
        if len(tokens) <= 1:
            raise Exception(
                "Invalid token: {} in layer definition".format(layer_str))
        type = tokens[0].strip()
        name = tokens[1].strip()
        if type == "linear":
            model.add(Linear(name, int(tokens[2]), int(tokens[3]), float(tokens[4])))
        elif type == "sigmoid":
            model.add(Sigmoid(name))
        elif type == "relu":
            model.add(Relu(name))
        else:
            raise NotImplementedError("Unsupported layer type {}".format(type))

    print("=" * 50 + "\nModel Summary:\n{}\n".format(model) + "=" * 50 + "\n")
    return model
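# Example definition string accepted by build_model_from_string; the format
# ("type, name[, in, out, std]" entries separated by ';') follows the parser
# above, and the layer sizes are illustrative:
#
# model = build_model_from_string(
#     "linear, fc1, 784, 256, 0.01; relu, a1; linear, fc2, 256, 10, 0.01")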
def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
    self.params = {}
    self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
    self.params['b1'] = np.zeros(hidden_size)
    self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params['b2'] = np.zeros(output_size)

    # f = open("./db/param_result/784x50x10-0.99162.json", 'r')
    # self.params = json.load(f)
    # f.close()
    # for key in ('W1', 'b1', 'W2', 'b2'):
    #     self.params[key] = np.array(self.params[key])

    # Create each layer.
    self.layers = OrderedDict()
    self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
    self.layers['Relu1'] = Relu()
    self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
    self.last_layer = SoftmaxWithLoss()
from network import Network
from layers import Relu, Linear, Conv2D, AvgPool2D, Reshape
from utils import LOG_INFO
from loss import EuclideanLoss, SoftmaxCrossEntropyLoss
from solve_net import train_net, test_net
from load_data import load_mnist_4d
from plot import show
from solve_net import show4category

train_data, test_data, train_label, test_label = load_mnist_4d('data')

# Your model definition here
# You should explore different model architectures
model = Network()
model.add(Conv2D('conv1', 1, 4, 3, 1, 0.01))
model.add(Relu('relu1'))
model.add(AvgPool2D('pool1', 2, 0))  # output shape: N x 4 x 14 x 14
model.add(Conv2D('conv2', 4, 8, 3, 1, 0.01))
model.add(Relu('relu2'))
model.add(AvgPool2D('pool2', 2, 0))  # output shape: N x 8 x 7 x 7
model.add(Reshape('flatten', (-1, 392)))
model.add(Linear('fc3', 392, 10, 0.01))
loss = SoftmaxCrossEntropyLoss(name='loss')

# Training configuration
# You should adjust these hyperparameters
# NOTE: one iteration means the model forward-backwards one batch of samples;
# one epoch means the model has gone through all the training samples.
# 'disp_freq' denotes how often (in iterations) to display training information.
config = {
def main():
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData(
            "Datasets/cifar-10-batches-mat/", valsplit=0.10)

    # Settings 1
    # reg = 0.065
    # lr = 0.002

    # Settings 2
    # reg = 0.0021162
    # lr = 0.061474

    # Settings 3
    # reg = 0.0010781
    # lr = 0.069686

    # Settings 4
    # reg = 0.0049132
    # lr = 0.07112

    # Settings 5
    reg = 0.005
    lr = 0.007

    network = Model()
    network.addLayer(Linear(32 * 32 * 3, 50, regularization=reg, initializer="he"))
    network.addLayer(BatchNormalization(50, trainable=True))
    network.addLayer(Relu())
    network.addLayer(Linear(50, 30, regularization=reg, initializer="he"))
    network.addLayer(BatchNormalization(30, trainable=True))
    network.addLayer(Relu())
    network.addLayer(Linear(30, 10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)
    network.compile(sgd, "cce")

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
    network.fit(trainingData, trainingLabels, epochs=30, batch_size=100,
                validationData=(validationData, validationLabels))

    plotAccuracy(network, "plots/", timestamp,
                 title="3-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))
    plotLoss(network, "plots/", timestamp,
             title="3-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))

    loss, acc = network.evaluate(testingData, testingLabels)
    print("Test loss: {} , Test acc: {}".format(loss, acc))
def __init__(self,
             input_dim,
             conv_params=[
                 {'filter_num': 32, 'filter_size': 9, 'pad': 0, 'stride': 3},
                 {'filter_num': 64, 'filter_size': 5, 'pad': 2, 'stride': 1},
                 {'filter_num': 128, 'filter_size': 7, 'pad': 0, 'stride': 1},
             ],
             hidden_size=128,
             dropout_ratio=[0.2, 0.5],
             output_size=5):
    self.params = {}
    self.layers = {}

    pre_shape = input_dim
    for idx, conv_param in enumerate(conv_params):
        # Initialize parameters (He scaling on the filter fan-in).
        self.params['W' + str(idx + 1)] = init_he(pre_shape[0] * conv_param['filter_size']**2) * \
            np.random.randn(conv_param['filter_num'], pre_shape[0],
                            conv_param['filter_size'], conv_param['filter_size'])
        self.params['b' + str(idx + 1)] = np.zeros(conv_param['filter_num'])
        # Set layers.
        self.layers['Conv' + str(idx + 1)] = Convolution(
            self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)],
            conv_param['stride'], conv_param['pad'])
        self.layers['Relu' + str(idx + 1)] = Relu()
        # Compute the output image size of this conv layer.
        pre_shape = self.layers['Conv' + str(idx + 1)].output_size(pre_shape)

    idx = len(conv_params)
    # Initialize parameters and set the hidden Affine layer.
    self.params['W' + str(idx + 1)] = init_he(pre_shape[0] * pre_shape[1]**2) * \
        np.random.randn(pre_shape[0] * pre_shape[1]**2, hidden_size)
    self.params['b' + str(idx + 1)] = np.zeros(hidden_size)
    self.layers['Affine' + str(idx + 1)] = Affine(
        self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)])
    self.layers['Relu' + str(idx + 1)] = Relu()
    idx += 1

    # Initialize parameters and set the output layer.
    self.params['W' + str(idx + 1)] = init_he(hidden_size) * np.random.randn(hidden_size, output_size)
    self.params['b' + str(idx + 1)] = np.zeros(output_size)
    self.layers['Affine' + str(idx + 1)] = Affine(
        self.params['W' + str(idx + 1)], self.params['b' + str(idx + 1)])

    # Set the loss function layer.
    self.loss_layer = SoftmaxWithLoss()