def main():
    # Load MNIST and report the raw array shapes
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    print('Imported MNIST data: training input %s and training labels %s.' % (
        x_train.shape, y_train.shape))
    print('Imported MNIST data: test input %s and test labels %s.' % (
        x_test.shape, y_test.shape))

    # Flatten the 28x28 images, scale to [0, 1], and one-hot encode the labels
    N, H, W = x_train.shape
    x = x_train.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_train, num_classes=10)

    # Three fully connected layers: 784 -> 300 -> 100 -> 10
    model = Sequential()
    model.add(Dense(), ReLU(), layer_dim=(28 * 28, 300), weight_scale=1e-2)
    model.add(Dense(), ReLU(), layer_dim=(300, 100), weight_scale=1e-2)
    model.add(Dense(), Softmax(), layer_dim=(100, 10), weight_scale=1e-2)
    model.compile(optimizer=GradientDescent(learning_rate=1e-2),
                  loss_func=categorical_cross_entropy)
    model.fit(x, y, epochs=10, batch_size=50, verbose=False)

    # Evaluate on the preprocessed test set
    N, H, W = x_test.shape
    x = x_test.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_test, num_classes=10)
    model.evaluate(x, y)
def CNN():
    '''CNN network on the MNIST dataset'''
    np.random.seed(42)
    n_classes = 10
    inputs, labels = load_mnist_images()

    # Define the network (dilated conv front end, with batch norm)
    net = Network(learning_rate=1e-3)
    net.add_layer(Convolution2D(1, 2, 28, 28, pad=0, stride=1,
                                filter_size=3, dilation=2))
    net.add_layer(ReLU())
    net.add_layer(BatchNorm(800))
    net.add_layer(Linear(800, 128))
    net.add_layer(ReLU())
    net.add_layer(BatchNorm(128))
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())

    train_network(net, inputs, labels, 250)
    test_loss, test_acc = validate_network(net, inputs['test'], labels['test'],
                                           batch_size=128)
    print('Baseline CNN Network with batch normalization:')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
    return net
def main():
    '''
    The first layer of this network is a dilated conv layer.
    The second layer of this network is a fully connected layer.
    '''
    np.random.seed(42)
    n_classes = 10
    dim = 784
    batch_norm = True
    inputs, labels = load_normalized_mnist_data()

    net = Network(learning_rate=1e-3)
    net.add_layer(DilatedConv(8, 3, 2, 1, 2))
    net.add_layer(ReLU())
    net.add_layer(Linear(8 * 784, 128))
    net.add_layer(ReLU())
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())

    train_network(net, inputs, labels, 50)
    test_loss, test_acc = validate_network(net, inputs['test'], labels['test'],
                                           batch_size=128)
    print('Baseline MLP Network with Conv:')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
def __init__(self, device, dataset, input_channel, input_size, width, linear_size):
    super(cnn_2layer, self).__init__()
    mean, sigma = get_mean_sigma(device, dataset, IBP=True)
    self.normalizer = Normalization(mean, sigma)

    self.layers = [
        Normalization(mean, sigma),
        Conv2d(input_channel, 4 * width, 4, stride=2, padding=1, dim=input_size),
        ReLU((4 * width, input_size // 2, input_size // 2)),
        Conv2d(4 * width, 8 * width, 4, stride=2, padding=1, dim=input_size // 2),
        ReLU((8 * width, input_size // 4, input_size // 4)),
        Flatten(),
        Linear(8 * width * (input_size // 4) * (input_size // 4), linear_size),
        ReLU(linear_size),
        Linear(linear_size, 10),
    ]
def main():
    '''
    Trains two networks on the MNIST dataset.
    Both have two hidden ReLU layers with 256 and 128 units.
    The first one has a mean batch normalization layer before every layer.
    '''
    np.random.seed(42)
    n_classes = 10
    dim = 784
    inputs, labels = load_normalized_mnist_data()

    # Define network without batch norm
    net = Network(learning_rate=1e-3)
    net.add_layer(Linear(dim, 256))
    net.add_layer(ReLU())
    net.add_layer(Linear(256, 128))
    net.add_layer(ReLU())
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())

    train_network(net, inputs, labels, 50)
    test_loss, test_acc = validate_network(net, inputs['test'], labels['test'],
                                           batch_size=128)
    print('Baseline MLP Network without batch normalization:')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
def initialize(input_size, hidden_size, output_size, init_weight=0.01, init_params=None):
    # `params` and `layers` are module-level globals populated by this function
    hidden_count = len(hidden_size)
    if init_params is None:
        params['w1'] = init_weight * np.random.randn(input_size, hidden_size[0])
        params['b1'] = np.zeros(hidden_size[0])
        for idx in range(1, hidden_count):
            params[f'w{idx+1}'] = init_weight * np.random.randn(
                hidden_size[idx - 1], hidden_size[idx])
            params[f'b{idx+1}'] = np.zeros(hidden_size[idx])
        params[f'w{hidden_count+1}'] = init_weight * np.random.randn(
            hidden_size[hidden_count - 1], output_size)
        params[f'b{hidden_count+1}'] = np.zeros(output_size)
    else:
        globals()['params'] = init_params

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    for idx in range(1, hidden_count):
        layers.append(Affine(params[f'w{idx+1}'], params[f'b{idx+1}']))
        layers.append(ReLU())
    layers.append(
        Affine(params[f'w{hidden_count+1}'], params[f'b{hidden_count+1}']))
    layers.append(SoftmaxWithLoss())
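# The initialize() snippets here append Affine / ReLU / SoftmaxWithLoss objects to a
# module-level `layers` list, but those classes themselves are not shown. Below is a
# minimal, hypothetical NumPy sketch of the kind of Affine layer such code typically
# assumes -- a forward pass that caches its input and a backward pass that produces
# parameter gradients -- not the project's actual implementation.
import numpy as np

class Affine:
    def __init__(self, w, b):
        self.w, self.b = w, b

    def forward(self, x):
        self.x = x                     # cache the input for the backward pass
        return x @ self.w + self.b

    def backward(self, dout):
        self.dw = self.x.T @ dout      # gradient w.r.t. the weights
        self.db = dout.sum(axis=0)     # gradient w.r.t. the bias
        return dout @ self.w.T         # gradient w.r.t. the layer input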
def test_relu_layer_NUMERICAL_GRADIENT_CHECK(self):
    x = np.linspace(-1, 1, 10 * 32).reshape([10, 32])
    layer = ReLU()
    # Analytic gradient of mean(relu(x)) w.r.t. x, via the layer's backward pass
    grads = layer.backward(x, np.ones([10, 32]) / (32 * 10))
    # Numerical gradient of the same scalar function, for comparison
    numeric_grads = eval_numerical_gradient(
        lambda x: layer.forward(x).mean(), x=x)
    self.assertTrue(
        np.allclose(grads, numeric_grads, rtol=1e-5, atol=0),
        msg="gradient returned by your layer does not match the "
            "numerically computed gradient")
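# The test above relies on an eval_numerical_gradient helper that is not shown in these
# snippets. A minimal central-difference sketch of such a helper (an assumption, not the
# original implementation): perturb each entry of x by +/- h and difference the values of
# the scalar function f.
import numpy as np

def eval_numerical_gradient(f, x, h=1e-5):
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old_value = x[ix]
        x[ix] = old_value + h
        fxph = f(x)                    # f(x + h)
        x[ix] = old_value - h
        fxmh = f(x)                    # f(x - h)
        x[ix] = old_value              # restore the original entry
        grad[ix] = (fxph - fxmh) / (2 * h)
        it.iternext()
    return grad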
def gradient_check():
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    # init the network
    N_hidden = 50
    lin = [
        Linear(cifar.in_size, N_hidden, lam=0.1),
        Linear(N_hidden, cifar.out_size, lam=0.1)
    ]
    g_net = Net([lin[0], ReLU(N_hidden), lin[1], Softmax(cifar.out_size)],
                lam=0.1, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in lin:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)
def tryParameters(test_name, N_hidden, lam, l_rate, decay, mom, epochs=50, batch_size=250):
    net = Net([
        BatchNorm(cifar.in_size, trainMean()),
        Linear(cifar.in_size, N_hidden, lam=lam),
        ReLU(N_hidden),
        Linear(N_hidden, cifar.out_size, lam=lam),
        Softmax(cifar.out_size)
    ], lam, l_rate, decay, mom)

    results = net.trainMiniBatch(train, val, epochs, batch_size, shuffle=True)
    print('{} Test Accuracy: {:.2f}'.format(
        test_name, net.accuracy(test['one_hot'].T, test['images'].T)))
    print('Final train a/c, val a/c: {:.2f}/{:.2f}, {:.2f}/{:.2f}'.format(
        results['last_a_train'], results['last_c_train'],
        results['last_a_val'], results['last_c_val']))
    plotResults(test_name, results['a_train'], results['c_train'],
                results['a_val'], results['c_val'])
    # weights_plot(net, "plots/weights_vizualisation_{}.png".format(test_name), labels)
    return results
def gradient_check(lam, lin_neurons, with_BN):
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    count = 0
    layers = []
    for N in lin_neurons:
        not_last_layer = count < (len(lin_neurons) - 1)
        layers.append(
            Linear(cifar.in_size if count == 0 else lin_neurons[count - 1],
                   N if not_last_layer else cifar.out_size,
                   lam=lam))
        if not_last_layer:
            if with_BN:
                layers.append(BatchNorm(N))
            layers.append(ReLU(N))
        count += 1
    if len(lin_neurons) == 1 and with_BN:
        layers.append(BatchNorm(cifar.out_size))
    layers.append(Softmax(cifar.out_size))

    # init the network
    print(["{}:{},{}".format(l.name, l.in_size, l.out_size) for l in layers])
    g_net = Net(layers, lam=lam, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img, train=True)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in [l for l in layers if not l.isActivation]:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)
def __init__(self, device, dataset, n_class=10, input_size=32, input_channel=3,
             width1=1, width2=1, width3=1, linear_size=100):
    super(ConvMedBig, self).__init__()
    mean, sigma = get_mean_sigma(device, dataset)
    self.normalizer = Normalization(mean, sigma)

    layers = [
        Normalization(mean, sigma),
        Conv2d(input_channel, 16 * width1, 3, stride=1, padding=1, dim=input_size),
        ReLU((16 * width1, input_size, input_size)),
        Conv2d(16 * width1, 16 * width2, 4, stride=2, padding=1, dim=input_size // 2),
        ReLU((16 * width2, input_size // 2, input_size // 2)),
        Conv2d(16 * width2, 32 * width3, 4, stride=2, padding=1, dim=input_size // 2),
        ReLU((32 * width3, input_size // 4, input_size // 4)),
        Flatten(),
        Linear(32 * width3 * (input_size // 4) * (input_size // 4), linear_size),
        ReLU(linear_size),
        Linear(linear_size, n_class),
    ]
    self.blocks = Sequential(*layers)
def generate_network_batch_norm():
    '''
    Generates a network with a batch norm layer for gradient checking.

    Returns:
        net (Network): network defined for testing purposes, in this case
        specifically for the batch norm layer
    '''
    n_classes = 10
    dim = 784

    net = Network(learning_rate=1e-3)
    net.add_layer(Linear(dim, 256))
    net.add_layer(ReLU())
    net.add_layer(BatchNorm(256))
    net.add_layer(Linear(256, 128))
    net.add_layer(ReLU())
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())
    return net
def compile_make_fully_convolutional(nnet):
    # for naming convenience
    nnet.dense3_layer = nnet.svm_layer

    # replace the three dense layers with equivalent 7x7 / 1x1 convolutions
    pad = 'valid'
    nnet.dense1_conv_layer = ConvLayer(nnet.maxpool5_layer, num_filters=4096,
                                       filter_size=(7, 7), pad=pad, flip_filters=False)
    relu_ = ReLU(nnet.dense1_conv_layer)
    nnet.dense2_conv_layer = ConvLayer(relu_, num_filters=4096,
                                       filter_size=(1, 1), pad=pad, flip_filters=False)
    relu_ = ReLU(nnet.dense2_conv_layer)
    nnet.dense3_conv_layer = ConvLayer(relu_, num_filters=1000,
                                       filter_size=(1, 1), pad=pad, flip_filters=False)

    # reshape the dense weights into convolution filters and copy them over
    W_dense1_reshaped = \
        nnet.dense1_layer.W.T.reshape(nnet.dense1_conv_layer.W.shape)
    W_dense2_reshaped = \
        nnet.dense2_layer.W.T.reshape(nnet.dense2_conv_layer.W.shape)
    W_dense3_reshaped = \
        nnet.dense3_layer.W.T.reshape(nnet.dense3_conv_layer.W.shape)

    updates = ((nnet.dense1_conv_layer.W, W_dense1_reshaped),
               (nnet.dense2_conv_layer.W, W_dense2_reshaped),
               (nnet.dense3_conv_layer.W, W_dense3_reshaped),
               (nnet.dense1_conv_layer.b, nnet.dense1_layer.b),
               (nnet.dense2_conv_layer.b, nnet.dense2_layer.b),
               (nnet.dense3_conv_layer.b, nnet.dense3_layer.b))

    return theano.function([], updates=updates)
def initialize(input_size, hidden_size, output_size, init_weight=0.01):
    params['w1'] = init_weight * np.random.randn(input_size, hidden_size)
    params['b1'] = np.zeros(hidden_size)
    params['w2'] = init_weight * np.random.randn(hidden_size, output_size)
    params['b2'] = np.zeros(output_size)

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    layers.append(Affine(params['w2'], params['b2']))
    layers.append(SoftmaxWithLoss())
def testNetwork():  # noqa D103
    net = Network([Linear(10, 64), ReLU(), Linear(64, 2), Sigmoid()])
    x = np.random.randn(32, 10)
    y = np.random.randn(32, 2)
    mse = MSE()
    optim = SGD(0.001, 0.001)

    pred = net(x)
    _ = mse(pred, y)
    _ = net.backward(mse.grad)
    optim.step(net)
def __call__(self, q_input, a_input, *args, **kwargs):
    # convolve input feature maps with filters
    q_conv_out = conv2d(input=q_input, filters=self.W, filter_shape=self.filter_shape)
    a_conv_out = conv2d(input=a_input, filters=self.W, filter_shape=self.filter_shape)

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map width & height
    if self.non_linear == "tanh":
        q_conv_out_tanh = Tanh(q_conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        a_conv_out_tanh = Tanh(a_conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        q_output = pool.pool_2d(input=q_conv_out_tanh, ws=self.pool_size,
                                ignore_border=True)  # max
        a_output = pool.pool_2d(input=a_conv_out_tanh, ws=self.pool_size,
                                ignore_border=True)
    elif self.non_linear == "relu":
        q_conv_out_relu = ReLU(q_conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        a_conv_out_relu = ReLU(a_conv_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        q_output = pool.pool_2d(input=q_conv_out_relu, ws=self.pool_size,
                                ignore_border=True)
        a_output = pool.pool_2d(input=a_conv_out_relu, ws=self.pool_size,
                                ignore_border=True)
    else:
        q_output = pool.pool_2d(input=q_conv_out, ws=self.pool_size,
                                ignore_border=True)
        a_output = pool.pool_2d(input=a_conv_out, ws=self.pool_size,
                                ignore_border=True)
    return q_output, a_output
def generate_CNN():
    '''
    Generates a network with a CNN layer for gradient checking.

    Returns:
        net (Network): network defined for testing purposes, in this case
        specifically for the convolution layer
    '''
    n_classes = 10

    net = Network(learning_rate=1e-3)
    # 3x3 conv (no padding) on a 28x28 input gives 26x26 = 676 features
    net.add_layer(
        Convolution2D(1, 1, 28, 28, pad=0, stride=1, filter_size=3, dilation=1))
    net.add_layer(ReLU())
    net.add_layer(BatchNorm(676))
    net.add_layer(Linear(676, 128))
    net.add_layer(ReLU())
    net.add_layer(BatchNorm(128))
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())
    return net
def make_cnn(X_dim, num_class):
    conv = Conv(X_dim, n_filter=16, h_filter=5, w_filter=5, stride=1, padding=2)
    relu = ReLU()
    maxpool = Maxpool(conv.out_dim, size=2, stride=2)

    conv2 = Conv(maxpool.out_dim, n_filter=20, h_filter=5, w_filter=5, stride=1, padding=2)
    relu2 = ReLU()
    maxpool2 = Maxpool(conv2.out_dim, size=2, stride=2)

    flat = Flatten()
    fc = FullyConnected(np.prod(maxpool2.out_dim), num_class)
    return [conv, relu, maxpool, conv2, relu2, maxpool2, flat, fc]
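# make_cnn() relies on each layer exposing out_dim so the final FullyConnected layer can be
# sized with np.prod(maxpool2.out_dim). As a sanity check, the standard output-size formula
# below (a small illustrative helper, not part of the original code) shows that with 28x28
# inputs (an assumption; X_dim is supplied by the caller) the 5x5 / pad-2 convolutions
# preserve the spatial size and each 2x2 maxpool halves it: 28 -> 28 -> 14 -> 14 -> 7.
def conv_output_size(in_size, filter_size, stride=1, padding=0):
    return (in_size + 2 * padding - filter_size) // stride + 1

h = conv_output_size(28, 5, stride=1, padding=2)   # 28 after conv
h = conv_output_size(h, 2, stride=2)               # 14 after maxpool
h = conv_output_size(h, 5, stride=1, padding=2)    # 14 after conv2
h = conv_output_size(h, 2, stride=2)               # 7 after maxpool2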
def initialize(input_size, hidden_size, output_size, init_weight=0.01, init_params=None):
    if init_params is None:
        params['w1'] = init_weight * np.random.randn(input_size, hidden_size)
        params['b1'] = np.zeros(hidden_size)
        params['w2'] = init_weight * np.random.randn(hidden_size, output_size)
        params['b2'] = np.zeros(output_size)
    else:
        globals()['params'] = init_params

    layers.append(Affine(params['w1'], params['b1']))
    layers.append(ReLU())
    layers.append(Affine(params['w2'], params['b2']))
    layers.append(SoftmaxWithLoss())
def addReLU(self, **kwargs):
    """Add ReLU activation layer."""
    input_layer = self.input_layer if not self.all_layers \
        else self.all_layers[-1]

    self.n_relu_layers += 1
    name = "relu%i" % self.n_relu_layers

    new_layer = ReLU(input_layer, name=name, **kwargs)
    self.all_layers += (new_layer, )
def __init__(self, device, dataset, sizes, n_class=10, input_size=32, input_channel=3):
    super(FFNN, self).__init__()
    mean, sigma = get_mean_sigma(device, dataset)
    self.normalizer = Normalization(mean, sigma)

    layers = [
        Flatten(),
        Linear(input_size * input_size * input_channel, sizes[0]),
        ReLU(sizes[0]),
    ]
    for i in range(1, len(sizes)):
        layers += [
            Linear(sizes[i - 1], sizes[i]),
            ReLU(sizes[i]),
        ]
    layers += [Linear(sizes[-1], n_class)]
    self.blocks = Sequential(*layers)
def build_network(hidden_layer_sizes: List[int], batch_normalized: bool,
                  regularization: float) -> Network:
    net = Network()
    layer_sizes = [CIFAR10.input_size] + hidden_layer_sizes + [CIFAR10.output_size]
    for i, (size_in, size_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        net.add_layer(
            Linear(size_in, size_out, regularization, Xavier(), name='Li' + str(i + 1)))
        if i < len(layer_sizes) - 2:
            if batch_normalized:
                net.add_layer(BatchNormalization(size_out, name='Bn' + str(i + 1)))
            net.add_layer(ReLU(size_out, name='Re' + str(i + 1)))
        else:
            net.add_layer(Softmax(size_out, name='S'))
    return net
def tryParameters(test_name, lin_neurons, with_BN, lam, l_rate, decay, mom,
                  epochs=50, batch_size=250):
    count = 0
    layers = []
    for N in lin_neurons:
        not_last_layer = count < (len(lin_neurons) - 1)
        layers.append(
            Linear(cifar.in_size if count == 0 else lin_neurons[count - 1],
                   N if not_last_layer else cifar.out_size,
                   lam=lam))
        if not_last_layer:
            if with_BN:
                layers.append(BatchNorm(N))
            layers.append(ReLU(N))
        count += 1
    if len(lin_neurons) == 1 and with_BN:
        layers.append(BatchNorm(cifar.out_size))
    layers.append(Softmax(cifar.out_size))

    # init the network
    print(["{}:{},{}".format(l.name, l.in_size, l.out_size) for l in layers])
    net = Net(layers, lam=lam, l_rate=l_rate, decay=0.99, mom=0.99)

    results = net.trainMiniBatch(train, val, epochs, batch_size, shuffle=True)
    print('{} Test Accuracy: {:.2f}'.format(
        test_name, net.accuracy(test['one_hot'].T, test['images'].T)))
    print('Final train a/c, val a/c: {:.2f}/{:.2f}, {:.2f}/{:.2f}'.format(
        results['last_a_train'], results['last_c_train'],
        results['last_a_val'], results['last_c_val']))
    plotResults(test_name, results['a_train'], results['c_train'],
                results['a_val'], results['c_val'])
    # weights_plot(net, "plots/weights_vizualisation_{}.png".format(test_name), labels)
    return results
def __init__(self, device, dataset, input_channel, input_size, linear_size):
    super(cnn_IBP_large, self).__init__()
    mean, sigma = get_mean_sigma(device, dataset, IBP=True)
    self.normalizer = Normalization(mean, sigma)

    self.layers = [
        Normalization(mean, sigma),
        Conv2d(input_channel, 64, 3, stride=1, padding=1, dim=input_size),
        ReLU((64, input_size, input_size)),
        Conv2d(64, 64, 3, stride=1, padding=1, dim=input_size),
        ReLU((64, input_size, input_size)),
        Conv2d(64, 128, 3, stride=2, padding=1, dim=input_size // 2),
        ReLU((128, input_size // 2, input_size // 2)),
        Conv2d(128, 128, 3, stride=1, padding=1, dim=input_size // 2),
        ReLU((128, input_size // 2, input_size // 2)),
        Conv2d(128, 128, 3, stride=1, padding=1, dim=input_size // 2),
        ReLU((128, input_size // 2, input_size // 2)),
        Flatten(),
        Linear(128 * (input_size // 2) * (input_size // 2), linear_size),
        ReLU(linear_size),
        Linear(linear_size, 10),
    ]
# Dense
size_trainset = train_data.shape[0]
size_testset = test_data.shape[0]
train_data = train_data.reshape(size_trainset, -1)
test_data = test_data.reshape(size_testset, -1)
print(train_labels.shape)
print(train_labels.dtype)

inputs = train_data

dense1 = Dense(inputs.shape[1], 64)
activation1 = ReLU()
dense2 = Dense(64, 128)
activation2 = ReLU()
dense3 = Dense(128, 64)
activation3 = ReLU()
dense4 = Dense(64, len(labels_uniques))
activation_and_loss = Softmax_CELoss()
optimizer = SGD(learning_rate=0.1)


def iterate(inputs, labels):
    dense1.forward(inputs)
    activation1.forward(dense1.outputs)
    dense2.forward(activation1.outputs)
    activation2.forward(dense2.outputs)
    dense3.forward(activation2.outputs)
X = X[:100]  # Using all test examples would take too long
y_pred_proba = nn.predict_proba(X)
result_name = f'test_predictions_{docker_image_tag}_{uuid.uuid4().hex}'
exp.save(y_pred_proba, result_name)

if len(sys.argv) < 3:
    print('Specify the type of neural network: cnn, mlp and whether to train or evaluate')

neural_network_type = sys.argv[1]

if neural_network_type == 'mlp':
    hyperparameters = {
        'architecture': (Dense(1024), ReLU(), Dense(256), ReLU(), Dense(10)),
        # 1024 because 32x32 for cifar10
        'epsilon': 1e-6,
        'lr': 5e-2,
        'batch_size': 64,
        'n_epochs': 100
    }
    sample_shape = (784, )
elif neural_network_type == 'cnn':
    architecture = (Convolution(3, 32), ReLU(), Convolution(3, 32), ReLU(),
                    Convolution(3, 32), ReLU(), Flatten(), Dense(10))
    hyperparameters = {
        'architecture': architecture,
        'epsilon': 1e-6,
        'lr': 5e-2,
def testReLU():  # noqa D103
    relu = ReLU()
    x = np.random.randn(32, 10)  # batch size by in_features
    output = relu(x)
    assert output.shape == (32, 10)
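# testReLU() above only checks that calling the layer preserves the input shape. For
# reference, a minimal NumPy sketch of a callable ReLU layer with a forward/backward pair
# (hypothetical; the project's actual ReLU class and backward signature are not shown here):
import numpy as np

class ReLUSketch:
    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        self.mask = x > 0               # remember where the input was positive
        return x * self.mask

    def backward(self, grad_output):
        return grad_output * self.mask  # pass gradient through only where input > 0


relu = ReLUSketch()
out = relu(np.random.randn(32, 10))     # same shape in, same shape out: (32, 10)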
# 1. Load training/test data
# Batch test with N = 2
_x, _t = np.array([[2.6, 3.9, 5.6], [1.76, 2.19, 0.6]]), np.array([[0, 0, 1], [0, 1, 0]])

# 2. hyperparameter

# == Numerical Gradient =============================
# 3. initialize network
network.initialize(3, 2, 3)
_layers = [
    Affine(network.params['w1'], network.params['b1']),
    ReLU(),
    Affine(network.params['w2'], network.params['b2']),
    SoftmaxWithLoss()
]
grad = network.numerical_gradient_net(_x, _t)
print(grad)

# == Backpropagation Gradient =============================
# 3. initialize network
np.random.seed(3)
network.initialize(3, 2, 3)
_layers = [
    Affine(network.params['w1'], network.params['b1']),
    ReLU(),
import numpy as np

from dnn import NeuralNet
from layers import Dense, ReLU, SoftMax

net = NeuralNet()
net.add_layer(Dense(units=32))
net.add_layer(Dense(units=32))
net.add_layer(Dense(units=2))
net.add_layer(ReLU())
net.add_layer(SoftMax())

input_ = np.array([[1, 1], [2, 3]])
output_ = np.array([0, 1])

net.compile(loss='mse')
net.train(input_, output_, 100)
img_cols = 28
input_shape = (1, img_rows, img_cols)

(train_x, train_y), (test_x, test_y) = mnist.load_data()
train_x = np.reshape(train_x, (len(train_x), 1, img_rows, img_cols)).astype(skml_config.config.i_type)
train_y = convert_to_one_hot(train_y, num_classes)
test_x = np.reshape(test_x, (len(test_x), 1, img_rows, img_cols)).astype(skml_config.config.i_type)
test_y = convert_to_one_hot(test_y, num_classes)
train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y)

filters = 64
model = Sequential()
model.add(Convolution(filters, 3, input_shape=input_shape))
model.add(BatchNormalization())
model.add(ReLU())
model.add(MaxPooling(2))
model.add(Convolution(filters, 3))
model.add(BatchNormalization())
model.add(ReLU())
model.add(GlobalAveragePooling())
model.add(Affine(num_classes))
model.compile(SoftmaxCrossEntropy(), Adam())

train_batch_size = 100
valid_batch_size = 1
print("Training started: {}".format(datetime.now().strftime("%Y/%m/%d %H:%M")))
model.fit(train_x, train_y, train_batch_size, 20,
          validation_data=(valid_batch_size, valid_x, valid_y), validation_steps=1)
print("Training finished: {}".format(datetime.now().strftime("%Y/%m/%d %H:%M")))
model.save(save_path)