def test_mnist():
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    val_x = train_x[50000:]
    val_y = train_y[50000:]
    train_x = train_x[:50000]
    train_y = train_y[:50000]
    batch_size = 200

    model = models.Sequential()
    model.add(layers.Linear(28, input_shape=(None, train_x.shape[1])))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.Softmax())

    acc = losses.categorical_accuracy.__name__
    model.compile(losses.CrossEntropy(), optimizers.SGD(lr=0.001),
                  metrics=[losses.categorical_accuracy])
    model.summary()
    history = model.train(train_x, train_y, batch_size, epochs=32,
                          validation_data=(val_x, val_y))

    epochs = range(1, len(history["loss"]) + 1)
    plt.plot(epochs, history["loss"], 'ro', label="Training loss")
    plt.plot(epochs, history["val_loss"], 'go', label="Validation loss")
    plt.plot(epochs, history[acc], 'r', label="Training accuracy")
    plt.plot(epochs, history["val_" + acc], 'g', label="Validation accuracy")
    plt.title('Training/Validation loss/accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accuracy')
    plt.legend()
    plt.show(block=True)
def __init__(self, input_dim=(1, 28, 28),
             conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
             hidden_size=100, output_size=10, weight_init_std=0.01):
    filter_num = conv_param['filter_num']
    filter_size = conv_param['filter_size']
    filter_pad = conv_param['pad']
    filter_stride = conv_param['stride']
    input_size = input_dim[1]
    conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
    pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))

    self.params = {}
    self.params['W1'] = weight_init_std * np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
    self.params['b1'] = np.zeros(filter_num)
    self.params['W2'] = weight_init_std * np.random.randn(pool_output_size, hidden_size)
    self.params['b2'] = np.zeros(hidden_size)
    self.params['W3'] = weight_init_std * np.random.randn(hidden_size, output_size)
    self.params['b3'] = np.zeros(output_size)

    self.layers = OrderedDict()
    self.layers['Conv1'] = conv.Convolution(self.params['W1'], self.params['b1'], filter_stride, filter_pad)
    self.layers['ReLU1'] = Layers.ReLU()
    self.layers['Pool1'] = conv.Pool(2, 2, 2)
    self.layers['Affine1'] = Layers.Affine(self.params['W2'], self.params['b2'])
    self.layers['ReLU2'] = Layers.ReLU()
    self.layers['Affine2'] = Layers.Affine(self.params['W3'], self.params['b3'])
    self.last_layer = Layers.SoftmaxWithLoss()
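# Sanity check of the size arithmetic in the constructor above (a minimal
# sketch using only its defaults: 28x28 input, 30 filters of size 5, pad 0,
# stride 1, followed by 2x2 pooling).
conv_out = (28 - 5 + 2 * 0) // 1 + 1                # 24
pool_out = 30 * (conv_out // 2) * (conv_out // 2)   # 30 * 12 * 12 = 4320
assert pool_out == 4320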
def __init__(self, input_size, mid_size, out_size, sig=True):
    mag = 1 if sig else 2
    self.weights = {
        'W1': np.random.normal(0, mag / np.sqrt(input_size), (input_size, mid_size)),
        'b1': np.random.normal(0, mag / np.sqrt(input_size), (mid_size,)),
        'W2': np.random.normal(0, mag / np.sqrt(mid_size), (mid_size, out_size)),
        'b2': np.random.normal(0, mag / np.sqrt(mid_size), (out_size,))
    }
    self.layers = OrderedDict()
    self.layers['Affine1'] = layers.Affine(self.weights['W1'], self.weights['b1'])
    if sig:
        self.layers['Sig'] = layers.Sigmoid()
    else:
        self.layers['ReLU'] = layers.ReLU()
    self.layers['Dropout'] = layers.Dropout()
    self.layers['Affine2'] = layers.Affine(self.weights['W2'], self.weights['b2'])
    self.last_layer = layers.SmLo()
def __init__(self, n_in, nh, n_out):
    super().__init__()
    self.layers = nn.Sequential(
        layers.Linear(n_in, nh),
        layers.ReLU(),
        layers.Linear(nh, n_out))
    self.loss = functions.MSE
def test_relu():
    relu = ly.ReLU()
    bottom = np.random.randn(4, 5)
    top = np.zeros_like(bottom)
    relu.setup(bottom, top)
    relu.forward(bottom, top)
    topgrad, botgrad = np.zeros_like(top), np.zeros_like(bottom)
    relu.backward(bottom, top, botgrad, topgrad)
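# For reference, a minimal NumPy sketch of what a ReLU layer's forward and
# backward passes compute. The names relu_forward/relu_backward are
# illustrative only and are not part of the `ly` module used above.
import numpy as np

def relu_forward(x):
    # Elementwise max(0, x).
    return np.maximum(0, x)

def relu_backward(x, grad_top):
    # The gradient flows only where the input was positive.
    return grad_top * (x > 0)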
def __init__(self):
    # Layers
    self.conv1 = l.Convolution("conv1", 3, 6, 5, 1)
    self.conv2 = l.Convolution("conv2", 6, 16, 5, 1)
    self.relu = l.ReLU("relu")
    self.pool = l.Maxpooling("pooling", 2, 2)
    self.dense1 = l.Dense("dense1", 16 * 5 * 5, 120)
    self.dense2 = l.Dense("dense2", 120, 84)
    self.dense3 = l.Dense("dense3", 84, 10)
def predict(self, X):
    """
    Inputs:
    - X: A numpy array of shape (N, D) giving N D-dimensional data points to
      classify.

    Returns:
    - y_pred: A numpy array of shape (N,) giving predicted labels for each of
      the elements of X. For all i, y_pred[i] = c means that X[i] is predicted
      to have class c, where 0 <= c < C.
    """
    h1 = layers.ReLU(np.dot(X, self.params['W1']) + self.params['b1'])
    scores = np.dot(h1, self.params['W2']) + self.params['b2']
    y_pred = np.argmax(scores, axis=1)
    return y_pred
def test_overfitting(cifar, momentum):
    training = cifar.get_named_batches('data_batch_1').subset(100)

    net = Network()
    net.add_layer(layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    opt = MomentumSGD(net, initial_learning_rate=0.005, momentum=momentum)
    opt.train(training, training, 400)

    costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                          opt.cost_train, opt.cost_val,
                          'images/overfit_mom{}.png'.format(momentum))
    show_plot('images/overfit_mom{}.png'.format(momentum))
test_y = ds_test.targets.numpy()

train_mean = train_x.mean()
train_x, valid_x, test_x = (x - train_mean for x in (train_x, valid_x, test_x))
train_y, valid_y, test_y = (dense_to_one_hot(y, 10) for y in (train_y, valid_y, test_y))

weight_decay = config['weight_decay']
net = []
regularizers = []
inputs = np.random.randn(config['batch_size'], 1, 28, 28)
net += [layers.Convolution(inputs, 16, 5, "conv1")]
regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, 'conv1_l2reg')]
net += [layers.MaxPooling(net[-1], "pool1")]
net += [layers.ReLU(net[-1], "relu1")]
net += [layers.Convolution(net[-1], 32, 5, "conv2")]
regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, 'conv2_l2reg')]
net += [layers.MaxPooling(net[-1], "pool2")]
net += [layers.ReLU(net[-1], "relu2")]
# spatial size is 7x7 at this point
net += [layers.Flatten(net[-1], "flatten3")]
net += [layers.FC(net[-1], 512, "fc3")]
regularizers += [layers.L2Regularizer(net[-1].weights, weight_decay, 'fc3_l2reg')]
net += [layers.ReLU(net[-1], "relu3")]
net += [layers.FC(net[-1], 10, "logits")]
def autoencoder(bioma_shape=717,
                domain_shape=36,
                output_shape=717,
                latent_space=10,
                bioma_layers=[128, 64],
                domain_layers=[32, 16],
                input_transform=CenterLogRatio(),
                output_transform=None,
                activation_function_encoder=layers.ReLU(),
                activation_function_decoder=layers.ReLU(),
                activation_function_latent='tanh'):
    has_domain = domain_shape is not None
    has_bioma = bioma_shape is not None
    if not has_bioma and not has_domain:
        raise Exception('Either bioma or domain has to be specified.')

    # encoder bioma
    if has_bioma:
        in_bioma = layers.Input(shape=(bioma_shape,), name='bioma_input_{}'.format(bioma_shape))
        net = in_bioma
        if input_transform is not None:
            net = input_transform(net)
        for s in bioma_layers:
            net = layers.Dense(s, activation=activation_function_encoder,
                               name="encoder_bioma_dense_{}".format(s))(net)
        encoded_bioma = layers.Dense(latent_space, activation=activation_function_latent,
                                     name='encoded_bioma_{}'.format(latent_space))(net)
        encoder_bioma = keras.Model(inputs=in_bioma, outputs=encoded_bioma, name='EncoderBioma')
    else:
        encoded_bioma = None
        encoder_bioma = None

    # encoder domain
    if has_domain:
        in_domain = layers.Input(shape=(domain_shape,), name='domain_input_{}'.format(domain_shape))
        net = in_domain
        for s in domain_layers:
            net = layers.Dense(s, activation=activation_function_encoder,
                               name="encoder_domain_dense_{}".format(s))(net)
        encoded_domain = layers.Dense(latent_space, activation=activation_function_latent,
                                      name='encoded_domain_{}'.format(latent_space))(net)
        encoder_domain = keras.Model(inputs=in_domain, outputs=encoded_domain, name='EncoderDomain')
    else:
        encoded_domain = None
        encoder_domain = None

    # decoder bioma for both autoencoders
    in_latent_space = layers.Input(shape=(latent_space,), name='latent_space_input')
    net = in_latent_space
    net_bioma = encoded_bioma
    net_domain = encoded_domain
    for s in reversed(bioma_layers):
        layer = layers.Dense(s, activation=activation_function_decoder,
                             name="decoder_dense_{}".format(s))
        net = layer(net)
        if has_bioma:
            net_bioma = layer(net_bioma)
        if has_domain:
            net_domain = layer(net_domain)
    layer = layers.Dense(output_shape, activation=None, name='decoded_bioma')
    decoded_bioma = layer(net)
    if has_bioma:
        net_bioma = layer(net_bioma)
    if has_domain:
        net_domain = layer(net_domain)
    if output_transform is not None:
        decoded_bioma = output_transform(decoded_bioma)
        if has_bioma:
            net_bioma = output_transform(net_bioma)
        if has_domain:
            net_domain = output_transform(net_domain)
    decoder_bioma = keras.Model(inputs=in_latent_space, outputs=decoded_bioma, name='DecoderBioma')

    # combined model for training
    if has_domain and has_bioma:
        diff_encoders = tf.math.abs(encoded_domain - encoded_bioma, name='diff_encoded')
        diff_encoders = Identity(name='latent')(diff_encoders)
        net_bioma = Identity(name='bioma')(net_bioma)
        net_domain = Identity(name='domain')(net_domain)
        model = keras.Model(inputs=[in_bioma, in_domain],
                            outputs=[net_bioma, net_domain, diff_encoders],
                            name='model')
    else:
        if has_bioma:
            net_bioma = Identity(name='bioma')(net_bioma)
            model = keras.Model(inputs=[in_bioma], outputs=[net_bioma], name='model')
        if has_domain:
            net_domain = Identity(name='domain')(net_domain)
            model = keras.Model(inputs=[in_domain], outputs=[net_domain], name='model')

    return model, encoder_bioma, encoder_domain, decoder_bioma
print("Check grad wrt input") check_grad_inputs(conv, x, grad_out) print("Check grad wrt params") check_grad_params(conv, x, conv.weights, conv.bias, grad_out) print("\nMaxPooling") x = np.random.randn(5, 4, 8, 8) grad_out = np.random.randn(5, 4, 4, 4) pool = layers.MaxPooling(x, "pool", 2, 2) print("Check grad wrt input") check_grad_inputs(pool, x, grad_out) print("\nReLU") x = np.random.randn(4, 3, 5, 5) grad_out = np.random.randn(4, 3, 5, 5) relu = layers.ReLU(x, "relu") print("Check grad wrt input") check_grad_inputs(relu, x, grad_out) print("\nFC") x = np.random.randn(20, 40) grad_out = np.random.randn(20, 30) fc = layers.FC(x, 30, "fc") print("Check grad wrt input") check_grad_inputs(fc, x, grad_out) print("Check grad wrt params") check_grad_params(fc, x, fc.weights, fc.bias, grad_out) print("\nSoftmaxCrossEntropyWithLogits") x = np.random.randn(50, 20) y = np.zeros([50, 20])
                Y), self.accuracy(dataset.one_hot_labels, None, Y))


if __name__ == '__main__':
    import layers
    import datasets
    import initializers
    import matplotlib.pyplot as plt

    cifar = datasets.CIFAR10()
    training = cifar.get_named_batches('data_batch_1', limit=4)

    net = Network()
    net.add_layer(layers.Linear(cifar.input_size, 50, 0, initializers.Xavier()))
    net.add_layer(layers.ReLU(50))
    net.add_layer(layers.Linear(50, cifar.output_size, 0, initializers.Xavier()))
    net.add_layer(layers.Softmax(cifar.output_size))

    Y = net.evaluate(training.images)
    print('Cost:', net.cost(training.one_hot_labels, None, Y))
    print('Accuracy: {:.2%}'.format(net.accuracy(training.one_hot_labels, None, Y)))

    plt.subplot(1, 3, 1)
    plt.imshow(Y)
    plt.yticks(range(10), cifar.labels)
    plt.xlabel('Image number')
    plt.title('Probabilities')
def __init__(self):
    self.convlayer = ly.ConvLayer()
    self.relu = ly.ReLU()
def create_and_train(training: Batch, validation: Batch, epochs: int, hidden_size: int,
                     regularization: float, initial_learning_rate: float, decay_factor: float,
                     momentum: float, train_id: str, test: Batch = None):
    """
    Create and train a 2 layer network:
    - subtract mean of the training set
    - linear layer
    - relu
    - linear layer
    - softmax

    The only parameters that are fixed are the layer initializers and the batch size.

    :param train_id:
    :param training:
    :param validation:
    :param epochs:
    :param hidden_size:
    :param regularization:
    :param initial_learning_rate:
    :param decay_factor:
    :param momentum:
    :return:
    """
    # Mean of the training set
    mu = training.mean()

    # Definition of the network
    net = Network()
    net.add_layer(layers.BatchNormalization(CIFAR10.input_size, mu))
    net.add_layer(layers.Linear(CIFAR10.input_size, hidden_size, regularization, initializers.Xavier()))
    net.add_layer(layers.ReLU(hidden_size))
    net.add_layer(layers.Linear(hidden_size, CIFAR10.output_size, regularization, initializers.Xavier()))
    net.add_layer(layers.Softmax(CIFAR10.output_size))

    # Training
    opt = optimizers.MomentumSGD(net, initial_learning_rate, decay_factor, True, momentum)
    opt.train(training, validation, epochs, 10000)

    # Plotting
    plot = costs_accuracies_plot(opt.epoch_nums, opt.acc_train, opt.acc_val,
                                 opt.cost_train, opt.cost_val,
                                 'images/{}.png'.format(train_id))

    result = {
        'epochs': epochs,
        'hidden_size': hidden_size,
        'regularization': regularization,
        'initial_learning_rate': initial_learning_rate,
        'decay_factor': decay_factor,
        'momentum': momentum,
        # 'net': net,
        # 'opt': opt,
        'epoch_nums': opt.epoch_nums,
        'cost_train': opt.cost_train,
        'acc_train': opt.acc_train,
        'cost_val': opt.cost_val,
        'acc_val': opt.acc_val,
        'final_cost_train': opt.cost_train[-1],
        'final_acc_train': opt.acc_train[-1],
        'final_cost_val': opt.cost_val[-1],
        'final_acc_val': opt.acc_val[-1],
        'plot': plot
    }

    # Test set
    if test is not None:
        result['final_cost_test'], result['final_acc_test'] = net.cost_accuracy(test)
        result['confusion_matrix'] = confusion_matrix_plot(net, test, CIFAR10().labels,
                                                           'images/{}_conf.png'.format(train_id))

    return result
    return train_loader, test_loader


if __name__ == '__main__':
    torch.random.manual_seed(1234)
    np.random.seed(1234)

    epochs = 10
    lr = 0.01
    batch_size = 32

    optimizer = optimizers.SGD(learning_rate=lr)
    criterion = loss.CrossEntropy()
    layers = [
        layers.LinearLayer(784, 512),
        layers.ReLU(),
        layers.Dropout(keep_rate=0.8),
        layers.LinearLayer(512, 512),
        layers.ReLU(),
        layers.Dropout(keep_rate=0.8),
        layers.LinearLayer(512, 10)
    ]
    model = Model(layers, optimizer, criterion)
    train_loader, test_loader = get_dataset(batch_size)

    for epoch_id in range(epochs):
        model.train()
        total = 0
        correct = 0
        for i, (x, y) in enumerate(train_loader):
            x = x.numpy().reshape(y.shape[0], -1, 1)
print_grad_diff(linear.grad_W, grad_num, 'Grad W')

# Biases matrix
grad_num = compute_grads_for_matrix(training.one_hot_labels, training.images,
                                    linear.b, net, cost)
print_grad_diff(linear.grad_b, grad_num, 'Grad b')

# Two layer network with regularization
net = Network()
linear1 = layers.Linear(cifar.input_size, 15, 0.1, initializers.Xavier(), name='Linear 1')
net.add_layer(linear1)
net.add_layer(layers.ReLU(15))
linear2 = layers.Linear(15, cifar.output_size, 0.3, initializers.Xavier(), name='Linear 2')
net.add_layer(linear2)
net.add_layer(layers.Softmax(cifar.output_size))

outputs = net.evaluate(training.images)
net.backward(training.one_hot_labels)
cost = net.cost(training.one_hot_labels, outputs=outputs)

# Weights matrix, layer 1
grad_num = compute_grads_for_matrix(training.one_hot_labels, training.images,
                                    linear1.W, net, cost)
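# For context, a minimal sketch of central-difference numerical differentiation,
# the standard technique behind gradient checks like the ones above. The
# function name, the cost_fn callback and h are illustrative assumptions, not
# the actual signature of compute_grads_for_matrix.
import numpy as np

def numerical_gradient(cost_fn, matrix, h=1e-5):
    grad = np.zeros_like(matrix)
    it = np.nditer(matrix, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        original = matrix[idx]
        matrix[idx] = original + h       # perturb one entry upwards
        cost_plus = cost_fn()
        matrix[idx] = original - h       # and downwards
        cost_minus = cost_fn()
        matrix[idx] = original           # restore the entry
        grad[idx] = (cost_plus - cost_minus) / (2 * h)
        it.iternext()
    return grad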
def __init__(self):
    self.fc = ly.FClayer()
    self.relu = ly.ReLU()