def GCN_check(name, adj, weights, layer_config):
    num_layer = len(layer_config)
    model = Network()
    for i in range(num_layer - 2):
        model.add(Aggregate('A{}'.format(i), adj))
        model.add(
            Linear('W{}'.format(i), layer_config[i], layer_config[i + 1],
                   'xavier').set_W(weights[i]))
        model.add(Tanh('Tanh{}'.format(i)))
    model.add(Aggregate('A{}'.format(num_layer - 2), adj))
    model.add(
        Linear('W{}'.format(num_layer - 2), layer_config[-2], layer_config[-1],
               'xavier').set_W(weights[-1]))
    loss = SoftmaxCrossEntropyLoss(name='loss')
    # loss = EuclideanLoss(name='loss')

    print("Model " + name)
    for layer in model.layer_list:
        print(":\t" + repr(layer))
    print(':\t' + repr(loss))

    print('Forward Computation: ', model.str_forward('X'))
    print('Backward Computation:', model.str_backward('Z-Y'))
    print()
    model.str_update()
    print()

    return model, loss
def main():
    '''
    The first layer of this network is a dilated convolution layer;
    the second layer is a fully connected layer.
    '''
    np.random.seed(42)
    n_classes = 10
    dim = 784
    batch_norm = True

    inputs, labels = load_normalized_mnist_data()

    net = Network(learning_rate=1e-3)
    net.add_layer(DilatedConv(8, 3, 2, 1, 2))
    net.add_layer(ReLU())
    net.add_layer(Linear(8 * 784, 128))
    net.add_layer(ReLU())
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())

    train_network(net, inputs, labels, 50)
    test_loss, test_acc = validate_network(net, inputs['test'], labels['test'],
                                           batch_size=128)
    print('Baseline MLP Network with Conv:')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
def MLP(name, weights, layer_config):
    num_layer = len(layer_config)
    model = Network()
    for i in range(num_layer - 2):
        model.add(Linear('W{}'.format(i), layer_config[i],
                         layer_config[i + 1], 'kaiming'))
        model.add(Relu('Relu{}'.format(i)))
    model.add(Linear('W{}'.format(num_layer - 2), layer_config[-2],
                     layer_config[-1], 'kaiming'))
    loss = SoftmaxCrossEntropyLoss(name='loss')

    print("Model " + name)
    for layer in model.layer_list:
        print(":\t" + repr(layer))
    print(':\t' + repr(loss))
    print()

    print('Forward Computation: ', model.str_forward('X'))
    print('Backward Computation:', model.str_backward('Z-Y'))
    print()
    model.str_update()
    print()

    return model, loss
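# Hedged usage sketch (not from the original sources): build a 784-256-10
# classifier with the MLP factory above. MLP ignores its `weights` argument,
# so None is passed; the layer sizes here are illustrative assumptions.
example_model, example_loss = MLP('example_mlp', None, [784, 256, 10])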
def example_GCN(name, adj, weights, layer_config):
    model = Network()
    model.add(Aggregate('A1', adj))
    model.add(Linear('W1', layer_config[0], layer_config[1], 'kaiming'))
    model.add(Relu('Relu1'))
    model.add(Aggregate('A2', adj))
    model.add(Linear('W2', layer_config[1], layer_config[1], 'kaiming'))
    model.add(Relu('Relu2'))
    model.add(Aggregate('A3', adj))
    model.add(Linear('W3', layer_config[1], layer_config[2], 'kaiming'))
    loss = SoftmaxCrossEntropyLoss(name='loss')

    print("Model " + name)
    for layer in model.layer_list:
        print(":\t" + repr(layer))
    print(':\t' + repr(loss))

    print('Forward Computation: ', model.str_forward('X'))
    print('Backward Computation:', model.str_backward('Z-Y'))
    print()
    model.str_update()
    print()

    return model, loss
def get_model(args):
    loss_dict = {}
    softmaxLoss = SoftmaxCrossEntropyLoss("softmax")
    euclideanLoss = EuclideanLoss("euclidean")
    loss_dict['softmax'] = softmaxLoss
    loss_dict['euclidean'] = euclideanLoss

    config = {
        'learning_rate': args.learning_rate,
        'weight_decay': args.weight_decay,
        'momentum': args.momentum,
        'batch_size': args.batch_size,
        'max_epoch': args.max_epoch,
        'disp_freq': args.disp_freq,
        'test_epoch': args.test_epoch
    }

    loss = loss_dict[args.loss]
    model = Network()
    layer = args.hidden_layer
    if layer == 1:
        model.add(Linear('fc1', 784, args.hidden_size, 0.01))
        model.add(get_activation(args.activation, 0))
        model.add(Linear('fc2', args.hidden_size, 10, 0.01))
        model.add(get_activation(args.activation, 1))
    else:
        model.add(Linear('fc1', 784, args.hidden_size, 0.01))
        model.add(get_activation(args.activation, 0))
        model.add(Linear('fc2', args.hidden_size, args.hidden_size // 2, 0.01))
        model.add(get_activation(args.activation, 1))
        model.add(Linear('fc3', args.hidden_size // 2, 10, 0.01))
        model.add(get_activation(args.activation, 2))

    return model, config, loss
def __init__(self):
    super(LightSpeech, self).__init__()

    # ------------ As Actor ------------ #
    self.embeddings = nn.Embedding(hparams.embedding_size + 1,
                                   hparams.embedding_dim,
                                   padding_idx=0)
    self.pre_gru = nn.GRU(hparams.pre_gru_in_dim,
                          int(hparams.pre_gru_out_dim / 2),
                          num_layers=hparams.pre_gru_layer_size,
                          batch_first=True,
                          bidirectional=True)
    self.pre_linear = Linear(hparams.pre_gru_out_dim, hparams.post_gru_in_dim)
    self.LR = LengthRegulator()

    # ------------ As Actor ------------ #
    self.post_gru = nn.GRU(hparams.post_gru_in_dim,
                           int(hparams.post_gru_out_dim / 2),
                           num_layers=hparams.post_gru_layer_size,
                           batch_first=True,
                           bidirectional=True)
    self.post_linear = Linear(hparams.post_gru_out_dim, hparams.n_mel_channels)
def one_layer_mlp(loss='euclidean', activation='relu'):
    model = Network()
    model.add(Linear('fc1', 784, 360, 0.01))
    model.add(activation_map[activation]('activation1'))
    model.add(Linear('fc2', 360, 10, 0.01))
    loss = loss_map[loss](name=loss)
    return model, loss
def test_CheckMinibatchTrainerEqualsSimpleTrainer(self):
    train_set = [(np.random.rand(2), i) for i in range(3)]
    loss = SquaredLoss()
    epochs = 1
    optimizer = SGD(learning_rate=0.01)

    minibatch_model = Seq([Linear(2, 5, initialize='ones')])
    minibatch_trainer = MinibatchTrainer()
    minibatch_trainer.train_minibatches(minibatch_model,
                                        train_set,
                                        batch_size=1,
                                        loss=loss,
                                        epochs=epochs,
                                        optimizer=optimizer,
                                        shuffle=False)

    simple_model = Seq([Linear(2, 5, initialize='ones')])
    simple_trainer = OnlineTrainer()
    simple_trainer.train(simple_model, train_set, loss, epochs, optimizer)

    x = np.random.rand(2)
    simple_y = simple_model.forward(x)
    minibatch_y = minibatch_model.forward(x)

    assert_array_equal(simple_y, minibatch_y)
def test_ManyErrors(self):
    model = Seq(
        [Linear(2, 3, initialize='ones'),
         Linear(3, 1, initialize='ones')])
    x = np.random.rand(2)
    y = model.forward(x)
    model.backward(np.array([1.]))
def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1,
             activation: ActivationFunction = F.relu):
    super(TransformerDecoderLayer, self).__init__()
    self.self_attn = MultiHeadAttention(d_model, nhead, dropout=dropout)
    self.multihead_attn = MultiHeadAttention(d_model, nhead, dropout=dropout)

    # Implementation of Feedforward model
    self.linear1 = Linear(d_model, dim_feedforward)
    self.dropout = torch.nn.Dropout(dropout)
    self.linear2 = Linear(dim_feedforward, d_model)

    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.norm3 = LayerNorm(d_model)
    self.dropout1 = torch.nn.Dropout(dropout)
    self.dropout2 = torch.nn.Dropout(dropout)
    self.dropout3 = torch.nn.Dropout(dropout)

    self.activation = activation
    self.reset_parameters()
def tryParameters(test_name, N_hidden, lam, l_rate, decay, mom,
                  epochs=50, batch_size=250):
    net = Net([
        BatchNorm(cifar.in_size, trainMean()),
        Linear(cifar.in_size, N_hidden, lam=lam),
        ReLU(N_hidden),
        Linear(N_hidden, cifar.out_size, lam=lam),
        Softmax(cifar.out_size)
    ], lam, l_rate, decay, mom)

    results = net.trainMiniBatch(train, val, epochs, batch_size, shuffle=True)

    print('{} Test Accuracy: {:.2f}'.format(
        test_name, net.accuracy(test['one_hot'].T, test['images'].T)))
    print('Final train a/c, val a/c: {:.2f}/{:.2f}, {:.2f}/{:.2f}'.format(
        results['last_a_train'], results['last_c_train'],
        results['last_a_val'], results['last_c_val']))

    plotResults(test_name, results['a_train'], results['c_train'],
                results['a_val'], results['c_val'])
    # weights_plot(net, "plots/weights_vizualisation_{}.png".format(test_name), labels)

    return results
def test_Reduce(self):
    model = Seq(
        [Linear(3, 2, initialize='ones'),
         Linear(2, 2, initialize='ones')])
    x = np.random.rand(3)
    model.forward(x)
    model.backward(np.array([1., 1.]))
def test_iris(self):
    scores = []
    for i in range(1):
        hidden = 50
        l1 = Linear(4, hidden, initialize='ones')
        l2 = Linear(hidden, 3, initialize='ones')
        l1.W *= 0.000000001
        l2.W *= 0.00000001
        model = Seq(l1, Sigmoid, l2)

        loss = CrossEntropyLoss()
        trainer = OnlineTrainer()
        losses = trainer.train(model,
                               self.train_set,
                               epochs=100,
                               loss=loss,
                               optimizer=SGD(learning_rate=0.01))

        score = loss.test_score(model, self.train_set)
        print("hidden=%f score=%f" % (hidden, score))
        scores.append(score)

        self.plot_loss_history(losses)
        plt.show()

    self.assertGreaterEqual(numpy.mean(scores), 94.)
def main():
    '''
    Trains two networks on the MNIST dataset. Both have two hidden ReLU
    layers with 256 and 128 units. The first one has a mean batch
    normalization layer before every layer.
    '''
    np.random.seed(42)
    n_classes = 10
    dim = 784
    inputs, labels = load_normalized_mnist_data()

    # Define network without batch norm
    net = Network(learning_rate=1e-3)
    net.add_layer(Linear(dim, 256))
    net.add_layer(ReLU())
    net.add_layer(Linear(256, 128))
    net.add_layer(ReLU())
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())

    train_network(net, inputs, labels, 50)
    test_loss, test_acc = validate_network(net, inputs['test'], labels['test'],
                                           batch_size=128)
    print('Baseline MLP Network without batch normalization:')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
class ShallowConvNet(Net):
    def __init__(self, device=torch.device('cuda:0')):
        super().__init__(device)
        self.conv1 = Conv(1, 6, kernel_size=5, noise_std=1e-0, act='TanH',
                          device=self.device)
        self.act1 = Activation('TanH')
        self.pool1 = Pool(2, device=self.device)
        self.fc1 = Linear(6 * 12 * 12, 100, noise_std=1e-0, act='TanH',
                          device=self.device)
        self.act2 = Activation('TanH')
        self.fc2 = Linear(100, 10, noise_std=1e-0, act='TanH',
                          device=self.device)
        self.softmax = Activation('Softmax')
        self.layers = [self.conv1, self.fc1, self.fc2]

    def forward(self, input):
        conv_out_1 = self.conv1.forward(input)
        act_out_1 = self.act1.forward(conv_out_1)
        pool_out_1 = self.pool1.forward(act_out_1)
        pool_out_1 = pool_out_1.reshape(len(pool_out_1), -1)
        fc_out_1 = self.fc1.forward(pool_out_1)
        act_out_2 = self.act2.forward(fc_out_1)
        fc_out_2 = self.fc2.forward(act_out_2)
        output = self.softmax.forward(fc_out_2)
        return output
def CNN():
    '''
    CNN network on the MNIST dataset
    '''
    np.random.seed(42)
    n_classes = 10
    inputs, labels = load_mnist_images()

    # Define CNN with batch norm
    net = Network(learning_rate=1e-3)
    net.add_layer(Convolution2D(1, 2, 28, 28, pad=0, stride=1, filter_size=3,
                                dilation=2))
    net.add_layer(ReLU())
    net.add_layer(BatchNorm(800))
    net.add_layer(Linear(800, 128))
    net.add_layer(ReLU())
    net.add_layer(BatchNorm(128))
    net.add_layer(Linear(128, n_classes))
    net.set_loss(SoftmaxCrossEntropyLoss())

    train_network(net, inputs, labels, 250)
    test_loss, test_acc = validate_network(net, inputs['test'], labels['test'],
                                           batch_size=128)
    print('Baseline CNN Network with batch normalization:')
    print('Test loss:', test_loss)
    print('Test accuracy:', test_acc)
    return net
def build_model(config):
    model = Network()
    layer_num = 0
    for layer in config['use_layer']:
        if layer['type'] == "Linear":
            in_num = layer['in_num']
            out_num = layer['out_num']
            if "init_std" in layer.keys():
                model.add(
                    Linear(layer['type'] + str(layer_num), in_num, out_num,
                           init_std=layer['init_std']))
            else:
                model.add(
                    Linear(layer['type'] + str(layer_num), in_num, out_num))
            layer_num += 1
        elif layer['type'] == 'Relu':
            model.add(Relu(layer['type'] + str(layer_num)))
            layer_num += 1
        elif layer['type'] == 'Sigmoid':
            model.add(Sigmoid(layer['type'] + str(layer_num)))
            layer_num += 1
        else:
            assert 0

    loss_name = config['use_loss']
    if loss_name == 'EuclideanLoss':
        loss = EuclideanLoss(loss_name)
    elif loss_name == 'SoftmaxCrossEntropyLoss':
        loss = SoftmaxCrossEntropyLoss(loss_name)
    else:
        assert 0

    return model, loss
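# Hedged example configuration for build_model above; the concrete layer sizes
# and init_std value are assumptions. It only uses the keys that build_model
# actually reads: 'use_layer' and 'use_loss'.
example_config = {
    'use_layer': [
        {'type': 'Linear', 'in_num': 784, 'out_num': 256, 'init_std': 0.01},
        {'type': 'Relu'},
        {'type': 'Linear', 'in_num': 256, 'out_num': 10},
    ],
    'use_loss': 'SoftmaxCrossEntropyLoss',
}
# example_model, example_loss = build_model(example_config)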
def __init__(self, device, dataset, input_channel, input_size, width,
             linear_size):
    super(cnn_2layer, self).__init__()

    mean, sigma = get_mean_sigma(device, dataset, IBP=True)
    self.normalizer = Normalization(mean, sigma)

    self.layers = [
        Normalization(mean, sigma),
        Conv2d(input_channel, 4 * width, 4, stride=2, padding=1,
               dim=input_size),
        ReLU((4 * width, input_size // 2, input_size // 2)),
        Conv2d(4 * width, 8 * width, 4, stride=2, padding=1,
               dim=input_size // 2),
        ReLU((8 * width, input_size // 4, input_size // 4)),
        Flatten(),
        Linear(8 * width * (input_size // 4) * (input_size // 4), linear_size),
        ReLU(linear_size),
        Linear(linear_size, 10),
    ]
def network_setup(model_file_path=None):
    freq_count = 4000
    count_bins = 88 * 20
    dataset = MapsDB('../db',
                     freq_count=freq_count,
                     count_bins=count_bins,
                     batch_size=128,
                     start_time=0.5,
                     duration=0.5)
    model = Network()
    model.add(Linear('fc1', dataset.get_vec_input_width(), 2048, 0.001))
    model.add(Sigmoid('sigmoid1'))
    model.add(Linear('fc2', 2048, dataset.get_label_width(), 0.001))
    model.add(Softmax('softmax2'))

    loss = CrossEntropyLoss(name='xent')
    # loss = EuclideanLoss(name='r2')

    optim = SGDOptimizer(learning_rate=0.00001, weight_decay=0.005,
                         momentum=0.9)
    # optim = AdagradOptimizer(learning_rate=0.001, eps=1e-6)

    input_placeholder = T.fmatrix('input')
    label_placeholder = T.fmatrix('label')
    label_active_size_placeholder = T.ivector('label_active_size')

    if model_file_path:
        model.loads(model_file_path)
    else:
        dataset.load_cache()

    model.compile(input_placeholder, label_placeholder,
                  label_active_size_placeholder, loss, optim)
    return model, dataset, freq_count, count_bins
def gradient_check():
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    # init the network
    N_hidden = 50
    lin = [
        Linear(cifar.in_size, N_hidden, lam=0.1),
        Linear(N_hidden, cifar.out_size, lam=0.1)
    ]
    g_net = Net(
        [lin[0], ReLU(N_hidden), lin[1], Softmax(cifar.out_size)],
        lam=0.1, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in lin:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)
def test_OneNeuronGradient(self):
    layer = Linear(2, 1)
    x = np.random.rand(2)
    y = layer.forward(x)
    deriv_grad = layer.backward(np.ones(1))
    numgrad = numerical_gradient.calc(layer.forward, x)
    numerical_gradient.assert_are_similar(deriv_grad, numgrad[0])
def test3layergradients(samples=1, dimensions=3072):
    print("\n\nTesting 3-layer gradients using a batch size of {}".format(samples))

    trainingData, trainingLabels, encodedTrainingLabels = loadData(
        "Datasets/cifar-10-batches-mat/data_batch_1.mat")

    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]

    network = Model()

    linear = Linear(dimensions, 50, regularization=0.00, initializer="he")
    network.addLayer(linear)
    network.addLayer(Relu())

    linear2 = Linear(50, 30, regularization=0.00, initializer="he")
    network.addLayer(linear2)
    network.addLayer(Relu())

    linear3 = Linear(30, 10, regularization=0.00, initializer="he")
    network.addLayer(linear3)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData,
                                          encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData,
                                          encodedTrainingLabels, network)
    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData,
                                          encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData,
                                          encodedTrainingLabels, network)
    numerical_gradW3 = compute_grads_w_BN(1e-4, linear3.W, trainingData,
                                          encodedTrainingLabels, network)
    numerical_gradb3 = compute_grads_w_BN(1e-4, linear3.b, trainingData,
                                          encodedTrainingLabels, network)

    print("W1")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb1)

    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)

    print("W3")
    relative_errorW3 = grad_difference(linear3.gradW, numerical_gradW3)
    print("b3")
    relative_errorb3 = grad_difference(linear3.gradb, numerical_gradb3)

    print("\n")
def Model_Linear_Gelu_1_SoftmaxCrossEntropyLoss():
    name = '1_Gelu_SoftmaxCrossEntropyLoss'
    model = Network()
    model.add(Linear('fc1', 784, 256, 0.01))
    model.add(Gelu('a1'))
    model.add(Linear('fc2', 256, 10, 0.01))
    loss = SoftmaxCrossEntropyLoss(name='loss')
    return name, model, loss
def test_TwoNeuronsGradient(self):
    layer = Linear(3, 2)
    x = np.random.rand(3)
    y = layer.forward(x)
    deriv_grad = layer.backward(np.ones(2))
    numgrad = numerical_gradient.calc(layer.forward, x)
    numgrad = np.sum(numgrad, axis=0)
    numerical_gradient.assert_are_similar(deriv_grad, numgrad)
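# Generic, self-contained sketch of a central-difference gradient check,
# analogous to what numerical_gradient.calc is used for in the tests above.
# This is an assumption-based illustration, not the project's own
# numerical_gradient module.
import numpy as np


def numerical_jacobian(f, x, eps=1e-6):
    """Approximate df_i/dx_j of a vector-valued f at x via central differences."""
    y = np.atleast_1d(f(x))
    jac = np.zeros((y.size, x.size))
    for j in range(x.size):
        x_plus, x_minus = x.copy(), x.copy()
        x_plus[j] += eps
        x_minus[j] -= eps
        jac[:, j] = (np.atleast_1d(f(x_plus)) - np.atleast_1d(f(x_minus))) / (2 * eps)
    return jac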
def Model_Linear_Gelu_1_HingeLoss():
    name = '1_Gelu_HingeLoss'
    model = Network()
    model.add(Linear('fc1', 784, 256, 0.01))
    model.add(Gelu('a1'))
    model.add(Linear('fc2', 256, 10, 0.01))
    loss = HingeLoss(name='loss')
    return name, model, loss
def Model_Linear_Relu_1_EuclideanLoss():
    name = '1_Relu_EuclideanLoss'
    model = Network()
    model.add(Linear('fc1', 784, 256, 0.01))
    model.add(Relu('a1'))
    model.add(Linear('fc2', 256, 10, 0.01))
    loss = EuclideanLoss(name='loss')
    return name, model, loss
def test_Expand(self):
    model = Seq([
        Linear(2, 3, initialize='ones'),
        Linear(3, 2, initialize='ones'),
    ])
    x = np.random.rand(2)
    model.forward(x)
    back = model.backward(np.ones(2))
def test_OneNeuronBackward(self):
    layer = Linear(2, 1, initialize='ones')
    x = np.array([2., 2.])
    y = layer.forward(x)
    self.assertEqual(y, [5.])

    dJdy = np.array([3])
    dxdy = layer.backward(dJdy)
    assert_array_equal(dxdy, [3., 3.])
def __init__(self, device=torch.device('cuda:0')):
    super().__init__(device)
    self.fc1 = Linear(28 * 28, 50, noise_std=1e-0, device=self.device)
    self.act1 = Activation('TanH')
    self.fc2 = Linear(50, 10, noise_std=1e-0, device=self.device)
    self.softmax = Activation('Softmax')
    self.layers = [self.fc1, self.fc2]
def get_linear_logit(features, linear_feature_columns, units=1, use_bias=False,
                     seed=1024, prefix='linear', l2_reg=0):
    for i in range(len(linear_feature_columns)):
        if linear_feature_columns[i]['feat_cat'] == 'sparse':
            linear_feature_columns[i]['embedding_dim'] = 3
            linear_feature_columns[i]['embeddings_initializer'] = Zeros()

    linear_emb_list = [
        input_from_feature_columns(features,
                                   linear_feature_columns,
                                   l2_reg,
                                   seed,
                                   prefix=prefix + str(i))[0]
        for i in range(units)
    ]
    _, dense_input_list = input_from_feature_columns(features,
                                                     linear_feature_columns,
                                                     l2_reg,
                                                     seed,
                                                     prefix=prefix)

    linear_logit_list = []
    for i in range(units):
        if len(linear_emb_list[i]) > 0 and len(dense_input_list) > 0:
            sparse_input = concat_func(linear_emb_list[i])
            dense_input = concat_func(dense_input_list)
            linear_logit = Linear(l2_reg, mode=2, use_bias=use_bias,
                                  seed=seed)([sparse_input, dense_input])
        elif len(linear_emb_list[i]) > 0:
            sparse_input = concat_func(linear_emb_list[i])
            linear_logit = Linear(l2_reg, mode=0, use_bias=use_bias,
                                  seed=seed)(sparse_input)
        elif len(dense_input_list) > 0:
            dense_input = concat_func(dense_input_list)
            linear_logit = Linear(l2_reg, mode=1, use_bias=use_bias,
                                  seed=seed)(dense_input)
        else:
            # raise NotImplementedError
            return add_func([])
        linear_logit_list.append(linear_logit)

    return concat_func(linear_logit_list)