def test_backward(self):
    x = np.random.randn(10, 10)
    dout = np.random.randn(*x.shape)
    dx_num = eval_numerical_gradient_array(lambda x: self._relu_forward(x), x, dout)

    relu = ReLU()
    out = relu.forward(x)
    relu.backward(dout)
    dx = relu.dx
    # Compare the analytic gradient against the numerical estimate.
    # (assertAlmostEquals is a deprecated alias; use assertAlmostEqual.)
    self.assertAlmostEqual(rel_error(dx_num, dx), 0, places=7)
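# A minimal ReLU module sketch matching the API the test above assumes
# (forward caches the input, backward stores the input gradient on self.dx).
# The attribute name `dx` and the forward/backward split are inferred from
# the test, not a confirmed implementation.
import numpy as np

class ReLU:
    def forward(self, x):
        self.x = x
        return np.maximum(0, x)

    def backward(self, dout):
        # Gradient passes through only where the input was positive.
        self.dx = dout * (self.x > 0)
        return self.dx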
def main():
    # optimizer = SGD(lr, weight_decay, mu=mu)
    optimizer = Adam(lr, weight_decay)
    model = ListModel(net=[Linear(784, 400),
                           ReLU(),
                           Linear(400, 100),
                           ReLU(),
                           Linear(100, 10),
                           Softmax()],
                      loss=CrossEntropyLoss())
    for epoch in range(num_epochs):
        print('epoch number: {}'.format(epoch))
        train(model, optimizer)
        valid(model)
def train(epochs, batch_size, hidden_size, learning_rate):
    """
    Train a simple feed-forward network to classify MNIST digits, using
    vanilla SGD to minimize the categorical cross entropy between network
    outputs and ground-truth labels.
    """
    ff = Sequence(Linear(784, hidden_size), ReLU(),
                  Linear(hidden_size, hidden_size), ReLU(),
                  Linear(hidden_size, 10))
    loss = cross_entropy_loss_with_logits
    loss_grad = cross_entropy_loss_with_logits_grad
    val_set = mnist(val=True)

    def val():
        gen = val_set()
        val_sum = 0.0
        for i, data in enumerate(gen):
            input, label = data
            output = ff.forward(input)
            val_sum += np.argmax(output) == label
        # i is the last zero-based index, so the sample count is i + 1.
        print("Val", val_sum / float(i + 1))

    optim = GradientDescentOptimizer(ff, lr=learning_rate)
    train_set = mnist()
    print("Training ..")
    for epoch in range(epochs):
        loss_sum = 0.0
        gen = train_set()
        for i, data in enumerate(gen):
            input, label = data
            label = np.array(label, dtype=np.int32)
            output = ff.forward(input)
            ff.backward(loss_grad(label, output))
            # Apply the accumulated gradients once per mini-batch.
            if i > 0 and (i % batch_size == 0):
                optim.step()
            loss_sum += loss(label, output)
        print(epoch, "Loss", loss_sum / (i + 1))
        val()
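# A sketch of the loss helpers referenced above, assuming `label` is an
# integer class index and `output` holds the raw logits for one sample.
# These definitions are assumptions for illustration, not the project's
# confirmed implementation.
def cross_entropy_loss_with_logits(label, output):
    # Log-sum-exp with max-subtraction for numerical stability.
    shifted = output - np.max(output)
    log_probs = shifted - np.log(np.sum(np.exp(shifted)))
    return -log_probs[label]

def cross_entropy_loss_with_logits_grad(label, output):
    # Gradient of the loss w.r.t. the logits: softmax(output) - one_hot(label).
    shifted = output - np.max(output)
    probs = np.exp(shifted) / np.sum(np.exp(shifted))
    probs[label] -= 1.0
    return probs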
def test_forward(self):
    x = np.linspace(-0.5, 0.5, num=12).reshape(3, 4)
    relu = ReLU()
    out = relu.forward(x)
    correct_out = np.array([[0.,         0.,         0.,         0.],
                            [0.,         0.,         0.04545455, 0.13636364],
                            [0.22727273, 0.31818182, 0.40909091, 0.5]])
    diff = rel_error(out, correct_out)
    self.assertAlmostEqual(diff, 0, places=7)
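# A sketch of the `rel_error` helper the tests rely on: the maximum
# elementwise relative difference, as commonly used for gradient checks.
# This is an assumed definition (in the style of the CS231n utilities),
# not necessarily the one shipped with this code.
def rel_error(x, y):
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))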
# normalize inputs
train_input = (train_input - train_input.mean(dim=1)[:, None]) / train_input.std(dim=1)[:, None]
test_input = (test_input - test_input.mean(dim=1)[:, None]) / test_input.std(dim=1)[:, None]

# In[]
# training
overallTestAcc = []
overallTrainAcc = []
for eva in range(evaluateIter):
    # create a model
    model = sequential(Linear(input_size=2, output_size=25),
                       ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=25),
                       ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=25),
                       ReLU(),
                       batchNormalization(batchSize, input_size=25),
                       Linear(input_size=25, output_size=2))

    # define criterion and optimizer
    criterion = MSELoss(method='mean')
    optimizer = SGD(model.parameters(), lr=learningRate)

    trainLossList = []
    trainNumList = []
    testLossList = []
    testNumList = []
weights = weights[None, :, :]
weights.transpose(1, 2).shape

input = torch.Tensor([[1, 2, 3, 4, 5],
                      [1, 2, 3, 0, 0],
                      [1, 1, 1, 1, 1]])
bias = torch.Tensor([1, 2, 3, 4])
bias.shape

'''
input = input[:, :, None]
weights.matmul(input).squeeze() + bias
'''

lin = Linear(5, 4, ReLU())
output = lin.forward(input)
target = torch.Tensor([[0, 0, 1, 0],
                       [0, 0, 0, 1],
                       [0, 0, 1, 0]])
d_loss = dloss(output, target)
prev_dl_dx = lin.backward(d_loss)
prev_dl_dx.shape

ex_dloss = torch.Tensor([[.1, .2, .2, .1],
                         [.1, .2, .2, .1],
                         [.1, .2, .2, .1]])
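# A sketch of the `dloss` helper used above, assuming an MSE criterion so
# that the gradient w.r.t. the prediction is 2 * (output - target) / n.
# Both the criterion and the normalization are assumptions, not confirmed
# by the source.
def dloss(output, target):
    return 2 * (output - target) / output.numel()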
    X = data['X']
    y = data['y']
else:
    X, y = fetch_openml('mnist_784', version=1, return_X_y=True)
    # X, y = mnist.data / 255.0, mnist.target
    np.savez('mnist.npz', X=X, y=y)

print("data shape:", X.shape, y.shape)
X, Y = X / 255, one_hot(y)
train_x, test_x, train_y, test_y = X[:60000], X[60000:], Y[:60000], Y[60000:]

#### build model
net = Sequential()
net.add(Dense(784, 400))
net.add(ReLU())
# net.add(Sigmoid())
# net.add(SoftPlus())
# net.add(Dropout())
net.add(Dense(400, 128))
net.add(ReLU())
# net.add(BatchMeanSubtraction())
# net.add(ReLU())
# net.add(Dropout())
net.add(Dense(128, 10))
net.add(SoftMax())

# criterion = MultiLabelCriterion()  # loss function
criterion = CrossEntropyCriterion()

###############################
#### optimizer config
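# A sketch of the `one_hot` helper used above, assuming `y` holds class
# labels as strings or integers (fetch_openml returns string labels).
# Hypothetical definition for illustration only.
def one_hot(y, num_classes=10):
    y = np.asarray(y, dtype=int)
    out = np.zeros((y.size, num_classes))
    out[np.arange(y.size), y] = 1.0
    return out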
# ----- Define the parameters for learning -----
nb_classes = train_labels.shape[0]
features = train_features.size(1)
nb_samples = train_features.size(0)
epsilon = 0.1
eta = .2
# nb_samples is now defined in Sequential()
batch_size = config.batch_size
epochs = int(config.epochs / (nb_samples / batch_size))
# Zeta is to make it work correctly with the Sigma activation function.
# train_label = train_label.add(0.125).mul(0.8)
# test_label = test_label.add(0.125).mul(0.8)

# ----- Implementation of the architecture -----
architecture = Sequential(Linear(2, 25, ReLU()),
                          Linear(25, 25, ReLU()),
                          Linear(25, 25, ReLU()),
                          Linear(25, 2, Sigma()))

# ----- Training -----
round = 1
prev_loss = math.inf
prev_prev_loss = math.inf
errors = []
for epoch in range(epochs):
    for batch_start in range(0, nb_samples, batch_size):
        features = train_features[batch_start:batch_start + batch_size, :]
        labels = train_labels[batch_start:batch_start + batch_size]
        # Train on the current mini-batch (the original passed the full
        # training set here, which ignored the slices computed above).
        tr_loss, tr_error = architecture.forward(features, labels)
        architecture.backward()
        architecture.update(eta)
    loss, error = architecture.forward(test_features, test_labels)
# Default constants
DNN_HIDDEN_UNITS_DEFAULT = '20'
LEARNING_RATE_DEFAULT = 1e-2
MAX_EPOCHS_DEFAULT = 1500
EVAL_FREQ_DEFAULT = 20

training_data = None
training_label = None
test_data = None
test_label = None
training_label_encoded = None
test_label_encoded = None
FLAGS = None

relu = ReLU()
softmax = SoftMax()
cross = CrossEntropy()

plot_train = []
plot_test = []
plot_loss = []
test_loss = []
x = []
descent = 0


def encode(label):
    # Two-column encoding per sample: [label, 1 - label].
    encoded = np.zeros((len(label), 2))
    for i in range(len(label)):
        encoded[i][0] = label[i]
        encoded[i][1] = 1 - label[i]
    return encoded
    target = torch.zeros((nb, 2))
    # One-hot disc membership: points inside the disc of area 1/2
    # centered at (0.5, 0.5) get class 1, the rest class 0.
    target[(input - 0.5).pow(2).sum(1) < 0.5 / pi, 1] = 1
    target[(input - 0.5).pow(2).sum(1) >= 0.5 / pi, 0] = 1
    return input, target


train_input, train_target = generate_disc_set(1000)
test_input, test_target = generate_disc_set(1000)

batch_size = 100
num_batches = len(train_input) // batch_size

# Reset the seeds before each model creation so that
# parameters are initialized the same for a fair comparison.
torch.manual_seed(0)
relu = Sequential(Linear(2, 25), ReLU(),
                  Linear(25, 25), ReLU(),
                  Linear(25, 25), ReLU(),
                  Linear(25, 2))

torch.manual_seed(0)
tanh = Sequential(Linear(2, 25), Tanh(),
                  Linear(25, 25), Tanh(),
                  Linear(25, 25), Tanh(),
                  Linear(25, 2))

criterion = MSE()


def fit(model):
    optimizer = SGD(model.parameters(), model.grads(), lr=0.1)
    losses = []
    print('Epoch | Loss')
    for epoch in range(500):
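# A minimal sketch of the SGD optimizer interface used in fit() above,
# assuming `parameters()` and `grads()` return parallel lists of tensors.
# This is an assumed interface, not the project's confirmed implementation.
class SGD:
    def __init__(self, params, grads, lr):
        self.params = params
        self.grads = grads
        self.lr = lr

    def step(self):
        # In-place vanilla gradient step: p <- p - lr * g.
        for p, g in zip(self.params, self.grads):
            p.sub_(self.lr * g)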
"""This file declares the models to be used for testing.""" from modules import Sequential, Linear, ReLU, Tanh, Sigmoid MODEL1 = Sequential("ReLu", Linear(2, 25), ReLU(), Linear(25, 25), ReLU(), Linear(25, 25), ReLU(), Linear(25, 2), Sigmoid()) MODEL2 = Sequential("Tanh", Linear(2, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 2), Sigmoid()) MODEL3 = Sequential("ReLu + He", Linear(2, 25, "He"), ReLU(), Linear(25, 25, "He"), ReLU(), Linear(25, 25, "He"), ReLU(), Linear(25, 2, "He"), Sigmoid()) MODEL4 = Sequential("Tanh + Xavier", Linear(2, 25, "Xavier"), Tanh(), Linear(25, 25, "Xavier"), Tanh(), Linear(25, 25, "Xavier"), Tanh(), Linear(25, 2, "Xavier"), Sigmoid()) # Best model is actually almost model 2 MODEL_BEST = Sequential("Best", Linear(2, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 25), Tanh(), Linear(25, 2, "He"), Sigmoid())
def _relu_forward(self, x):
    # Functional wrapper around ReLU.forward, used by the numerical
    # gradient check in test_backward.
    relu = ReLU()
    return relu.forward(x)