def test_momentum():
    data = saved_data[21]
    assert len(data) == 8
    x = data[0]
    y = data[1]
    solW = data[2:5]
    solb = data[5:]
    reset_prng()
    mlp = hw1.MLP(784, 10, [64, 32],
                  [activation.Sigmoid(), activation.Sigmoid(), activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.856, num_bn_layers=0)

    num_test_updates = 5
    for u in range(num_test_updates):
        mlp.zero_grads()
        mlp.forward(x)
        mlp.backward(y)
        mlp.step()
    mlp.eval()

    W = [layer.W for layer in mlp.linear_layers]
    b = [layer.b for layer in mlp.linear_layers]

    for i, (pred, gt) in enumerate(zip(W, solW)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].W" % i)

    for i, (pred, gt) in enumerate(zip(b, solb)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].b" % i)
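# A minimal sketch of the classic (heavy-ball) momentum update that
# test_momentum exercises -- not the graded hw1 implementation. The attribute
# names (dW, db, momentum_W, momentum_b) are assumptions about how a linear
# layer might cache its gradients and velocity buffers.
def momentum_step_sketch(layer, lr=0.008, momentum=0.856):
    # The velocity is a decaying sum of past gradients; the parameters then
    # move along the velocity rather than along the raw gradient.
    layer.momentum_W = momentum * layer.momentum_W - lr * layer.dW
    layer.momentum_b = momentum * layer.momentum_b - lr * layer.db
    layer.W = layer.W + layer.momentum_W
    layer.b = layer.b + layer.momentum_b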
def visualize(outpath):
    # Configure the training visualization process below
    # Change these hyperparameters around to experiment with your implementation
    epochs = 200
    batch_size = 100
    thisdir = os.path.dirname(__file__)
    savepath = outpath
    train_data_path = os.path.join(thisdir, "data/train_data.npy")
    train_labels_path = os.path.join(thisdir, "data/train_labels.npy")
    val_data_path = os.path.join(thisdir, "data/val_data.npy")
    val_labels_path = os.path.join(thisdir, "data/val_labels.npy")
    test_data_path = os.path.join(thisdir, "data/test_data.npy")
    test_labels_path = os.path.join(thisdir, "data/test_labels.npy")

    dset = (
        process_dset_partition((np.load(train_data_path), np.load(train_labels_path))),
        process_dset_partition((np.load(val_data_path), np.load(val_labels_path))),
        process_dset_partition((np.load(test_data_path), np.load(test_labels_path))))

    mlp = nn.MLP(784, 10, [32, 32, 32],
                 [nn.Sigmoid(), nn.Sigmoid(), nn.Sigmoid(), nn.Identity()],
                 np.random.randn, bias_init, nn.SoftmaxCrossEntropy(), 1e-3,
                 momentum=0.856)

    visualize_training_statistics(mlp, dset, epochs, batch_size, savepath)
    print("Saved output to {}".format(savepath))
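# Example invocation, assuming this file is run as a script and that
# visualize_training_statistics (defined elsewhere) writes a plot image to
# the given path:
#
#     visualize(os.path.join(os.path.dirname(__file__), "training_stats.png"))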
def test_batch_norm_inference():
    num_examples = 1000
    data = saved_data[20]
    assert len(data) == 15
    x = data[0]
    y = data[1]
    soldbeta = data[2]
    soldgamma = data[3]
    xs = data[4]
    solground = data[5:]
    reset_prng()
    mlp = hw1.MLP(784, 10, [64, 32],
                  [activation.Sigmoid(), activation.Sigmoid(), activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.0, num_bn_layers=1)

    batch_size = 100
    # One training step, so the BN layer sees batch statistics and
    # accumulates running estimates for inference.
    mlp.train()
    for b in range(0, 1):
        mlp.zero_grads()
        mlp.forward(x[b:b + batch_size])
        mlp.backward(y[b:b + batch_size])
        mlp.step()
        closeness_test(mlp.bn_layers[0].dbeta, soldbeta, "mlp.bn_layers[0].dbeta")
        closeness_test(mlp.bn_layers[0].dgamma, soldgamma, "mlp.bn_layers[0].dgamma")

    # In eval mode the forward pass must normalize with the running
    # statistics rather than the current batch's mean and variance.
    for b in range(0, num_examples, batch_size):
        mlp.eval()
        student = mlp.forward(xs[b:b + batch_size])
        ground = solground[b // batch_size]
        closeness_test(student, ground, "mlp.forward(x)")
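# A minimal sketch of the eval-mode normalization checked above. The
# attribute names (running_mean, running_var, gamma, beta, eps) are
# assumptions about the BatchNorm layer, not the graded interface.
import numpy as np

def batchnorm_eval_forward_sketch(bn, x):
    # Normalize with the running statistics accumulated during training,
    # then apply the learned scale and shift.
    x_hat = (x - bn.running_mean) / np.sqrt(bn.running_var + bn.eps)
    return bn.gamma * x_hat + bn.beta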
def test_batch_norm_train():
    data = saved_data[19]
    assert len(data) == 10
    x = data[0]
    y = data[1]
    soldW = data[2:5]
    soldb = data[5:8]
    soldbeta = data[8]
    soldgamma = data[9]
    reset_prng()
    mlp = hw1.MLP(784, 10, [64, 32],
                  [activation.Sigmoid(), activation.Sigmoid(), activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.0, num_bn_layers=1)
    mlp.forward(x)
    mlp.backward(y)

    dW = [layer.dW for layer in mlp.linear_layers]
    db = [layer.db for layer in mlp.linear_layers]

    for i, (pred, gt) in enumerate(zip(dW, soldW)):
        closeness_test(pred, gt, "mlp.dW[%d]" % i)

    for i, (pred, gt) in enumerate(zip(db, soldb)):
        closeness_test(pred, gt, "mlp.db[%d]" % i)

    closeness_test(mlp.bn_layers[0].dbeta, soldbeta, "mlp.bn_layers[0].dbeta")
    closeness_test(mlp.bn_layers[0].dgamma, soldgamma, "mlp.bn_layers[0].dgamma")
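# A minimal sketch of the parameter gradients the dbeta/dgamma checks compare
# against. Assumes x_hat is the normalized activation cached on the forward
# pass and delta is the gradient arriving at the BN layer; both names are
# illustrative, not the graded interface.
import numpy as np

def batchnorm_param_grads_sketch(x_hat, delta):
    # gamma scales x_hat elementwise and beta shifts it, so their gradients
    # are sums of the incoming gradient over the batch dimension.
    dgamma = np.sum(delta * x_hat, axis=0, keepdims=True)
    dbeta = np.sum(delta, axis=0, keepdims=True)
    return dgamma, dbeta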
def test_mystery_hidden_backward3():
    data = saved_data[18]
    assert len(data) == 6
    x = data[0]
    y = data[1]
    soldW = data[2:4]
    soldb = data[4:]
    reset_prng()
    mlp = hw1.MLP(784, 10, [32],
                  [activation.Sigmoid(), activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.0, num_bn_layers=0)
    mlp.forward(x)
    mlp.backward(y)

    dW = [layer.dW for layer in mlp.linear_layers]
    db = [layer.db for layer in mlp.linear_layers]

    for i, (pred, gt) in enumerate(zip(dW, soldW)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].dW" % i)

    for i, (pred, gt) in enumerate(zip(db, soldb)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].db" % i)
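# A minimal sketch of the chain rule behind the two layer gradients checked
# above, assuming a cached forward pass a1 = sigmoid(x @ W1 + b1) followed by
# logits = a1 @ W2 + b2, with dlogits coming from the loss; all names are
# illustrative.
def hidden_layer_grads_sketch(x, a1, W2, dlogits):
    dW2 = a1.T @ dlogits                      # output-layer weight gradient
    db2 = dlogits.sum(axis=0, keepdims=True)
    da1 = dlogits @ W2.T                      # push the gradient back through W2
    dz1 = da1 * a1 * (1.0 - a1)               # sigmoid'(z1) = a1 * (1 - a1)
    dW1 = x.T @ dz1                           # hidden-layer weight gradient
    db1 = dz1.sum(axis=0, keepdims=True)
    return dW1, db1, dW2, db2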
def test_linear_classifier_forward():
    data = saved_data[2]
    x = data[0]
    gt = data[1]
    reset_prng()
    mlp = hw1.MLP(784, 10, [], [activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.0, num_bn_layers=0)
    pred = mlp.forward(x)
    closeness_test(pred, gt, "mlp.forward(x)")
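# With no hidden layers and an Identity activation, the whole network is a
# single affine map; a minimal sketch, assuming W has shape (784, 10) and b
# shape (1, 10) under the row-major batch convention used by these tests.
def linear_forward_sketch(x, W, b):
    # (batch, 784) @ (784, 10) + (1, 10) -> (batch, 10) logits
    return x @ W + b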
def test_mystery_hidden_forward3():
    data = saved_data[15]
    x = data[0]
    gt = data[1]
    reset_prng()
    mlp = hw1.MLP(784, 10, [32],
                  [activation.Sigmoid(), activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.0, num_bn_layers=0)
    pred = mlp.forward(x)
    closeness_test(pred, gt, "mlp.forward(x)")
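# A minimal sketch of the one-hidden-layer forward pass this test checks;
# the parameter names and shapes are assumptions matching the 784 -> 32 -> 10
# architecture constructed above.
import numpy as np

def one_hidden_forward_sketch(x, W1, b1, W2, b2):
    a1 = 1.0 / (1.0 + np.exp(-(x @ W1 + b1)))   # sigmoid hidden layer
    return a1 @ W2 + b2                          # Identity output: raw logits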
def test_linear_classifier_backward():
    data = saved_data[3]
    x = data[0]
    y = data[1]
    soldW = data[2]
    soldb = data[3]
    reset_prng()
    mlp = hw1.MLP(784, 10, [], [activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.0, num_bn_layers=0)
    mlp.forward(x)
    mlp.backward(y)
    closeness_test(mlp.linear_layers[0].dW, soldW, "mlp.linear_layers[0].dW")
    closeness_test(mlp.linear_layers[0].db, soldb, "mlp.linear_layers[0].db")
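# A minimal sketch of the gradients this test compares against. For softmax
# cross-entropy with one-hot labels y, the gradient with respect to the
# logits is softmax(z) - y; dividing by the batch size assumes the loss is
# averaged over the batch, which may differ from the graded convention.
import numpy as np

def linear_classifier_grads_sketch(x, y, W, b):
    z = x @ W + b
    z = z - z.max(axis=1, keepdims=True)               # stabilize the exponentials
    p = np.exp(z) / np.exp(z).sum(axis=1, keepdims=True)
    dz = (p - y) / x.shape[0]                          # dL/dz for mean-reduced CE
    dW = x.T @ dz                                      # (784, batch) @ (batch, 10)
    db = dz.sum(axis=0, keepdims=True)
    return dW, db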
def test_linear_classifier_step():
    data = saved_data[4]
    x = data[0]
    y = data[1]
    solW = data[2]
    solb = data[3]
    reset_prng()
    mlp = hw1.MLP(784, 10, [], [activation.Identity()],
                  weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
                  momentum=0.0, num_bn_layers=0)

    num_test_updates = 5
    for u in range(num_test_updates):
        mlp.zero_grads()
        mlp.forward(x)
        mlp.backward(y)
        mlp.step()

    closeness_test(mlp.linear_layers[0].W, solW, "mlp.linear_layers[0].W")
    closeness_test(mlp.linear_layers[0].b, solb, "mlp.linear_layers[0].b")
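# With momentum=0.0, each of the five updates above is plain SGD; a minimal
# sketch, reusing the assumed dW/db attribute names from the earlier sketches.
def sgd_step_sketch(layer, lr=0.008):
    layer.W = layer.W - lr * layer.dW
    layer.b = layer.b - lr * layer.db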
import hw1 as hw
import numpy as np

td, tl, vd, vl, tsd, tsl = hw.load_mnist_data_file("./../data/")
init = 0
no = 1
weight_init = hw.random_normal_weight_init
bias_init = hw.zeros_bias_init

mlp = hw.MLP(784, 10, [], [hw.Identity()], weight_init, bias_init,
             hw.SoftmaxCrossEntropy(), 0.008, momentum=0.0, num_bn_layers=0)
z = mlp.forward(td[init:init + no])

# One-hot encode the integer training labels.
labels = np.zeros((tl.shape[0], 10))
for i in range(tl.shape[0]):
    labels[i][tl[i]] = 1

# print("Actual:{} Prediction:{} Loss:{}".format(tl[init:init + no], z.argmax(1), loss))
# print("Predict_Accuracy:{} Loss_zero:{}".format(tl[init:init + no] == z.argmax(1), loss == 0))
# print("Loss_accuracy:{}".format((tl[init:init + no] == z.argmax(1)) == (loss == 0)))

# The trailing constructor arguments below are assumed to follow the same
# pattern as the other MLP constructions in this file.
mlp1 = hw.MLP(784, 10, [32], [hw.Sigmoid(), hw.Identity()], weight_init, bias_init,
              hw.SoftmaxCrossEntropy(), 0.008, momentum=0.0, num_bn_layers=0)
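# A small sketch of the sanity check the commented prints above gesture at.
# "loss" is never defined in this snippet, so only accuracy is computed here;
# the function name is illustrative.
def sanity_check_sketch(mlp, x, t):
    z = mlp.forward(x)
    return (z.argmax(1) == t).mean()   # fraction of correct argmax predictions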