def test_batch_norm_inference():
    """One training step with batchnorm, check BN grads, then verify eval-mode forward."""
    num_examples = 1000
    record = saved_data[20]
    assert len(record) == 15
    x, y, soldbeta, soldgamma, xs = record[0], record[1], record[2], record[3], record[4]
    solground = record[5:]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [64, 32],
        [activation.Sigmoid(), activation.Sigmoid(), activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.0, num_bn_layers=1)

    batch_size = 100
    mlp.train()
    # Single minibatch of training, then compare the batchnorm gradients.
    for start in range(0, 1):
        mlp.zero_grads()
        mlp.forward(x[start:start + batch_size])
        mlp.backward(y[start:start + batch_size])
        mlp.step()
        closeness_test(mlp.bn_layers[0].dbeta, soldbeta, "mlp.bn_layers[0].dbeta")
        closeness_test(mlp.bn_layers[0].dgamma, soldgamma, "mlp.bn_layers[0].dgamma")

    # Inference over the held-out examples, one batch at a time.
    for start in range(0, num_examples, batch_size):
        mlp.eval()
        student = mlp.forward(xs[start:start + batch_size])
        ground = solground[start // batch_size]
        closeness_test(student, ground, "mlp.forward(x)")
def test_batch_norm_train():
    """Forward/backward in train mode with one BN layer; check all weight, bias, and BN grads."""
    record = saved_data[19]
    assert len(record) == 10
    x, y = record[0], record[1]
    soldW = record[2:5]
    soldb = record[5:8]
    soldbeta, soldgamma = record[8], record[9]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [64, 32],
        [activation.Sigmoid(), activation.Sigmoid(), activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.0, num_bn_layers=1)

    mlp.forward(x)
    mlp.backward(y)

    dW = [layer.dW for layer in mlp.linear_layers]
    db = [layer.db for layer in mlp.linear_layers]
    for i, (pred, gt) in enumerate(zip(dW, soldW)):
        closeness_test(pred, gt, "mlp.dW[%d]" % i)
    for i, (pred, gt) in enumerate(zip(db, soldb)):
        closeness_test(pred, gt, "mlp.db[%d]" % i)
    closeness_test(mlp.bn_layers[0].dbeta, soldbeta, "mlp.bn_layers[0].dbeta")
    closeness_test(mlp.bn_layers[0].dgamma, soldgamma, "mlp.bn_layers[0].dgamma")
def test_mystery_hidden_backward3():
    """Backward pass of a single-hidden-layer MLP; compare dW and db per layer."""
    record = saved_data[18]
    assert len(record) == 6
    x, y = record[0], record[1]
    soldW = record[2:4]
    soldb = record[4:]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [32],
        [activation.Sigmoid(), activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.0, num_bn_layers=0)

    mlp.forward(x)
    mlp.backward(y)

    dW = [layer.dW for layer in mlp.linear_layers]
    db = [layer.db for layer in mlp.linear_layers]
    for i, (pred, gt) in enumerate(zip(dW, soldW)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].dW" % i)
    for i, (pred, gt) in enumerate(zip(db, soldb)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].db" % i)
def test_softmax_cross_entropy_derivative():
    """Check the softmax cross-entropy derivative after a forward call."""
    x, y, sol = saved_data[1][0], saved_data[1][1], saved_data[1][2]
    ce = loss.SoftmaxCrossEntropy()
    ce(x, y)  # forward pass caches state needed by derivative()
    closeness_test(ce.derivative(), sol, "ce.derivative()")
def test_softmax_cross_entropy_forward():
    """Check the softmax cross-entropy forward value against the reference."""
    x, y, sol = saved_data[0][0], saved_data[0][1], saved_data[0][2]
    ce = loss.SoftmaxCrossEntropy()
    closeness_test(ce(x, y), sol, "ce(x, y)")
def test_linear_classifier_forward():
    """Forward pass of a zero-hidden-layer (linear) classifier."""
    record = saved_data[2]
    x, gt = record[0], record[1]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [],
        [activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.0, num_bn_layers=0)

    closeness_test(mlp.forward(x), gt, "mlp.forward(x)")
def test_mystery_hidden_forward3():
    """Forward pass of a single-hidden-layer MLP."""
    record = saved_data[15]
    x, gt = record[0], record[1]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [32],
        [activation.Sigmoid(), activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.0, num_bn_layers=0)

    closeness_test(mlp.forward(x), gt, "mlp.forward(x)")
def test_linear_classifier_backward():
    """Backward pass of a linear classifier; compare dW and db of the single layer."""
    record = saved_data[3]
    x, y, soldW, soldb = record[0], record[1], record[2], record[3]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [],
        [activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.0, num_bn_layers=0)

    mlp.forward(x)
    mlp.backward(y)
    closeness_test(mlp.linear_layers[0].dW, soldW, "mlp.linear_layers[0].dW")
    closeness_test(mlp.linear_layers[0].db, soldb, "mlp.linear_layers[0].db")
def test_linear_classifier_step():
    """Run several SGD updates on a linear classifier; compare final W and b."""
    record = saved_data[4]
    x, y, solW, solb = record[0], record[1], record[2], record[3]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [],
        [activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.0, num_bn_layers=0)

    num_test_updates = 5
    for _ in range(num_test_updates):
        mlp.zero_grads()
        mlp.forward(x)
        mlp.backward(y)
        mlp.step()

    closeness_test(mlp.linear_layers[0].W, solW, "mlp.linear_layers[0].W")
    closeness_test(mlp.linear_layers[0].b, solb, "mlp.linear_layers[0].b")
def test_momentum():
    """Run several SGD-with-momentum updates; compare final W and b of every layer."""
    record = saved_data[21]
    assert len(record) == 8
    x, y = record[0], record[1]
    solW = record[2:5]
    solb = record[5:]

    reset_prng()
    mlp = hw1.MLP(
        784, 10, [64, 32],
        [activation.Sigmoid(), activation.Sigmoid(), activation.Identity()],
        weight_init, bias_init, loss.SoftmaxCrossEntropy(), 0.008,
        momentum=0.856, num_bn_layers=0)

    num_test_updates = 5
    for _ in range(num_test_updates):
        mlp.zero_grads()
        mlp.forward(x)
        mlp.backward(y)
        mlp.step()
    # No BN layers here, so eval() is a no-op for forward math; kept for parity.
    mlp.eval()

    W = [layer.W for layer in mlp.linear_layers]
    b = [layer.b for layer in mlp.linear_layers]
    for i, (pred, gt) in enumerate(zip(W, solW)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].W" % i)
    for i, (pred, gt) in enumerate(zip(b, solb)):
        closeness_test(pred, gt, "mlp.linear_layers[%d].b" % i)