def test_lr_grad():
    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def predict(weights, inputs):
        return sigmoid(np.dot(inputs, weights))

    def training_loss(inputs):
        preds = predict(weights, inputs)
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    def training_accuracy(weights, inputs):
        preds = predict(weights, inputs)
        error = np.count_nonzero(
            np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
        return (256 - error) * 100 / 256.0

    wshape = (500, 250)
    weights = random.rand(*wshape) - 0.5
    xshape = (256, 500)
    tshape = (256, 250)
    inputs = random.rand(*xshape) - 0.5
    targets = np.zeros(tshape)
    truth = random.randint(0, 250, 256)
    targets[np.arange(256), truth] = 1
    gradient_checker.quick_grad_check(training_loss, inputs)
def test_affine():
    x = rng.randn(20, 10)
    b = rng.randn(20, 1)
    fake_y = np.zeros([20, 5])

    def check_fn(w):
        return layers.l2_loss(layers.affine(x, w, b), fake_y)

    w = rng.randn(10, 5)
    gradient_checker.quick_grad_check(check_fn, w, rs=rng)
def test_relu():
    fake_y = np.zeros([2, 5])

    def check_fn(x):
        return layers.l2_loss(layers.relu(x), fake_y)

    x = rng.randn(2, 5)
    gradient_checker.quick_grad_check(check_fn, x, rs=rng)
def test_softmax():
    lbl = np.zeros([20])

    def check_fn(x):
        return layers.softmax_loss(x, lbl)

    x = rng.randn(20, 10)
    gradient_checker.quick_grad_check(check_fn, x, rs=rng)
def test_relu():
    fake_y = np.zeros([2, 5])
    fake_y[:, 0] = 1

    def check_fn(x):
        return layers.softmax_loss(layers.relu(x), fake_y)

    x = rng.randn(2, 5)
    gradient_checker.quick_grad_check(check_fn, x, rs=rng)
def test_affine():
    x = rng.randn(20, 10)
    b = rng.randn(20, 1)
    fake_y = np.zeros([20, 5])
    fake_y[:, 0] = 1

    def check_fn(w):
        return layers.softmax_loss(layers.affine(x, w, b), fake_y)

    w = rng.randn(10, 5)
    gradient_checker.quick_grad_check(check_fn, w, rs=rng)
def test_mxnet_affine():
    xshape = (10, 40)
    fake_y = np.zeros([10, 20])
    weights = rng.randn(20, 40) - 0.5
    inputs = mx.sym.Variable(name='x')
    fc = mx.sym.FullyConnected(name='fc', data=inputs, num_hidden=20)
    # Use the same core.Function(symbol, {name: shape}) form as the other
    # mxnet tests in this suite.
    f = core.Function(fc, {'x': xshape})

    def check_fn(x):
        return layers.l2_loss(f(x=x, fc_weight=weights), fake_y)

    x = rng.randn(*xshape) - 0.5
    gradient_checker.quick_grad_check(check_fn, x, rs=rng)
def test_batchnorm():
    x = rng.randn(20, 40)
    gamma = rng.randn(1, 40)
    beta = rng.randn(1, 40)
    fake_y = np.zeros([20, 40])

    def check_gamma(g):
        y, _, _ = layers.batchnorm(x, g, beta)
        return layers.l2_loss(y, fake_y)

    gradient_checker.quick_grad_check(check_gamma, gamma, rs=rng)

    def check_beta(b):
        y, _, _ = layers.batchnorm(x, gamma, b)
        return layers.l2_loss(y, fake_y)

    gradient_checker.quick_grad_check(check_beta, beta, rs=rng)
def test_mxnet_affine():
    xshape = (10, 40)
    fake_y = np.zeros([10, 20])
    fake_y[:, 0] = 1
    x = rng.randn(*xshape)
    inputs = mx.sym.Variable(name='x')
    fc = mx.sym.FullyConnected(name='fc', data=inputs, num_hidden=20)
    f = core.Function(fc, {'x': xshape})

    def check_fn(weights):
        return layers.softmax_loss(f(x=x, fc_weight=weights), fake_y)

    weights = rng.randn(20, 40) * 0.01
    gradient_checker.quick_grad_check(check_fn, weights, rs=rng)
def test_forward_unused():
    @core.convert_args
    def check_fn(x):
        y = x + 1
        print(mp.exp(y))
        return mp.sum(2 * y)

    x = rng.randn(10, 5)
    return gradient_checker.quick_grad_check(check_fn, x, rs=rng)
def test_mxnet_softmax():
    xshape = (10, 40)
    fake_y = np.zeros([10])
    x = rng.randn(*xshape)
    net = mx.sym.Variable(name='x')
    net = mx.sym.FullyConnected(net, name='fc', num_hidden=20)
    net = mx.sym.SoftmaxOutput(net, name='softmax', normalization='batch')
    f = core.Function(net, {'x': xshape, 'softmax_label': fake_y.shape})

    def check_fn(weights):
        return layers.softmax_cross_entropy(
            f(x=x, softmax_label=fake_y, fc_weight=weights), fake_y)

    weights = rng.randn(20, 40) * 0.01
    gradient_checker.quick_grad_check(check_fn, weights, rs=rng)
def test_relu():
    fake_y = np.zeros([2, 5])
    fake_y[:, 0] = 1

    def check_fn(x):
        return layers.softmax_loss(layers.relu(x), fake_y)

    x = rng.randn(2, 5)
    return gradient_checker.quick_grad_check(check_fn, x, rs=rng)
def test_caffe_concat(): xshape_0 = (10, 40) xshape_1 = (10, 30) fake_y = np.zeros([10, 70]) fake_y[:, 0] = 1 x_1 = rng.randn(*xshape_1) - 0.5 inputs_0 = mx.sym.Variable(name="x_0") inputs_1 = mx.sym.Variable(name="x_1") concat = mx.symbol.CaffeOp(data_0=inputs_0, data_1=inputs_1, num_data=2, prototxt='layer {type:"Concat"}') f = core.function(concat, {"x_0": xshape_0, "x_1": xshape_1}) def check_fn(x_0): return layers.l2_loss(f(x_0=x_0, x_1=x_1), fake_y) x_0 = rng.randn(*xshape_0) - 0.5 gradient_checker.quick_grad_check(check_fn, x_0, rs=rng)
def test_batchnorm():
    x = rng.randn(20, 40)
    gamma = rng.randn(1, 40)
    beta = rng.randn(1, 40)
    fake_y = np.zeros([20, 40])
    fake_y[:, 0] = 1

    def check_gamma(g):
        y, _, _ = layers.batchnorm(x, g, beta)
        return layers.softmax_loss(y, fake_y)

    gradient_checker.quick_grad_check(check_gamma, gamma, rs=rng)

    def check_beta(b):
        y, _, _ = layers.batchnorm(x, gamma, b)
        return layers.softmax_loss(y, fake_y)

    gradient_checker.quick_grad_check(check_beta, beta, rs=rng)
def test_batchnorm():
    x = rng.randn(20, 40)
    gamma = rng.randn(1, 40)
    beta = rng.randn(1, 40)
    fake_y = np.zeros([20, 40])
    fake_y[:, 0] = 1

    @core.convert_args
    def check_gamma(g):
        y, _, _ = layers.batchnorm(x, g, beta)
        return layers.softmax_loss(y, fake_y)

    @core.convert_args
    def check_beta(b):
        y, _, _ = layers.batchnorm(x, gamma, b)
        return layers.softmax_loss(y, fake_y)

    return (gradient_checker.quick_grad_check(check_gamma, gamma, rs=rng) and
            gradient_checker.quick_grad_check(check_beta, beta, rs=rng))
def test_affine():
    x = rng.randn(20, 10)
    b = rng.randn(20, 1)
    fake_y = np.zeros([20, 5])
    fake_y[:, 0] = 1

    @core.convert_args
    def check_fn(w):
        return layers.softmax_loss(layers.affine(x, w, b), fake_y)

    w = rng.randn(10, 5)
    return gradient_checker.quick_grad_check(check_fn, w, rs=rng)
def test_lr_grad():
    inputs = rng.rand(32, 64) * 0.1
    targets = np.zeros((32, 10))
    truth = rng.randint(0, 10, 32)
    targets[np.arange(32), truth] = 1

    def sigmoid(x):
        return 0.5 * (np.tanh(x / 2) + 1)

    def training_loss(weights):
        preds = sigmoid(np.dot(inputs, weights))
        label_probabilities = preds * targets + (1 - preds) * (1 - targets)
        l = -np.sum(np.log(label_probabilities))
        return l

    weights = rng.rand(64, 10) * 0.01
    return gradient_checker.quick_grad_check(training_loss, weights, rs=rng)
def sigmoid(x):
    return 0.5 * (np.tanh(x / 2) + 1)


def predict(weights, inputs):
    return sigmoid(np.dot(inputs, weights))


def training_loss(inputs):
    preds = predict(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    l = -np.sum(np.log(label_probabilities))
    return l


def training_accuracy(weights, inputs):
    preds = predict(weights, inputs)
    error = np.count_nonzero(
        np.argmax(preds, axis=1) - np.argmax(targets, axis=1))
    return (256 - error) * 100 / 256.0


# training_loss reads `weights` from the enclosing scope, so it must be
# defined before the gradient check runs (same shapes as test_lr_grad above).
wshape = (500, 250)
weights = random.rand(*wshape) - 0.5
xshape = (256, 500)
tshape = (256, 250)
inputs = random.rand(*xshape) - 0.5
targets = np.zeros(tshape)
truth = random.randint(0, 250, 256)
targets[np.arange(256), truth] = 1
gradient_checker.quick_grad_check(training_loss, inputs)
def test_basic():
    @core.convert_args
    def check_fn(x):
        return mp.sum(x + x)

    x = rng.randn(10, 5)
    return gradient_checker.quick_grad_check(check_fn, x, rs=rng)