import numpy as np
import numpy.random as npr

# The project-local classes used below (Transformer, Normalization, BetaWarp,
# Linear, TransformKernel, Matern52) are assumed to be importable from the
# package's transformations and kernels modules.


def test_backward_pass():
    npr.seed(1)

    eps = 1e-5
    N = 15
    D = 10

    data = 0.5*npr.rand(N, D)

    norm = Normalization(3)
    norm_inds = [1, 3, 5]

    bw = BetaWarp(2)
    bw_inds = [0, 2]

    lin = Linear(3)
    lin_inds = [6, 8, 9]

    t = Transformer(D)

    # Add a layer and test the gradient
    t.add_layer((norm, norm_inds), (bw, bw_inds), (lin, lin_inds))

    new_data = t.forward_pass(data)
    loss = np.sum(new_data**2)
    V = 2*new_data

    dloss = t.backward_pass(V)
    dloss_est = np.zeros(dloss.shape)

    # Central-difference estimate of the gradient of the loss w.r.t. the data.
    for i in xrange(N):
        for j in xrange(D):
            data[i, j] += eps
            loss_1 = np.sum(t.forward_pass(data)**2)
            data[i, j] -= 2*eps
            loss_2 = np.sum(t.forward_pass(data)**2)
            data[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2*eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6

    # Add a second layer and test the gradient
    t.add_layer(Linear(9))

    new_data = t.forward_pass(data)
    loss = np.sum(new_data**2)
    V = 2*new_data

    dloss = t.backward_pass(V)
    dloss_est = np.zeros(dloss.shape)

    for i in xrange(N):
        for j in xrange(D):
            data[i, j] += eps
            loss_1 = np.sum(t.forward_pass(data)**2)
            data[i, j] -= 2*eps
            loss_2 = np.sum(t.forward_pass(data)**2)
            data[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2*eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
def test_forward_pass():
    npr.seed(1)

    N = 15
    D = 10

    data = 0.5*npr.rand(N, D)

    norm = Normalization(3)
    norm_inds = [1, 3, 5]

    bw = BetaWarp(2)
    bw_inds = [0, 2]

    lin = Linear(3)
    lin_inds = [6, 8, 9]

    t = Transformer(D)
    t.add_layer((norm, norm_inds), (bw, bw_inds), (lin, lin_inds))

    new_data = t.forward_pass(data)

    # Untouched columns 4 and 7 are passed through unchanged as the last two
    # columns, and the normalized block sums to one.
    assert new_data.shape[1] == 9
    assert np.all(new_data[:, 7:] == data[:, [4, 7]])
    assert np.linalg.norm(new_data[:, 0:3].sum(1) - 1) < 1e-10

    bw = BetaWarp(9)
    t.add_layer(bw)
def test_backward_pass():
    npr.seed(1)

    eps = 1e-5
    N = 10
    D = 5

    lin = Linear(D)

    data = 0.5*npr.rand(N, D)
    new_data = lin.forward_pass(data)
    loss = np.sum(new_data**2)
    V = 2*new_data

    dloss = lin.backward_pass(V)
    dloss_est = np.zeros(dloss.shape)

    for i in xrange(N):
        for j in xrange(D):
            data[i, j] += eps
            loss_1 = np.sum(lin.forward_pass(data)**2)
            data[i, j] -= 2*eps
            loss_2 = np.sum(lin.forward_pass(data)**2)
            data[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2*eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
def test_construction():
    npr.seed(1)

    D = 10

    norm = Normalization(3)
    norm_inds = [1, 3, 5]

    bw = BetaWarp(2)
    bw_inds = [0, 2]

    lin = Linear(3)
    lin_inds = [6, 8, 9]

    t = Transformer(D)
    t.add_layer((norm, norm_inds), (bw, bw_inds), (lin, lin_inds))
def test_grad():
    npr.seed(1)

    eps = 1e-5
    N = 10
    M = 5
    D = 5

    beta_warp = BetaWarp(2)
    norm = Normalization(2)
    lin = Linear(D)

    transformer = Transformer(D)
    # Each entry is a tuple, (transformation, indices_it_acts_on)
    transformer.add_layer((beta_warp, [0, 2]), (norm, [1, 4]))
    # This is crazy. We would never do this.
    # One transformation means apply to all dimensions.
    transformer.add_layer(lin)

    kernel = TransformKernel(Matern52(lin.num_factors), transformer)

    data1 = npr.rand(N, D)
    data2 = npr.rand(M, D)

    loss = np.sum(kernel.cross_cov(data1, data2))
    dloss = kernel.cross_cov_grad_data(data1, data2).sum(0)
    dloss_est = np.zeros(dloss.shape)

    for i in xrange(M):
        for j in xrange(D):
            data2[i, j] += eps
            loss_1 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] -= 2*eps
            loss_2 = np.sum(kernel.cross_cov(data1, data2))
            data2[i, j] += eps
            dloss_est[i, j] = (loss_1 - loss_2) / (2*eps)

    assert np.linalg.norm(dloss - dloss_est) < 1e-6
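# The gradient tests above all repeat the same central-difference check. The
# helper below is a minimal sketch of that pattern; the names
# `finite_diff_grad` and `loss_fn` are illustrative only and are not part of
# the original test suite.


def finite_diff_grad(loss_fn, data, eps=1e-5):
    """Central-difference estimate of d loss_fn(data) / d data."""
    grad = np.zeros(data.shape)
    for i in range(data.shape[0]):
        for j in range(data.shape[1]):
            data[i, j] += eps
            loss_plus = loss_fn(data)
            data[i, j] -= 2*eps
            loss_minus = loss_fn(data)
            data[i, j] += eps  # restore the original entry
            grad[i, j] = (loss_plus - loss_minus) / (2*eps)
    return grad


# Example usage (against a Transformer `t` as in test_backward_pass):
#   dloss_est = finite_diff_grad(lambda x: np.sum(t.forward_pass(x)**2), data)
#   assert np.linalg.norm(t.backward_pass(2*t.forward_pass(data)) - dloss_est) < 1e-6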