def test_Linear():
    """Numerically gradient-check the Linear unit.

    Runs one forward/backward pass to collect the analytic gradients dX and
    dW for the scalar loss ``sum(forward(X) * wrand)``, then perturbs every
    element of X and W by +/-delta and compares the central-difference
    estimate against the backpropagated value. Prints a report for each
    element whose relative error exceeds the threshold and a summary at the
    end.
    """
    T = 5            # number of time steps in the input
    batch_size = 2
    d_output = 3
    d_input = 4
    unit = Linear(d_input, d_output)
    W = unit.get_weights()
    X = np.random.randn(T, d_input, batch_size)

    # One analytic forward/backward pass.  wrand projects the output to a
    # scalar loss, so d(loss)/dY == wrand exactly.
    acc_Y = unit.forward(X)
    wrand = np.random.randn(*acc_Y.shape)
    dY = wrand
    dX = unit.backward(dY)
    dW = unit.get_grads()

    def fwd():
        # Re-evaluate the scalar loss with the (possibly perturbed) X and W.
        unit.set_weights(W)
        return np.sum(unit.forward(X) * wrand)

    delta = 1e-4
    error_threshold = 1e-3
    error_count = 0

    # Check X against dX and W against dW, element by element.
    for name, values, dvalues in zip(('X', 'W'), (X, W), (dX, dW)):
        for i in range(values.size):
            actual = values.flat[i]
            # Central difference: (f(x + delta) - f(x - delta)) / (2 * delta).
            values.flat[i] = actual + delta
            loss_plus = fwd()
            values.flat[i] = actual - delta
            loss_minus = fwd()
            values.flat[i] = actual  # restore the original value
            backpropagated_gradient = dvalues.flat[i]
            numerical_gradient = (loss_plus - loss_minus) / (2 * delta)

            denom = abs(numerical_gradient + backpropagated_gradient)
            if numerical_gradient == 0 and backpropagated_gradient == 0:
                error = 0
            elif abs(numerical_gradient) < 1e-7 and abs(backpropagated_gradient) < 1e-7:
                # Both negligible: treat as a match.
                error = 0
            elif denom == 0:
                # Non-negligible gradients that cancel exactly: maximal error
                # (the original formula would divide by zero here).
                error = float('inf')
            else:
                error = abs(backpropagated_gradient - numerical_gradient) / denom

            if error > error_threshold:
                print('FAILURE!!!\n')
                print('\tparameter: ', name, '\tindex: ',
                      np.unravel_index(i, values.shape))
                print('\tvalues: ', actual)
                print('\tbackpropagated_gradient: ', backpropagated_gradient)
                print('\tnumerical_gradient', numerical_gradient)
                print('\terror: ', error)
                print('\n\n')
                error_count += 1

    if error_count == 0:
        print('Linear Gradient Check Passed')
    else:
        print('Failed for {} parameters'.format(error_count))
class LogisticRegression:
    """Logistic regression built from a Linear layer and a CrossEntropy loss.

    Exposes several parallel training paths over the same pair of layers:
    a full-precision baseline, a low-precision (``_lp``) baseline, "outer"
    methods that store per-batch state, "inner" methods that operate on
    stored state via SplitTensor, and SVRG-style update steps.
    """

    def __init__(self, n_samples, batch_size, n_bits, fwd_scale_factor,
                 bck_scale_factor, loss_scale_factor, in_features,
                 out_features, lr):
        self.lin_layer = Linear(n_samples=n_samples,
                                batch_size=batch_size,
                                n_bits=n_bits,
                                fwd_scale_factor=fwd_scale_factor,
                                bck_scale_factor=bck_scale_factor,
                                in_features=in_features,
                                out_features=out_features)
        self.loss_layer = CrossEntropy(n_samples, out_features, batch_size,
                                       n_bits, loss_scale_factor)
        self.lr = lr
        self.fwd_scale_factor = fwd_scale_factor
        self.bck_scale_factor = bck_scale_factor
        self.loss_scale_factor = loss_scale_factor

    def predict(self, x):
        """Predicted class per sample: argmax over the linear outputs."""
        scores = self.lin_layer.forward(x, train=False)
        return scores.argmax(axis=1)

    def recenter(self):
        self.lin_layer.recenter()

    # ------------------------- Baseline methods -------------------------

    def forward(self, x, y):
        """Full-precision forward pass; returns the cross-entropy loss."""
        logits = self.lin_layer.forward(x)
        return self.loss_layer.forward(logits, y)

    def backward(self):
        self.lin_layer.backward(self.loss_layer.backward())

    def step(self):
        self.lin_layer.step(self.lr)

    # ------------------------ LP baseline methods -----------------------

    def forward_lp(self, x, y):
        """Low-precision forward pass; loss uses the interpolated path."""
        logits = self.lin_layer.forward_lp(x)
        return self.loss_layer.forward_interp(logits, y)

    def backward_lp(self):
        self.lin_layer.backward_lp(self.loss_layer.backward_lp())

    def step_lp(self):
        self.lin_layer.step_lp(self.lr)

    # -------------------------- Outer methods ---------------------------

    def forward_store(self, x, y, batch_index):
        """Forward pass that also stores per-batch state in both layers."""
        logits = self.lin_layer.forward_store(x, batch_index)
        return self.loss_layer.forward_store(logits, y, batch_index)

    def backward_store(self, batch_index):
        self.lin_layer.backward_store(self.loss_layer.backward(), batch_index)

    # -------------------------- Inner methods ---------------------------

    def forward_inner(self, x, y, batch_index):
        """Inner forward pass; wraps the input in a SplitTensor."""
        logits = self.lin_layer.forward_inner(SplitTensor(x), batch_index)
        return self.loss_layer.forward_interp(logits, y)

    def backward_inner(self, batch_index):
        grad = self.loss_layer.backward_inner(batch_index)
        self.lin_layer.backward_inner(grad, batch_index)

    def step_inner(self):
        self.lin_layer.step_inner(self.lr)

    def predict_inner(self, x):
        """Prediction using the raw weight data, bypassing layer forward."""
        scores = np.dot(x, self.lin_layer.weight.data().T)
        return scores.argmax(axis=1)

    # --------------------------- SVRG methods ---------------------------

    def step_svrg(self, w_tilde_grad, g_tilde):
        self.lin_layer.step_svrg(w_tilde_grad, g_tilde, self.lr)

    def step_svrg_inner(self, g_tilde, batch_index):
        self.lin_layer.step_svrg_inner(g_tilde, self.lr, batch_index)

    def step_svrg_lp(self, w_tilde_grad, g_tilde):
        # NOTE(review): the arguments are forwarded in the opposite order
        # from step_svrg (g_tilde first here) — confirm this matches
        # Linear.step_svrg_lp's signature.
        self.lin_layer.step_svrg_lp(g_tilde, w_tilde_grad, self.lr)