def test_gradient(self):
    """Check SLSTM analytic gradients against a central-difference estimate.

    The loss is the squared hidden-unit activity, sum(h^2) / 2, so the
    gradient of the loss with respect to the network output is the output
    itself — hence ``backward(forward(inputs))`` yields the analytic
    parameter gradients for that loss.
    """
    eps = 0.01
    slstm = SLSTM(num_rows=4, num_cols=3, batch_size=10, num_channels=3)

    # random input batch matching the network's expected layout
    inputs = randn(
        slstm.batch_size, slstm.num_rows, slstm.num_cols, slstm.num_channels)

    # analytic gradients of sum(h^2)/2 w.r.t. all parameters
    grad = slstm.backward(slstm.forward(inputs))

    params = deepcopy(slstm.parameters())
    params_copy = deepcopy(params)

    for key in grad:
        grad_numerical = empty_like(grad[key])

        for idx in range(grad_numerical.size):
            base = params[key].ravel()[idx]

            # loss at +eps perturbation of this single parameter entry
            params_copy[key].ravel()[idx] = base + eps
            slstm.set_parameters(params_copy)
            loss_pos = sum(square(slstm.forward(inputs))) / 2.

            # loss at -eps perturbation
            params_copy[key].ravel()[idx] = base - eps
            slstm.set_parameters(params_copy)
            loss_neg = sum(square(slstm.forward(inputs))) / 2.

            # restore the entry so only one parameter is perturbed at a time
            params_copy[key].ravel()[idx] = base

            # central-difference estimate of the gradient entry
            grad_numerical.ravel()[idx] = (loss_pos - loss_neg) / (2. * eps)

        self.assertLess(max(abs(grad_numerical - grad[key])), 1e-3)
def test_gradient(self):
    """Compare analytic SLSTM gradients with numerical central differences.

    Assumes the loss is the squared hidden-unit activity, sum(h^2) / 2,
    whose gradient with respect to the outputs is the outputs themselves,
    so ``backward(forward(inputs))`` gives the analytic parameter gradients.

    NOTE(review): this method is a byte-for-byte duplicate of the
    ``test_gradient`` defined earlier in the file; under unittest only the
    last definition runs. Consider removing one copy or renaming this one.
    """
    h = 0.01
    slstm = SLSTM(num_rows=4, num_cols=3, batch_size=10, num_channels=3)

    # generate data
    inputs = randn(
        slstm.batch_size, slstm.num_rows, slstm.num_cols, slstm.num_channels)

    # assume we want to minimize squared hidden unit activity
    grad = slstm.backward(slstm.forward(inputs))

    params = deepcopy(slstm.parameters())
    params_copy = deepcopy(params)

    # numerical gradient via central differences, one parameter entry at a time
    grad_n = {}
    for key in grad:
        grad_n[key] = empty_like(grad[key])

        for i in range(grad_n[key].size):
            # increase parameter value
            params_copy[key].ravel()[i] = params[key].ravel()[i] + h
            slstm.set_parameters(params_copy)
            loss_p = sum(square(slstm.forward(inputs))) / 2.

            # decrease parameter value
            params_copy[key].ravel()[i] = params[key].ravel()[i] - h
            slstm.set_parameters(params_copy)
            loss_n = sum(square(slstm.forward(inputs))) / 2.

            # reset parameter value in the dict so later entries are measured
            # against unperturbed parameters
            params_copy[key].ravel()[i] = params[key].ravel()[i]

            # estimate gradient
            grad_n[key].ravel()[i] = (loss_p - loss_n) / (2. * h)

        self.assertLess(max(abs(grad_n[key] - grad[key])), 1e-3)

    # fix: the loop ends with set_parameters having last been called with a
    # perturbed (decreased) value — restore the original parameters so the
    # model is left in a clean state
    slstm.set_parameters(params)