def test_gradient(self, x, t, w_before_update, derivative_tolerance):
    """A debugging method that performs gradient testing.

    It does nothing if self.test_gradient_flag is off.
    """
    if not self.test_gradient_flag:
        return
    total_dimension = self.get_weights_dimension()
    whole_gradient = s.zeros(total_dimension)
    # flatten every layer's weight and bias gradients into one vector
    start_index = 0
    for layer in self.layers:
        length = layer.get_weights_len()
        end_index = start_index + length
        dE_dw = s.asarray(layer.dE_dw).reshape(-1)
        dE_db = layer.dE_db
        whole_gradient[start_index:end_index] = s.concatenate((dE_dw, dE_db))
        start_index += length
    # test the gradient: first along 30 random directions
    nb_random_directions = 30
    too_large_difference = False
    for k in xrange(nb_random_directions):
        direction = func.get_random_direction(total_dimension)
        derivative_approx = func.computeDirectionalDerivative(
            self, [x], [t], w_before_update, direction, derivative_tolerance)
        derivative_true = whole_gradient.dot(direction)
        if abs(derivative_true - derivative_approx) \
                > 10 * derivative_tolerance:
            too_large_difference = True
            break
    # if a mismatch was found, test the gradient for every weight separately
    if too_large_difference:
        # ipdb.set_trace()
        print >> sys.stderr, "[WARNING]: Gradient of error function might be wrong", \
            "\n\tTesting for all directions"
        # walk a one-hot direction over every coordinate in turn
        direction = s.zeros(total_dimension)
        for i in xrange(total_dimension):
            direction[i] = 1.0
            if i > 0:  # clear the coordinate used in the previous iteration
                direction[i - 1] = 0.0
            derivative_approx = func.computeDirectionalDerivative(
                self, [x], [t], w_before_update, direction, derivative_tolerance)
            derivative_true = whole_gradient.dot(direction)
            difference = abs(derivative_true - derivative_approx)
            if difference > 10 * derivative_tolerance:
                print >> sys.stderr, "gradient error\tweight index: %d;\tdifference: %.16f" \
                    % (i, difference)
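
# A minimal sketch of what func.computeDirectionalDerivative is assumed to
# compute: a central finite-difference approximation of the directional
# derivative of the error E along a unit direction d,
#     (E(w + eps * d) - E(w - eps * d)) / (2 * eps).
# The callable `evaluate_error` and the use of the tolerance directly as the
# step size are assumptions; the real helper lives in `func`, takes the
# network and the (x, t) pair, and may choose its step differently.
import numpy as np

def _directional_derivative_sketch(evaluate_error, w, direction, eps):
    """Central-difference estimate of the derivative of evaluate_error
    at weight vector w along a unit direction."""
    forward = evaluate_error(w + eps * direction)
    backward = evaluate_error(w - eps * direction)
    return (forward - backward) / (2.0 * eps)
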
def test_random_direction_norm(self):
    # random directions must be unit vectors, whatever the dimension
    for d in xrange(1, 1001, 10):
        direction = func.get_random_direction(d)
        self.assertAlmostEqual(np.linalg.norm(direction), 1.)
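
# A minimal sketch of what func.get_random_direction is assumed to do,
# consistent with the unit-norm test above: draw an isotropic Gaussian
# vector and normalize it, which yields a direction distributed uniformly
# on the unit sphere. The name and body below are assumptions, not the
# actual `func` implementation.
import numpy as np

def _random_direction_sketch(dimension):
    """Return a random unit vector with the given number of components."""
    direction = np.random.randn(dimension)
    return direction / np.linalg.norm(direction)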