def test_line_search_exceptions(self):
    testcases = [
        # Invalid c1 values
        dict(c1=-1, c2=0.5, maxiter=1),
        dict(c1=0, c2=0.5, maxiter=1),
        dict(c1=1, c2=0.5, maxiter=1),

        # Invalid c2 values
        dict(c2=-1, c1=0.5, maxiter=1),
        dict(c2=0, c1=0.5, maxiter=1),
        dict(c2=1, c1=0.5, maxiter=1),

        # c1 > c2
        dict(c1=0.5, c2=0.1, maxiter=1),

        # Invalid `maxiter` values
        dict(c1=0.05, c2=0.1, maxiter=-10),
        dict(c1=0.05, c2=0.1, maxiter=0),
    ]

    for testcase in testcases:
        error_desc = "Line search for {}".format(testcase)

        with self.assertRaises(ValueError, msg=error_desc):
            func = lambda x: x
            wolfe.line_search(f=func, f_deriv=func, **testcase)
def test_line_search_exceptions(self):
    testcases = [
        # Invalid c1 values
        dict(c1=-1, c2=0.5, maxiter=1),
        dict(c1=0, c2=0.5, maxiter=1),
        dict(c1=1, c2=0.5, maxiter=1),

        # Invalid c2 values
        dict(c2=-1, c1=0.5, maxiter=1),
        dict(c2=0, c1=0.5, maxiter=1),
        dict(c2=1, c1=0.5, maxiter=1),

        # c1 > c2
        dict(c1=0.5, c2=0.1, maxiter=1),

        # Invalid `maxiter` values
        dict(c1=0.05, c2=0.1, maxiter=-10),
        dict(c1=0.05, c2=0.1, maxiter=0),
    ]

    def func(x):
        return x

    for testcase in testcases:
        error_desc = "Line search for {}".format(testcase)

        with self.assertRaises(ValueError, msg=error_desc):
            wolfe.line_search(f=func, f_deriv=func, **testcase)
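# The parameter validation exercised above follows the standard Wolfe-condition
# constraints: 0 < c1 < c2 < 1 and maxiter must be a positive integer. As a
# point of reference, below is a minimal, hypothetical sketch of the two (weak)
# Wolfe conditions a candidate step has to satisfy; `wolfe_conditions_hold` is
# illustrative only and is not part of the library.
def wolfe_conditions_hold(f, f_deriv, step, c1=1e-4, c2=0.9):
    # Sufficient decrease (Armijo): f(step) <= f(0) + c1 * step * f'(0)
    sufficient_decrease = f(step) <= f(0) + c1 * step * f_deriv(0)
    # Curvature condition: f'(step) >= c2 * f'(0)
    curvature = f_deriv(step) >= c2 * f_deriv(0)
    return sufficient_decrease and curvature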
def init_train_updates(self):
    network_inputs = self.variables.network_inputs
    network_output = self.variables.network_output
    inv_hessian = self.variables.inv_hessian
    prev_params = self.variables.prev_params
    prev_full_gradient = self.variables.prev_full_gradient

    params = parameter_values(self.connection)
    param_vector = T.concatenate([param.flatten() for param in params])

    gradients = T.grad(self.variables.error_func, wrt=params)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    new_inv_hessian = ifelse(
        T.eq(self.variables.epoch, 1),
        inv_hessian,
        self.update_function(inv_hessian,
                             param_vector - prev_params,
                             full_gradient - prev_full_gradient))

    param_delta = -new_inv_hessian.dot(full_gradient)
    layers_and_parameters = list(iter_parameters(self.layers))

    def prediction(step):
        updated_params = param_vector + step * param_delta

        # This trick allows us to replace shared variables
        # with Theano variables and get the output from the network
        start_pos = 0
        for layer, attrname, param in layers_and_parameters:
            end_pos = start_pos + param.size
            updated_param_value = T.reshape(
                updated_params[start_pos:end_pos],
                param.shape)

            setattr(layer, attrname, updated_param_value)
            start_pos = end_pos

        output = self.connection.output(*network_inputs)

        # Restore previous parameters
        for layer, attrname, param in layers_and_parameters:
            setattr(layer, attrname, param)

        return output

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        return T.grad(error_func, wrt=step)

    step = asfloat(line_search(phi, derphi))
    updated_params = param_vector + step * param_delta
    updates = setup_parameter_updates(params, updated_params)

    updates.extend([
        (inv_hessian, new_inv_hessian),
        (prev_params, param_vector),
        (prev_full_gradient, full_gradient),
    ])

    return updates
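# After the first epoch, `update_function` builds a new inverse-Hessian
# approximation from the parameter difference s = param_vector - prev_params
# and the gradient difference y = full_gradient - prev_full_gradient. A common
# choice for this update is the BFGS formula; the NumPy sketch below is
# illustrative only (the actual rule depends on the configured
# `update_function`) and assumes y.dot(s) is non-zero.
import numpy as np

def bfgs_inverse_hessian_update(inv_hessian, s, y):
    n = len(s)
    identity = np.eye(n)
    rho = 1.0 / np.dot(y, s)
    # H_new = (I - rho * s y^T) H (I - rho * y s^T) + rho * s s^T
    left = identity - rho * np.outer(s, y)
    right = identity - rho * np.outer(y, s)
    return left.dot(inv_hessian).dot(right) + rho * np.outer(s, s)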
def init_train_updates(self):
    network_input = self.variables.network_input
    network_output = self.variables.network_output
    inv_hessian = self.variables.inv_hessian
    prev_params = self.variables.prev_params
    prev_full_gradient = self.variables.prev_full_gradient

    params = list(iter_parameters(self))
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=params)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    new_inv_hessian = ifelse(
        T.eq(self.variables.epoch, 1),
        inv_hessian,
        self.update_function(inv_hessian,
                             param_vector - prev_params,
                             full_gradient - prev_full_gradient)
    )
    param_delta = -new_inv_hessian.dot(full_gradient)

    def prediction(step):
        # TODO: I need to update this ugly solution later
        updated_params = param_vector + step * param_delta

        layer_input = network_input
        start_pos = 0

        for layer in self.layers:
            for param in layer.parameters:
                end_pos = start_pos + param.size
                parameter_name, parameter_id = param.name.split('_')
                setattr(layer, parameter_name, T.reshape(
                    updated_params[start_pos:end_pos],
                    param.shape
                ))
                start_pos = end_pos

            layer_input = layer.output(layer_input)

        return layer_input

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        return T.grad(error_func, wrt=step)

    step = asfloat(line_search(phi, derphi))
    updated_params = param_vector + step * param_delta
    updates = setup_parameter_updates(params, updated_params)

    updates.extend([
        (inv_hessian, new_inv_hessian),
        (prev_params, param_vector),
        (prev_full_gradient, full_gradient),
    ])

    return updates
def test_wolfe_linear_search(self):
    x_current = 3
    grad = 2 * (x_current - 5.5)

    def square(step):
        x_new = x_current - step * grad
        return (x_new - 5.5) ** 2

    def square_deriv(step):
        # Derivative with respect to the step
        return -grad * 2 * ((x_current - step * grad) - 5.5)

    x_star = wolfe.line_search(square, square_deriv)
    x_star = self.eval(x_star)

    self.assertEqual(square(0), 6.25)
    self.assertAlmostEqual(square(x_star), 0, places=2)
    self.assertAlmostEqual(x_star, 0.5, places=2)
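# Worked example for the test above: it minimizes f(x) = (x - 5.5)**2 starting
# from x_current = 3 along the steepest-descent direction -grad, where
# grad = 2 * (3 - 5.5) = -5. The exact minimizer of
# phi(step) = (x_current - step * grad - 5.5)**2 is step = 0.5, since
# 3 - 0.5 * (-5) = 5.5, which is why the test expects x_star close to 0.5.
# A plain-Python check, without any Theano/TensorFlow graph:
x_current = 3
grad = 2 * (x_current - 5.5)       # -5.0
step = 0.5
x_new = x_current - step * grad    # 5.5
assert (x_new - 5.5) ** 2 == 0.0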
def find_optimal_step(self, parameter_vector, parameter_update):
    network_inputs = self.variables.network_inputs
    network_output = self.variables.network_output
    layers_and_parameters = list(iter_parameters(self.layers))

    def prediction(step):
        step = asfloat(step)
        updated_params = parameter_vector + step * parameter_update

        # This trick allows us to replace shared variables
        # with TensorFlow variables and get the output from the network
        start_pos = 0
        for layer, attrname, param in layers_and_parameters:
            end_pos = start_pos + get_variable_size(param)
            updated_param_value = tf.reshape(
                updated_params[start_pos:end_pos],
                param.shape)

            setattr(layer, attrname, updated_param_value)
            start_pos = end_pos

        output = self.connection.output(*network_inputs)

        # Restore previous parameters
        for layer, attrname, param in layers_and_parameters:
            setattr(layer, attrname, param)

        return output

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        gradient, = tf.gradients(error_func, step)
        return gradient

    return line_search(phi, derphi, self.wolfe_maxiter,
                       self.wolfe_c1, self.wolfe_c2)
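# `find_optimal_step` only returns the Wolfe step for a given search
# direction. A hypothetical sketch of how the caller could use it, mirroring
# the Theano `init_train_updates` versions above; the exact TensorFlow caller
# is an assumption, not taken from the source:
#
#     step = self.find_optimal_step(param_vector, param_delta)
#     updated_params = param_vector + step * param_delta
#     updates = setup_parameter_updates(params, updated_params)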
def init_train_updates(self):
    network_input = self.variables.network_input
    network_output = self.variables.network_output
    inv_hessian = self.variables.inv_hessian
    prev_params = self.variables.prev_params
    prev_full_gradient = self.variables.prev_full_gradient

    params = parameter_values(self.connection)
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=params)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    new_inv_hessian = ifelse(
        T.eq(self.variables.epoch, 1),
        inv_hessian,
        self.update_function(inv_hessian,
                             param_vector - prev_params,
                             full_gradient - prev_full_gradient)
    )
    param_delta = -new_inv_hessian.dot(full_gradient)
    layers_and_parameters = list(iter_parameters(self.layers))

    def prediction(step):
        updated_params = param_vector + step * param_delta

        # This trick allows us to replace shared variables
        # with Theano variables and get the output from the network
        start_pos = 0
        for layer, attrname, param in layers_and_parameters:
            end_pos = start_pos + param.size
            updated_param_value = T.reshape(
                updated_params[start_pos:end_pos],
                param.shape
            )
            setattr(layer, attrname, updated_param_value)
            start_pos = end_pos

        output = self.connection.output(network_input)

        # Restore the original shared-variable parameters
        for layer, attrname, param in layers_and_parameters:
            setattr(layer, attrname, param)

        return output

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        return T.grad(error_func, wrt=step)

    step = asfloat(line_search(phi, derphi))
    updated_params = param_vector + step * param_delta
    updates = setup_parameter_updates(params, updated_params)

    updates.extend([
        (inv_hessian, new_inv_hessian),
        (prev_params, param_vector),
        (prev_full_gradient, full_gradient),
    ])

    return updates
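# All of the variants above work with a single flat parameter vector and then
# map slices of it back onto the original parameter shapes (either inside
# `prediction` or via `setup_parameter_updates`). An illustrative NumPy
# version of that unpacking step; `split_into_parameter_shapes` is a
# hypothetical helper, not a library function:
import numpy as np

def split_into_parameter_shapes(flat_vector, shapes):
    chunks, start = [], 0
    for shape in shapes:
        size = int(np.prod(shape))
        chunks.append(flat_vector[start:start + size].reshape(shape))
        start += size
    return chunks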