def init_param_updates(self, layer, parameter):
    """Build Adam update rules for a single parameter.

    Returns a list of ``(shared_variable, new_value)`` pairs updating the
    first/second moment estimates, the parameter itself and the stored
    Hessian matrix.
    """
    epoch = self.variables.epoch
    prev_first_moment = parameter.prev_first_moment
    prev_second_moment = parameter.prev_second_moment

    # Bug fix: use the configured step size; the original hard-coded
    # ``step = 0.001`` (with the real assignment commented out), which
    # silently ignored the user-supplied learning rate.
    step = self.variables.step

    beta1 = asfloat(self.beta1)
    beta2 = asfloat(self.beta2)
    epsilon = asfloat(self.epsilon)

    gradient = T.grad(self.variables.error_func, wrt=parameter)

    n_parameters = count_parameters(self)
    self.variables.hessian = theano.shared(
        value=asfloat(np.zeros((n_parameters, n_parameters))),
        name='hessian_inverse')
    parameters = list(iter_parameters(self))
    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    # Exponential moving averages of the gradient and squared gradient.
    first_moment = (beta1 * prev_first_moment
                    + asfloat(1. - beta1) * gradient)
    second_moment = (beta2 * prev_second_moment
                     + asfloat(1. - beta2) * gradient ** 2)

    first_moment_bias_corrected = first_moment / (1. - beta1 ** epoch)
    second_moment_bias_corrected = second_moment / (1. - beta2 ** epoch)

    # Bug fix: Adam DIVIDES the bias-corrected first moment by
    # ``sqrt(second_moment) + epsilon``; the original multiplied, which
    # scales the step up with gradient variance instead of down.
    parameter_delta = first_moment_bias_corrected / (
        T.sqrt(second_moment_bias_corrected) + epsilon)

    return [
        (prev_first_moment, first_moment),
        (prev_second_moment, second_moment),
        (parameter, parameter - step * parameter_delta),
        (self.variables.hessian, hessian_matrix),
    ]
def init_train_updates(self):
    """Construct conjugate-gradient updates for all network parameters."""
    step = self.variables.step
    previous_delta = self.variables.prev_delta
    previous_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self)
    parameters = list(iter_parameters(self))
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=parameters)
    flattened_gradients = [gradient.flatten() for gradient in gradients]
    full_gradient = T.concatenate(flattened_gradients)

    beta = self.update_function(previous_gradient, full_gradient,
                                previous_delta)

    # Restart with plain steepest descent once every n_parameters epochs;
    # otherwise mix in the previous search direction scaled by beta.
    parameter_delta = ifelse(
        T.eq(T.mod(self.variables.epoch, n_parameters), 1),
        -full_gradient,
        -full_gradient + beta * previous_delta,
    )
    updated_parameters = param_vector + step * parameter_delta

    updates = [
        (previous_gradient, full_gradient),
        (previous_delta, parameter_delta),
    ]
    updates.extend(setup_parameter_updates(parameters, updated_parameters))
    return updates
def init_param_updates(self, layer, parameter):
    """Build Adamax update rules for a single parameter.

    Returns ``(shared_variable, new_value)`` pairs for the first moment,
    the weighted infinity norm, the parameter and the stored Hessian.
    """
    epoch = self.variables.epoch
    step = self.variables.step
    beta1 = self.beta1
    beta2 = self.beta2

    prev_first_moment = parameter.prev_first_moment
    prev_weighted_inf_norm = parameter.prev_weighted_inf_norm

    n_parameters = count_parameters(self)
    self.variables.hessian = theano.shared(
        value=asfloat(np.zeros((n_parameters, n_parameters))),
        name='hessian_inverse')
    parameters = list(iter_parameters(self))
    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    gradient = T.grad(self.variables.error_func, wrt=parameter)

    first_moment = beta1 * prev_first_moment + (1 - beta1) * gradient
    # Adamax replaces the second moment with an exponentially weighted
    # infinity norm of the gradients.
    weighted_inf_norm = T.maximum(beta2 * prev_weighted_inf_norm,
                                  T.abs_(gradient))

    bias_correction = 1 / (1 - beta1 ** epoch)
    parameter_delta = bias_correction * (
        first_moment / (weighted_inf_norm + self.epsilon))

    return [
        (prev_first_moment, first_moment),
        (prev_weighted_inf_norm, weighted_inf_norm),
        (parameter, parameter - step * parameter_delta),
        (self.variables.hessian, hessian_matrix),
    ]
def init_train_updates(self):
    """Return conjugate-gradient update pairs for every trainable parameter."""
    step = self.variables.step
    prev_delta = self.variables.prev_delta
    prev_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self)
    parameters = list(iter_parameters(self))
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=parameters)
    full_gradient = T.concatenate([g.flatten() for g in gradients])

    beta = self.update_function(prev_gradient, full_gradient, prev_delta)

    # Periodic restart: every n_parameters epochs fall back to the pure
    # negative gradient direction, otherwise use the conjugate direction.
    restart_condition = T.eq(T.mod(self.variables.epoch, n_parameters), 1)
    parameter_delta = ifelse(
        restart_condition,
        -full_gradient,
        -full_gradient + beta * prev_delta,
    )

    new_param_vector = param_vector + step * parameter_delta
    updates = [(prev_gradient, full_gradient), (prev_delta, parameter_delta)]
    updates.extend(setup_parameter_updates(parameters, new_param_vector))
    return updates
def init_variables(self):
    """Create the shared vector that accumulates the leaky step average."""
    super(LeakStepAdaptation, self).init_variables()
    n_parameters = count_parameters(self)
    self.variables.leak_average = theano.shared(
        name='leak_average',
        value=asfloat(np.zeros(n_parameters)),
    )
def init_variables(self):
    """Allocate shared vectors for the previous search direction and gradient."""
    super(ConjugateGradient, self).init_variables()
    n_parameters = count_parameters(self)
    self.variables.update(
        prev_delta=theano.shared(
            name="prev_delta",
            value=asfloat(np.zeros(n_parameters)),
        ),
        prev_gradient=theano.shared(
            name="prev_gradient",
            value=asfloat(np.zeros(n_parameters)),
        ),
    )
def init_variables(self):
    """Set up per-epoch state: previous delta and previous gradient vectors."""
    super(ConjugateGradient, self).init_variables()
    n_parameters = count_parameters(self)
    zero_vector = np.zeros(n_parameters)
    self.variables.update(
        prev_delta=theano.shared(name="prev_delta",
                                 value=asfloat(zero_vector)),
        prev_gradient=theano.shared(name="prev_gradient",
                                    value=asfloat(zero_vector)),
    )
def init_train_updates(self):
    """Newton-style step using the inverse of a penalized Hessian."""
    n_parameters = count_parameters(self)
    parameters = list(iter_parameters(self))
    param_vector = parameters2vector(self)
    penalty_const = asfloat(self.penalty_const)

    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    # Add a scaled identity before inversion to keep the Hessian
    # well-conditioned (Tikhonov-style regularization).
    regularized_hessian = hessian_matrix + penalty_const * T.eye(n_parameters)
    hessian_inverse = T.nlinalg.matrix_inverse(regularized_hessian)

    updated_parameters = param_vector - hessian_inverse.dot(full_gradient)
    return setup_parameter_updates(parameters, updated_parameters)
def init_train_updates(self):
    """Compute one full-Newton update with a diagonal penalty term."""
    n_parameters = count_parameters(self)
    parameters = list(iter_parameters(self))
    param_vector = parameters2vector(self)
    penalty = asfloat(self.penalty_const)

    hessian, gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    # Penalized inverse: (H + c*I)^-1 avoids singular Hessians.
    inverse_hessian = T.nlinalg.matrix_inverse(
        hessian + penalty * T.eye(n_parameters))
    new_param_vector = param_vector - inverse_hessian.dot(gradient)

    return setup_parameter_updates(parameters, new_param_vector)
def init_train_updates(self):
    """Build Levenberg-Marquardt training updates.

    Adapts ``mu`` based on whether the error grew, then applies the
    Gauss-Newton-style step ``(J^T J + mu*I)^-1 J^T e``.  Also stores the
    freshly computed Hessian matrix in ``self.variables.hessian``.
    """
    network_output = self.variables.network_output
    prediction_func = self.variables.train_prediction_func
    last_error = self.variables.last_error
    error_func = self.variables.error_func
    mu = self.variables.mu

    # Increase damping when the error increased, relax it otherwise.
    new_mu = ifelse(
        T.lt(last_error, error_func),
        mu * self.mu_update_factor,
        mu / self.mu_update_factor,
    )
    mse_for_each_sample = T.mean((network_output - prediction_func) ** 2,
                                 axis=1)

    params = list(iter_parameters(self))
    param_vector = parameters2vector(self)

    n_parameters = count_parameters(self)
    self.variables.hessian = theano.shared(
        value=asfloat(np.zeros((n_parameters, n_parameters))),
        name='hessian_inverse')
    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, params)

    J = compute_jaccobian(mse_for_each_sample, params)
    n_params = J.shape[1]

    updated_params = param_vector - T.nlinalg.matrix_inverse(
        J.T.dot(J) + new_mu * T.eye(n_params)).dot(
        J.T).dot(mse_for_each_sample)

    # Bug fix: the hessian pair was wrapped in an extra list
    # (``[(mu, new_mu), [(hessian, ...)]]``), which is not a valid
    # sequence of (shared, new_value) pairs for theano.function updates.
    updates = [
        (mu, new_mu),
        (self.variables.hessian, hessian_matrix),
    ]
    updates.extend(setup_parameter_updates(params, updated_params))
    return updates
def init_param_updates(self, layer, parameter):
    """Build RMSProp update rules for a single parameter."""
    n_parameters = count_parameters(self)
    self.variables.hessian = theano.shared(
        value=asfloat(np.zeros((n_parameters, n_parameters))),
        name='hessian_inverse')
    parameters = list(iter_parameters(self))
    # NOTE(review): hessian_matrix and full_gradient are computed here
    # but never used in the returned updates — looks like dead code;
    # confirm whether the hessian update was meant to be returned.
    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    step = self.variables.step
    prev_mean_squred_grad = parameter.prev_mean_squred_grad
    gradient = T.grad(self.variables.error_func, wrt=parameter)

    # Exponential moving average of the squared gradient.
    mean_squred_grad = (self.decay * prev_mean_squred_grad
                        + (1 - self.decay) * gradient ** 2)
    parameter_delta = gradient / T.sqrt(mean_squred_grad + self.epsilon)

    return [
        (prev_mean_squred_grad, mean_squred_grad),
        (parameter, parameter - step * parameter_delta),
    ]
def init_variables(self):
    """Allocate the shared vector used for leaky step averaging."""
    super(LeakStepAdaptation, self).init_variables()
    n_parameters = count_parameters(self)
    leak_average = theano.shared(
        value=asfloat(np.zeros(n_parameters)),
        name='leak_average',
    )
    self.variables.leak_average = leak_average
def init_variables(self):
    """Allocate a zero-initialized shared matrix for the Hessian."""
    super(Hessian, self).init_variables()
    n_parameters = count_parameters(self)
    # NOTE(review): the shared variable is named 'hessian_inverse' but
    # stored under ``variables.hessian`` — confirm which is intended.
    self.variables.hessian = theano.shared(
        name='hessian_inverse',
        value=asfloat(np.zeros((n_parameters, n_parameters))),
    )