def init_train_updates(self):
    network_output = self.variables.network_output
    prediction_func = self.variables.train_prediction_func
    last_error = self.variables.last_error
    error_func = self.variables.error_func
    mu = self.variables.mu

    # Increase mu when the error got worse, decrease it otherwise.
    new_mu = tf.where(
        tf.less(last_error, error_func),
        mu * self.mu_update_factor,
        mu / self.mu_update_factor,
    )

    err_for_each_sample = flatten((network_output - prediction_func) ** 2)

    params = parameter_values(self.connection)
    param_vector = make_single_vector(params)

    J = compute_jacobian(err_for_each_sample, params)
    J_T = tf.transpose(J)
    n_params = J.shape[1]

    # Solve (J^T J + mu * I) * delta = J^T * e for the update delta.
    parameter_update = tf.matrix_solve(
        tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
        tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1)),
    )
    updated_params = param_vector - flatten(parameter_update)

    updates = [(mu, new_mu)]
    parameter_updates = setup_parameter_updates(params, updated_params)
    updates.extend(parameter_updates)

    return updates
def init_train_updates(self):
    training_outputs = self.network.training_outputs
    last_error = self.variables.last_error
    error_func = self.variables.loss
    mu = self.variables.mu

    new_mu = tf.where(
        tf.less(last_error, error_func),
        mu * self.mu_update_factor,
        mu / self.mu_update_factor,
    )

    err_for_each_sample = flatten((self.target - training_outputs) ** 2)

    variables = self.network.variables
    params = [var for var in variables.values() if var.trainable]
    param_vector = make_single_vector(params)

    J = compute_jacobian(err_for_each_sample, params)
    J_T = tf.transpose(J)
    n_params = J.shape[1]

    parameter_update = tf.matrix_solve(
        tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
        tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1)))

    updated_params = param_vector - flatten(parameter_update)

    updates = [(mu, new_mu)]
    parameter_updates = setup_parameter_updates(params, updated_params)
    updates.extend(parameter_updates)

    return updates
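# Both init_train_updates variants above build the same
# Levenberg-Marquardt step: solve (J^T J + mu * I) * delta = J^T * e
# and subtract delta from the packed parameter vector. A minimal
# NumPy sketch of that linear-algebra step (the function name and
# arguments here are illustrative, not part of the library):
import numpy as np

def levenberg_marquardt_step(J, errors, mu):
    # J: (n_samples, n_params) Jacobian of the per-sample errors.
    # errors: (n_samples,) error vector, mu: damping scalar.
    n_params = J.shape[1]
    A = J.T.dot(J) + mu * np.eye(n_params)
    b = J.T.dot(errors.reshape(-1, 1))
    return np.linalg.solve(A, b).ravel()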
def init_train_updates(self):
    updates = super(LeakStepAdaptation, self).init_train_updates()

    alpha = asfloat(self.alpha)
    beta = asfloat(self.beta)
    leak_size = asfloat(self.leak_size)

    step = self.variables.step
    leak_average = self.variables.leak_average

    parameters = parameter_values(self.connection)
    gradients = tf.gradients(self.variables.error_func, parameters)
    full_gradient = tf.concat(
        [flatten(grad) for grad in gradients], axis=0)

    leak_average_update = (
        (1 - leak_size) * leak_average
        + leak_size * full_gradient
    )
    new_step = step + alpha * step * (
        beta * tf.norm(leak_average_update) - step
    )

    updates.extend([
        (leak_average, leak_average_update),
        (step, new_step),
    ])
    return updates
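# The update above keeps a leaky moving average of the full gradient
# and pulls the step towards beta times its norm:
#     g_avg <- (1 - leak_size) * g_avg + leak_size * g
#     step  <- step + alpha * step * (beta * ||g_avg|| - step)
# A NumPy sketch of one such update (names here are illustrative):
import numpy as np

def leak_step_update(step, leak_average, gradient, alpha, beta, leak_size):
    # Exponential moving average of the flattened gradient.
    leak_average = (1 - leak_size) * leak_average + leak_size * gradient
    # The step grows while the averaged gradient norm stays large.
    step = step + alpha * step * (beta * np.linalg.norm(leak_average) - step)
    return step, leak_average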
def init_train_updates(self):
    step = self.step
    inv_min_eigval = 1 / self.min_eigval

    variables = self.network.variables
    parameters = [var for var in variables.values() if var.trainable]
    param_vector = make_single_vector(parameters)

    gradients = tf.gradients(self.variables.loss, parameters)
    full_gradient = make_single_vector(gradients)

    second_derivatives = []
    for parameter, gradient in zip(parameters, gradients):
        second_derivative, = tf.gradients(gradient, parameter)
        second_derivatives.append(flatten(second_derivative))

    hessian_diag = tf.concat(second_derivatives, axis=0)

    # It's easier to clip the inverse of the Hessian rather than
    # the Hessian itself. The inverse of a diagonal matrix is easy
    # to compute with an elementwise inverse operation.
    inv_hessian_diag = tf.clip_by_value(
        1 / hessian_diag,
        -inv_min_eigval,
        inv_min_eigval,
    )

    updates = setup_parameter_updates(
        parameters,
        param_vector - step * full_gradient * inv_hessian_diag)

    return updates
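# The method above approximates Newton's method with only the
# diagonal of the Hessian: theta <- theta - step * g / diag(H),
# where the elementwise inverse is clipped to
# [-1/min_eigval, 1/min_eigval]. A NumPy sketch of the same step
# (names here are illustrative):
import numpy as np

def diag_newton_step(param_vector, gradient, hessian_diag, step, min_eigval):
    inv_min_eigval = 1.0 / min_eigval
    # Clipping bounds the update where second derivatives are near zero.
    inv_hessian_diag = np.clip(
        1.0 / hessian_diag, -inv_min_eigval, inv_min_eigval)
    return param_vector - step * gradient * inv_hessian_diag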
def loss_function(expected, predicted):
    epsilon = 1e-7  # smallest safe value for 32-bit floats
    predicted = tf.clip_by_value(predicted, epsilon, 1.0 - epsilon)

    expected = tf.cast(flatten(expected), tf.int32)
    log_predicted = tf.log(predicted)

    # For every sample, pick the log-probability that the network
    # assigned to the true class.
    indices = tf.stack([tf.range(tf.size(expected)), expected])
    indices = tf.transpose(indices, [1, 0])
    errors = tf.gather_nd(log_predicted, indices)

    return -tf.reduce_mean(errors)
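# loss_function computes categorical cross-entropy for integer class
# labels: L = -mean_i(log p_{i, y_i}). A NumPy sketch of the same
# computation, assuming `predicted` holds one probability row per
# sample (this helper is illustrative, not part of the library):
import numpy as np

def cross_entropy(expected, predicted, epsilon=1e-7):
    predicted = np.clip(predicted, epsilon, 1.0 - epsilon)
    expected = np.asarray(expected).astype(int).ravel()
    # Pick the predicted probability of the true class per sample.
    picked = predicted[np.arange(expected.size), expected]
    return -np.mean(np.log(picked))

# For example, cross_entropy([0, 2], np.array([[0.9, 0.05, 0.05],
# [0.1, 0.2, 0.7]])) averages -log(0.9) and -log(0.7).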
def output(self, Q, input_state_1, input_state_2):
    with tf.name_scope("Q-output"):
        # The number of samples depends on the state's batch size.
        # In each iteration we can try to predict directions from
        # multiple different starting points at the same time.
        input_shape = tf.shape(input_state_1)
        n_states = input_shape[1]
        Q_shape = tf.shape(Q)

        indices = tf.stack([
            # The number of repetitions depends on the size
            # of the state batch.
            tf_repeat(tf.range(Q_shape[0]), n_states),

            # Each state is a coordinate (x and y) that points
            # to some place on a grid.
            tf.cast(flatten(input_state_1), tf.int32),
            tf.cast(flatten(input_state_2), tf.int32),
        ])
        indices = tf.transpose(indices, [1, 0])

        # The output is a matrix with n_samples * n_states rows
        # and n_filters (which is Q.shape[1]) columns.
        return tf.gather_nd(Q, indices)
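# The gather above builds one (sample, x, y) index row per state.
# A toy NumPy version of the same indexing, assuming a channels-last
# Q layout (the real layout may differ; everything here is
# illustrative):
import numpy as np

Q = np.random.rand(2, 3, 3, 4)      # (samples, x, y, filters)
x = np.array([[0, 2], [1, 1]])      # two (x, y) states per sample
y = np.array([[1, 0], [2, 2]])

sample_idx = np.repeat(np.arange(Q.shape[0]), x.shape[1])  # [0, 0, 1, 1]
indices = np.stack([sample_idx, x.ravel(), y.ravel()], axis=1)
rows = Q[indices[:, 0], indices[:, 1], indices[:, 2]]  # shape: (4, 4)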
def init_train_updates(self):
    penalty_const = asfloat(self.penalty_const)

    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = make_single_vector(parameters)

    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    parameter_update = tf.matrix_solve(
        hessian_matrix + penalty_const * tf.eye(n_parameters),
        tf.reshape(full_gradient, [-1, 1]))

    updated_parameters = param_vector - flatten(parameter_update)
    updates = setup_parameter_updates(parameters, updated_parameters)

    return updates
def init_train_updates(self):
    penalty_const = asfloat(self.penalty_const)

    n_parameters = self.network.n_parameters
    variables = self.network.variables
    parameters = [var for var in variables.values() if var.trainable]
    param_vector = make_single_vector(parameters)

    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.loss, parameters)

    parameter_update = tf.matrix_solve(
        hessian_matrix + penalty_const * tf.eye(n_parameters),
        tf.reshape(full_gradient, [-1, 1]))

    updated_parameters = param_vector - flatten(parameter_update)
    updates = setup_parameter_updates(parameters, updated_parameters)

    return updates
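# Both Newton-style variants above solve a damped Newton system,
#     (H + c * I) * delta = grad,
# and then step theta <- theta - delta. A minimal NumPy sketch of
# the same update (names here are illustrative):
import numpy as np

def regularized_newton_step(param_vector, gradient, hessian, penalty_const):
    n = param_vector.size
    # Adding penalty_const to the diagonal keeps the system solvable
    # even when the Hessian is singular or badly conditioned.
    delta = np.linalg.solve(hessian + penalty_const * np.eye(n), gradient)
    return param_vector - delta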
def make_single_vector(parameters):
    with tf.name_scope('make-single-vector'):
        return tf.concat([flatten(param) for param in parameters], axis=0)
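# Hypothetical usage of make_single_vector: pack a weight matrix and
# a bias vector into one flat tensor, the form the solvers above
# expect (variable names here are illustrative):
weights = tf.Variable(tf.zeros((3, 2)))
bias = tf.Variable(tf.zeros(2))
packed = make_single_vector([weights, bias])  # shape: (8,)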