Example 1
    def init_train_updates(self):
        network_output = self.variables.network_output
        prediction_func = self.variables.train_prediction_func
        last_error = self.variables.last_error
        error_func = self.variables.error_func
        mu = self.variables.mu

        new_mu = tf.where(
            tf.less(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        err_for_each_sample = flatten((network_output - prediction_func) ** 2)

        params = parameter_values(self.connection)
        param_vector = make_single_vector(params)

        J = compute_jacobian(err_for_each_sample, params)
        J_T = tf.transpose(J)
        n_params = J.shape[1]

        parameter_update = tf.matrix_solve(
            tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
            tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1))
        )
        updated_params = param_vector - flatten(parameter_update)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates
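
This example (and Example 2 below) implements the core Levenberg-Marquardt step: with J the Jacobian of the per-sample errors e, the parameter update solves the damped normal equations (J^T J + mu*I) * delta = J^T * e. A minimal NumPy sketch of the same solve (the names J, errors, param_vector and mu are illustrative placeholders, not library API):

    import numpy as np

    def levenberg_marquardt_step(J, errors, param_vector, mu):
        # Damped Gauss-Newton step: solve (J^T J + mu*I) delta = J^T e
        # rather than inverting the matrix explicitly.
        n_params = J.shape[1]
        gram = J.T.dot(J) + mu * np.eye(n_params)
        delta = np.linalg.solve(gram, J.T.dot(errors))
        return param_vector - delta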
Example 2
    def init_train_updates(self):
        training_outputs = self.network.training_outputs
        last_error = self.variables.last_error
        error_func = self.variables.loss
        mu = self.variables.mu

        new_mu = tf.where(
            tf.less(last_error, error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        err_for_each_sample = flatten((self.target - training_outputs)**2)

        variables = self.network.variables
        params = [var for var in variables.values() if var.trainable]
        param_vector = make_single_vector(params)

        J = compute_jacobian(err_for_each_sample, params)
        J_T = tf.transpose(J)
        n_params = J.shape[1]

        parameter_update = tf.matrix_solve(
            tf.matmul(J_T, J) + new_mu * tf.eye(n_params.value),
            tf.matmul(J_T, tf.expand_dims(err_for_each_sample, 1)))
        updated_params = param_vector - flatten(parameter_update)

        updates = [(mu, new_mu)]
        parameter_updates = setup_parameter_updates(params, updated_params)
        updates.extend(parameter_updates)

        return updates
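
The tf.where branch in both examples is the usual trust-region-style schedule for mu: if the loss went up since the last step, mu grows (pushing the update toward plain gradient descent); if it went down, mu shrinks (pushing it toward Gauss-Newton). A plain-Python sketch of the same rule, assuming mu_update_factor > 1 (names are illustrative):

    def update_mu(mu, last_error, current_error, mu_update_factor):
        # The error went up: damp harder, so the step behaves
        # more like plain gradient descent.
        if last_error < current_error:
            return mu * mu_update_factor
        # The error went down: trust the quadratic model more.
        return mu / mu_update_factor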
Example 3
    def init_train_updates(self):
        updates = super(LeakStepAdaptation, self).init_train_updates()

        alpha = asfloat(self.alpha)
        beta = asfloat(self.beta)
        leak_size = asfloat(self.leak_size)

        step = self.variables.step
        leak_average = self.variables.leak_average

        parameters = parameter_values(self.connection)
        gradients = tf.gradients(self.variables.error_func, parameters)
        full_gradient = tf.concat([flatten(grad) for grad in gradients],
                                  axis=0)

        leak_average_update = ((1 - leak_size) * leak_average +
                               leak_size * full_gradient)
        new_step = step + alpha * step * (beta * tf.norm(leak_average_update) -
                                          step)

        updates.extend([
            (leak_average, leak_average_update),
            (step, new_step),
        ])

        return updates
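
LeakStepAdaptation keeps a leaky (exponential moving) average of the full gradient vector and grows or shrinks the step size based on the norm of that average. A NumPy sketch of a single update, with illustrative names:

    import numpy as np

    def leak_step_update(step, leak_average, gradient, alpha, beta, leak_size):
        # Leaky average: mostly the previous average plus a small
        # fraction of the newest gradient.
        leak_average = (1 - leak_size) * leak_average + leak_size * gradient
        # The step grows while beta * ||average|| exceeds the current
        # step value and shrinks once it drops below it.
        step = step + alpha * step * (beta * np.linalg.norm(leak_average) - step)
        return step, leak_average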
Example 4
    def init_train_updates(self):
        step = self.step
        inv_min_eigval = 1 / self.min_eigval
        variables = self.network.variables
        parameters = [var for var in variables.values() if var.trainable]
        param_vector = make_single_vector(parameters)

        gradients = tf.gradients(self.variables.loss, parameters)
        full_gradient = make_single_vector(gradients)

        second_derivatives = []
        for parameter, gradient in zip(parameters, gradients):
            second_derivative, = tf.gradients(gradient, parameter)
            second_derivatives.append(flatten(second_derivative))

        hessian_diag = tf.concat(second_derivatives, axis=0)

        # It's easier to clip the inverse Hessian rather than
        # the Hessian itself.
        inv_hessian_diag = tf.clip_by_value(
            # The inverse of a diagonal matrix is easy to compute:
            # it's just the elementwise reciprocal.
            1 / hessian_diag,
            -inv_min_eigval,
            inv_min_eigval,
        )
        updates = setup_parameter_updates(
            parameters, param_vector - step * full_gradient * inv_hessian_diag)
        return updates
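
This example is a diagonal quasi-Newton method: only the diagonal of the Hessian is computed, so its inverse is the elementwise reciprocal, and clipping that reciprocal keeps near-zero second derivatives from producing huge steps. A NumPy sketch with illustrative names:

    import numpy as np

    def diag_newton_step(param_vector, gradient, hessian_diag, step, min_eigval):
        # Clipping the reciprocal keeps |1/h| below 1/min_eigval, which
        # bounds the effective curvature away from zero.
        inv_hessian_diag = np.clip(
            1.0 / hessian_diag, -1.0 / min_eigval, 1.0 / min_eigval)
        return param_vector - step * gradient * inv_hessian_diag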
Example 5
def loss_function(expected, predicted):
    epsilon = 1e-7  # for 32-bit float

    predicted = tf.clip_by_value(predicted, epsilon, 1.0 - epsilon)
    expected = tf.cast(flatten(expected), tf.int32)

    log_predicted = tf.log(predicted)
    indices = tf.stack([tf.range(tf.size(expected)), expected])
    indices = tf.transpose(indices, [1, 0])
    errors = tf.gather_nd(log_predicted, indices)
    return -tf.reduce_mean(errors)
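
This is categorical cross-entropy written by hand: for every sample it gathers log(predicted[i, expected[i]]), the log-probability assigned to the true class, then averages the negated result. An equivalent NumPy sketch (illustrative, not the library code; expected holds integer class labels):

    import numpy as np

    def cross_entropy(expected, predicted, epsilon=1e-7):
        # Clip away exact 0 and 1 so the log stays finite.
        predicted = np.clip(predicted, epsilon, 1.0 - epsilon)
        rows = np.arange(expected.size)
        # Pick the predicted probability of the true class per sample.
        return -np.mean(np.log(predicted[rows, expected.ravel()]))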
Example 6
    def output(self, Q, input_state_1, input_state_2):
        with tf.name_scope("Q-output"):
            # The number of samples depends on the state's batch size.
            # On each iteration we can try to predict a direction from
            # multiple different starting points at the same time.
            input_shape = tf.shape(input_state_1)
            n_states = input_shape[1]
            Q_shape = tf.shape(Q)

            indices = tf.stack([
                # The number of repetitions depends on the size
                # of the state batch.
                tf_repeat(tf.range(Q_shape[0]), n_states),

                # Each state is a coordinate (x and y)
                # that points to some place on the grid.
                tf.cast(flatten(input_state_1), tf.int32),
                tf.cast(flatten(input_state_2), tf.int32),
            ])
            indices = tf.transpose(indices, [1, 0])

            # The output is a matrix with n_samples * n_states rows
            # and n_filters (Q's last dimension) columns.
            return tf.gather_nd(Q, indices)
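
The index matrix built here has one (sample, x, y) row per state, so a single tf.gather_nd pulls the value vector for every state of every sample at once. A NumPy sketch of the same gather, assuming Q is indexed as Q[sample, x, y, ...] as the stacked indices imply (names are illustrative):

    import numpy as np

    def gather_state_values(Q, state_x, state_y):
        # state_x and state_y have shape (n_samples, n_states).
        n_samples, n_states = state_x.shape
        sample_idx = np.repeat(np.arange(n_samples), n_states)
        xs = state_x.reshape(-1).astype(int)
        ys = state_y.reshape(-1).astype(int)
        # One output row per (sample, state) pair.
        return Q[sample_idx, xs, ys]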
Example 7
    def init_train_updates(self):
        penalty_const = asfloat(self.penalty_const)

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = make_single_vector(parameters)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)
        parameter_update = tf.matrix_solve(
            hessian_matrix + penalty_const * tf.eye(n_parameters),
            tf.reshape(full_gradient, [-1, 1]))
        updated_parameters = param_vector - flatten(parameter_update)
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
Example 8
    def init_train_updates(self):
        penalty_const = asfloat(self.penalty_const)

        n_parameters = self.network.n_parameters
        variables = self.network.variables
        parameters = [var for var in variables.values() if var.trainable]
        param_vector = make_single_vector(parameters)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.loss, parameters)
        parameter_update = tf.matrix_solve(
            hessian_matrix + penalty_const * tf.eye(n_parameters),
            tf.reshape(full_gradient, [-1, 1]))
        updated_parameters = param_vector - flatten(parameter_update)
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
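
Examples 7 and 8 are the same regularized Newton step written against two versions of the API: the update solves (H + c*I) * delta = g for the full Hessian H, gradient g and penalty constant c, which keeps the system solvable even when H is singular or indefinite. A NumPy sketch with illustrative names:

    import numpy as np

    def newton_step(param_vector, hessian, gradient, penalty_const):
        n_params = param_vector.size
        # Adding penalty_const * I shifts every eigenvalue of the
        # Hessian up by the penalty, keeping the system solvable.
        delta = np.linalg.solve(
            hessian + penalty_const * np.eye(n_params), gradient)
        return param_vector - delta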
Example 9
def make_single_vector(parameters):
    with tf.name_scope('make-single-vector'):
        return tf.concat([flatten(param) for param in parameters], axis=0)
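
make_single_vector is the small helper the updates above all rely on: every parameter tensor is flattened to 1-D and the pieces are concatenated in order. A hypothetical usage sketch (TF 1.x, assuming flatten reshapes a tensor to 1-D, which matches how it is used throughout these examples):

    import tensorflow as tf

    W = tf.Variable(tf.zeros([3, 2]))
    b = tf.Variable(tf.zeros([2]))

    # Two tensors become a single vector of 3*2 + 2 = 8 elements.
    param_vector = make_single_vector([W, b])  # shape: (8,)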