コード例 #1
0
ファイル: hessdiag.py プロジェクト: degerli/neupy
    def init_train_updates(self):
        """Build training updates that rescale the gradient by the
        clipped elementwise inverse of the Hessian's diagonal.
        """
        step = self.variables.step
        parameters = parameter_values(self.connection)
        param_vector = make_single_vector(parameters)

        gradients = tf.gradients(self.variables.error_func, parameters)
        full_gradient = make_single_vector(gradients)

        # Differentiating each gradient w.r.t. its own parameter yields
        # the diagonal entries of the Hessian, one slice per parameter.
        diagonal_parts = [
            flatten(tf.gradients(gradient, parameter)[0])
            for parameter, gradient in zip(parameters, gradients)
        ]
        hessian_diag = tf.concat(diagonal_parts, axis=0)

        # It's easier to clip the inverse Hessian than the Hessian itself;
        # a diagonal matrix inverts elementwise.
        clip_bound = 1 / self.min_eigval
        inv_hessian_diag = tf.clip_by_value(
            1 / hessian_diag, -clip_bound, clip_bound)

        new_param_vector = param_vector - step * full_gradient * inv_hessian_diag
        return setup_parameter_updates(parameters, new_param_vector)
コード例 #2
0
ファイル: lev_marq.py プロジェクト: degerli/neupy
    def init_train_updates(self):
        """Levenberg-Marquardt training step.

        Adjusts the damping factor ``mu`` based on the error trend and
        solves the damped normal equations for the parameter update.
        """
        variables = self.variables
        mu = variables.mu

        # Raise damping when the error increased, lower it otherwise.
        new_mu = tf.where(
            tf.less(variables.last_error, variables.error_func),
            mu * self.mu_update_factor,
            mu / self.mu_update_factor,
        )

        err_for_each_sample = flatten(
            (variables.network_output - variables.train_prediction_func) ** 2)

        params = parameter_values(self.connection)
        param_vector = make_single_vector(params)

        jacobian = compute_jacobian(err_for_each_sample, params)
        jacobian_T = tf.transpose(jacobian)
        n_params = jacobian.shape[1]

        # Solve (J^T J + mu * I) * dw = J^T * e for the update dw.
        parameter_update = tf.matrix_solve(
            tf.matmul(jacobian_T, jacobian) + new_mu * tf.eye(n_params.value),
            tf.matmul(jacobian_T, tf.expand_dims(err_for_each_sample, 1)),
        )

        updates = [(mu, new_mu)]
        updates.extend(setup_parameter_updates(
            params, param_vector - flatten(parameter_update)))
        return updates
コード例 #3
0
ファイル: hessian.py プロジェクト: degerli/neupy
def find_hessian_and_gradient(error_function, parameters):
    """
    Compute the Hessian matrix and the gradient of the error function
    with respect to the network parameters.

    Parameters
    ----------
    error_function : Tensorflow variable
        Scalar error that will be differentiated.

    parameters : list of Tensorflow variables
        Neural network parameters (e.g. weights, biases).

    Returns
    -------
    tuple
        ``(hessian, gradient)`` where ``hessian`` is an ``(n, n)`` tensor
        of second derivatives and ``gradient`` is the flattened first
        derivative vector of length ``n``.
    """
    gradients = tf.gradients(error_function, parameters)
    full_gradient = make_single_vector(gradients)

    # The loop produces one Hessian row per gradient component, so the
    # bound is the number of parameters (not samples).
    n_parameters = tf.shape(full_gradient)[0]

    def compute_hessian_row(index, result):
        # Differentiate a single gradient component to obtain one row
        # of the Hessian.
        row_gradients = tf.gradients(full_gradient[index], parameters)
        hessian_row = make_single_vector(row_gradients)
        return (index + 1, result.write(index, hessian_row))

    _, hessian = tf.while_loop(lambda index, _: index < n_parameters,
                               compute_hessian_row, [
                                   tf.constant(0, tf.int32),
                                   tf.TensorArray(tf.float32,
                                                  size=n_parameters),
                               ])

    return hessian.stack(), full_gradient
コード例 #4
0
ファイル: quasi_newton.py プロジェクト: degerli/neupy
    def init_train_updates(self):
        """Quasi-Newton training step.

        Refreshes the inverse Hessian approximation (kept as-is on the
        first epoch, when no previous point exists), computes the search
        direction ``-H_inv * g``, finds an optimal step along it, and
        stores the current parameter vector and gradient for the next
        iteration.
        """
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = parameter_values(self.connection)
        param_vector = make_single_vector(params)

        gradients = tf.gradients(self.variables.error_func, params)
        full_gradient = make_single_vector(gradients)

        # On epoch 1 there is no previous point to build a quasi-Newton
        # update from, so the stored inverse Hessian is reused unchanged.
        new_inv_hessian = tf.where(
            tf.equal(self.variables.epoch, 1), inv_hessian,
            self.update_function(inv_H=inv_hessian,
                                 delta_w=param_vector - prev_params,
                                 delta_grad=full_gradient - prev_full_gradient,
                                 epsilon=self.epsilon))
        # Search direction: negative inverse-Hessian times the gradient.
        param_delta = -dot(new_inv_hessian, full_gradient)
        step = self.find_optimal_step(param_vector, param_delta)
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        # We have to compute these values first, otherwise
        # parallelization in tensorflow can mix update order
        # and, for example, previous gradient can be equal to
        # current gradient value. It happens because tensorflow
        # try to execute operations in parallel.
        required_variables = [new_inv_hessian, param_vector, full_gradient]
        with tf.control_dependencies(required_variables):
            updates.extend([
                inv_hessian.assign(new_inv_hessian),
                prev_params.assign(param_vector),
                prev_full_gradient.assign(full_gradient),
            ])

        return updates
コード例 #5
0
ファイル: conjgrad.py プロジェクト: degerli/neupy
    def init_train_updates(self):
        """Conjugate gradient training step.

        Computes the conjugate search direction (restarting to plain
        steepest descent every ``n_parameters`` epochs), finds an optimal
        step along it, and remembers the gradient and direction for the
        next iteration.
        """
        # NOTE: the original code also read ``self.variables.step`` here,
        # but that value was shadowed by ``find_optimal_step`` below
        # before any use — the dead read has been removed.
        epoch = self.variables.epoch
        previous_delta = self.variables.prev_delta
        previous_gradient = self.variables.prev_gradient

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = make_single_vector(parameters)

        gradients = tf.gradients(self.variables.error_func, parameters)
        full_gradient = make_single_vector(gradients)

        beta = self.update_function(previous_gradient, full_gradient,
                                    previous_delta, self.epsilon)

        # Periodic restart: fall back to steepest descent; otherwise mix
        # in the previous direction scaled by beta.
        parameter_delta = tf.where(tf.equal(tf.mod(epoch, n_parameters),
                                            1), -full_gradient,
                                   -full_gradient + beta * previous_delta)

        step = self.find_optimal_step(param_vector, parameter_delta)
        updated_parameters = param_vector + step * parameter_delta
        updates = setup_parameter_updates(parameters, updated_parameters)

        # We have to compute these values first, otherwise
        # parallelization in tensorflow can mix update order
        # and, for example, previous gradient can be equal to
        # current gradient value. It happens because tensorflow
        # try to execute operations in parallel.
        with tf.control_dependencies([full_gradient, parameter_delta]):
            updates.extend([
                previous_gradient.assign(full_gradient),
                previous_delta.assign(parameter_delta),
            ])

        return updates
コード例 #6
0
ファイル: hessian.py プロジェクト: degerli/neupy
    def init_train_updates(self):
        """Newton's method update: solve the regularized Hessian system
        and shift the parameters by the resulting delta.
        """
        penalty = asfloat(self.penalty_const)

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = make_single_vector(parameters)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)

        # Add a penalty to the diagonal so the system stays solvable even
        # when the raw Hessian is singular.
        damped_hessian = hessian_matrix + penalty * tf.eye(n_parameters)
        parameter_update = tf.matrix_solve(
            damped_hessian, tf.reshape(full_gradient, [-1, 1]))

        return setup_parameter_updates(
            parameters, param_vector - flatten(parameter_update))
コード例 #7
0
ファイル: lev_marq.py プロジェクト: degerli/neupy
 def compute_gradient_per_value(index, result):
     """Loop body: write the gradient of ``values[index]`` into ``result``.

     Relies on ``values`` and ``parameters`` from the enclosing scope;
     returns the incremented index and the updated TensorArray, as
     required by ``tf.while_loop``.
     """
     gradients = tf.gradients(values[index], parameters)
     full_gradient = make_single_vector(gradients)
     return (index + 1, result.write(index, full_gradient))
コード例 #8
0
ファイル: hessian.py プロジェクト: degerli/neupy
 def compute_gradient_per_value(index, result):
     """Loop body: differentiate one gradient component into a Hessian row.

     Relies on ``full_gradient`` and ``parameters`` from the enclosing
     scope; returns the incremented index and the updated TensorArray,
     as required by ``tf.while_loop``.
     """
     gradients = tf.gradients(full_gradient[index], parameters)
     hessian = make_single_vector(gradients)
     return (index + 1, result.write(index, hessian))