Esempio n. 1
0
def dfp(inv_H, delta_w, delta_grad, epsilon=1e-7):
    """
    Davidon-Fletcher-Powell (DFP) update of the inverse hessian
    approximation. It is a rank-2 update closely related to BFGS and,
    like BFGS, it can suffer from round-off error and inaccurate
    line searches.

    Writing ``H = inv_H``, ``s = delta_w`` and ``y = delta_grad``, and
    assuming ``dot`` / ``outer`` (defined outside this function) are the
    usual inner/matrix-vector and outer products, the returned value is

        H - (H y)(y^T) H / (y^T H y) + (s s^T) / (y^T s)

    ``epsilon`` is forwarded to ``safe_division`` for both quotients;
    presumably it guards against near-zero denominators (helper is
    defined elsewhere, confirm there).
    """
    hess_times_dgrad = dot(inv_H, delta_grad)

    # Term built from the parameter step: s s^T / (y^T s).
    step_outer = outer(delta_w, delta_w)
    step_curvature = dot(delta_grad, delta_w)
    step_term = safe_division(step_outer, step_curvature, epsilon)

    # Term built from the gradient change: (H y)(y^T) H / (y^T H y).
    grad_numerator = tf.matmul(outer(hess_times_dgrad, delta_grad), inv_H)
    grad_denominator = dot(delta_grad, hess_times_dgrad)
    grad_term = safe_division(grad_numerator, grad_denominator, epsilon)

    return inv_H - grad_term + step_term
Esempio n. 2
0
def bfgs(inv_H, delta_w, delta_grad, epsilon=1e-7):
    """
    Broyden-Fletcher-Goldfarb-Shanno (BFGS) update of the inverse
    hessian approximation. It can suffer from round-off error and
    inaccurate line searches.

    Writing ``H = inv_H``, ``s = delta_w`` and ``y = delta_grad``, and
    assuming ``dot`` / ``outer`` (defined outside this function) are the
    usual inner and outer products, the returned value is

        (I - rho s y^T) H (I - rho s y^T)^T + rho s s^T

    where ``rho = safe_reciprocal(y^T s, epsilon)``. ``safe_reciprocal``
    is defined elsewhere; presumably ``epsilon`` protects it against a
    near-zero ``y^T s`` (confirm in the helper).

    ``inv_H`` must have a statically known first dimension, because it
    is converted to a Python ``int`` to size the identity matrix.
    """
    n_parameters = int(inv_H.shape[0])

    # ``tf.eye`` produces float32 by default. Build the identity in the
    # dtype of ``inv_H`` instead, so the subtraction below does not fail
    # on a dtype mismatch when the inputs are e.g. float64.
    # ``base_dtype`` strips the ``*_ref`` variant reported by TF1 variables.
    dtype = tf.as_dtype(inv_H.dtype).base_dtype
    identity = tf.eye(n_parameters, dtype=dtype)
    rho = safe_reciprocal(dot(delta_grad, delta_w), epsilon)

    X = identity - outer(delta_w, delta_grad) * rho
    X_T = tf.transpose(X)
    Z = rho * outer(delta_w, delta_w)

    return tf.matmul(X, tf.matmul(inv_H, X_T)) + Z
Esempio n. 3
0
def sr1(inv_H, delta_w, delta_grad, epsilon=1e-7):
    """
    Symmetric rank 1 (SR1) update of the inverse hessian approximation.

    Writing ``H = inv_H``, ``s = delta_w``, ``y = delta_grad`` and
    ``r = s - H y`` (with ``dot`` / ``outer`` defined outside this
    function and assumed to be the usual products), the candidate
    update is

        H + (r r^T) / (r^T y)

    A rank-1 update does not always exist. When ``|r^T y|`` is smaller
    than ``epsilon * ||r|| * ||y||`` the update is skipped and the
    original ``inv_H`` is returned unchanged.
    """
    residual = delta_w - dot(inv_H, delta_grad)
    curvature = dot(residual, delta_grad)

    # Guard against iterations where no rank-1 update exists: the
    # denominator is negligible relative to the size of its factors.
    curvature_magnitude = tf.abs(curvature)
    threshold = epsilon * tf.norm(residual) * tf.norm(delta_grad)
    skip_update = tf.less(curvature_magnitude, threshold)

    updated_inv_H = inv_H + outer(residual, residual) / curvature

    return tf.where(skip_update, inv_H, updated_inv_H)