def dfp(inv_H, delta_w, delta_grad, epsilon=1e-7):
    """
    DFP update rule for the inverse hessian approximation.

    The method is closely related to BFGS and applies a rank 2
    correction to the current approximation. It is sensitive to
    round-off error and to inaccurate line searches.

    Parameters
    ----------
    inv_H : current approximation of the inverse hessian.
    delta_w : difference between two consecutive parameter vectors.
    delta_grad : difference between two consecutive gradients.
    epsilon : value forwarded to ``safe_division`` for both
        quotients (presumably guards against tiny denominators,
        confirm against the helper's definition).

    Returns
    -------
    Updated approximation of the inverse hessian.
    """
    hessian_times_grad = dot(inv_H, delta_grad)

    # Correction that is added: built only from the parameter step.
    step_numerator = outer(delta_w, delta_w)
    step_denominator = dot(delta_grad, delta_w)
    step_term = safe_division(step_numerator, step_denominator, epsilon)

    # Correction that is subtracted: built from the current
    # approximation applied to the gradient difference.
    curvature_numerator = tf.matmul(
        outer(hessian_times_grad, delta_grad),
        inv_H,
    )
    curvature_denominator = dot(delta_grad, hessian_times_grad)
    curvature_term = safe_division(
        curvature_numerator,
        curvature_denominator,
        epsilon,
    )

    return inv_H - curvature_term + step_term
def bfgs(inv_H, delta_w, delta_grad, epsilon=1e-7):
    """
    BFGS update rule for the inverse hessian approximation.

    It is sensitive to round-off error and to inaccurate
    line searches.

    Parameters
    ----------
    inv_H : current approximation of the inverse hessian. Its first
        dimension defines the size of the identity matrix used
        in the update.
    delta_w : difference between two consecutive parameter vectors.
    delta_grad : difference between two consecutive gradients.
    epsilon : value forwarded to ``safe_reciprocal`` (presumably
        guards against a tiny denominator, confirm against the
        helper's definition).

    Returns
    -------
    Updated approximation of the inverse hessian.
    """
    size = int(inv_H.shape[0])
    identity = tf.eye(size)

    # Reciprocal of the product between gradient and parameter deltas.
    inv_curvature = safe_reciprocal(dot(delta_grad, delta_w), epsilon)

    left_factor = identity - outer(delta_w, delta_grad) * inv_curvature
    right_factor = tf.transpose(left_factor)
    step_term = inv_curvature * outer(delta_w, delta_w)

    # The previous approximation is wrapped between the factor and
    # its transpose before the step term is added.
    wrapped_inv_H = tf.matmul(left_factor, tf.matmul(inv_H, right_factor))
    return wrapped_inv_H + step_term
def sr1(inv_H, delta_w, delta_grad, epsilon=1e-7):
    """
    Symmetric rank 1 (SR1) update rule for the inverse hessian
    approximation.

    The update adds a symmetric rank-1 matrix to the current
    approximation. A rank-1 update doesn't always exist; when it
    doesn't, the original inverse hessian is returned unchanged.

    Parameters
    ----------
    inv_H : current approximation of the inverse hessian.
    delta_w : difference between two consecutive parameter vectors.
    delta_grad : difference between two consecutive gradients.
    epsilon : relative tolerance used to decide whether the
        denominator is too small for the update to be applied.

    Returns
    -------
    Updated approximation of the inverse hessian, or ``inv_H``
    itself when the update is skipped.
    """
    residual = delta_w - dot(inv_H, delta_grad)
    denominator = dot(residual, delta_grad)

    # Guard against iterations where the rank-1 update doesn't
    # exist: the denominator is compared against a tolerance scaled
    # by the norms of both vectors involved in it.
    denominator_size = tf.abs(denominator)
    tolerance = epsilon * tf.norm(residual) * tf.norm(delta_grad)
    skip_update = tf.less(denominator_size, tolerance)

    candidate = inv_H + outer(residual, residual) / denominator
    return tf.where(skip_update, inv_H, candidate)