import numpy as np
# Assumes the package's AutoDiff class is importable as `ad`; the exact
# module path depends on the repo layout.


def gradient_descent(func, num, step_size=0.1, tol=10e-8, max_iter=10000):
    '''
    INPUTS:
        func: callable (function)
            the function we would like to perform gradient descent on
        num: np.ndarray of AutoDiff objects
            vector of inputs; a list of AutoDiff objects
        step_size: float, optional (default = 0.1)
            step size of the algorithm
        tol: float, optional (default = 10e-8)
            convergence criterion; iteration stops once the norm of the
            update falls below this value
        max_iter: int, optional (default = 10000)
            number of iterations allowed before no convergence is declared

    OUTPUTS:
        min: np.ndarray of AutoDiff objects
            the minimum to which gradient descent converged
        converged: boolean
            True if the algorithm converged, else False
        iterations: int
            the number of iterations performed, whether or not the
            algorithm converged

    NOTE: This algorithm terminates when the gradient is zero, whether or
    not that point is a minimum or a maximum. The algorithm is fairly
    robust and should converge eventually, depending on the step size.
    Default parameters are set so that most input functions should
    converge.
    '''
    # Set up values
    n_func = len(num)  # Number of inputs in the vector
    default_der = np.copy([num[i].der for i in range(n_func)])  # Used for casting back to AD
    last = np.copy([n.val for n in num])  # Last point, used to check convergence

    for i in range(max_iter):
        # Evaluate function to get derivatives
        f_val = func(num)

        # Update values by stepping along derivative
        for j in range(n_func):
            num[j] -= ad(step_size * f_val.der[j])

        # Recast values as AutoDiff objects
        for j in range(n_func):
            num[j] = ad(num[j].val, default_der[j])

        # Check if converged
        if (np.linalg.norm(np.array([n.val for n in num]) - last) < tol):
            return [num, True, i]
        last = np.copy([n.val for n in num])

    return [num, False, i]
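
# Usage sketch (hypothetical, not part of the library): minimize
# f(x, y) = (x - 3)^2 + (y + 1)^2. Assumes `ad` is this package's AutoDiff
# class, seeded so each input's derivative vector selects its own
# coordinate, as in the tests below.
if __name__ == '__main__':
    x = ad(0., [1., 0.])
    y = ad(0., [0., 1.])
    func = lambda v: (v[0] - 3)**2 + (v[1] + 1)**2
    minimum, converged, iterations = gradient_descent(func, [x, y])
    print([m.val for m in minimum], converged, iterations)  # expect roughly [3, -1]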
def test_plot():
    x = ad(1, [1., 0.])
    y = ad(1, [0., 1.])
    func = lambda x: x[0]**2 + x[1]**2
    output = opt.gradient_descent(func, [x, y], return_trace=True)
    # Plotting the trace should save a figure to disk
    plt.plot_2dtrajectory(func, output[3], output[1])
    assert 'trace.png' in os.listdir("./")
    os.remove("./trace.png")
import numpy as np
# Assumes the package's AutoDiff class is importable as `ad` and that this
# module provides an `inv_jacobian` helper; exact paths depend on the repo
# layout.


def newton(func, num, tol=1e-10):
    '''
    This function runs Newton's method of root finding.

    INPUTS:
        num: an autodiff number object
        func: the function we are trying to find the root of
        tol: convergence tolerance

    OUTPUT:
        The value of the root.

    NOTE: This function is currently only implemented for the 1D case.
    When a function has no root, or a root that cannot be reached, the
    function returns with the converged flag set to False. When the
    derivative is 0, the function raises a floating point error with the
    message "ZERO DERIVATIVE". When the starting point is the root, this
    function returns the root immediately.
    '''
    root = [n.val for n in num]
    default_der = np.copy([num[i].der for i in range(len(num))])

    # Started at root case
    f_val = np.copy([f.val for f in func(num)])
    if (np.linalg.norm(f_val) < tol):
        return num

    # Root-finding loop. Declares no convergence after 2000 steps
    iterations = 0
    while (np.linalg.norm(f_val) > tol):
        num = func(num)

        # Catch zero derivatives
        if (np.linalg.norm([n.der for n in num]) == 0):
            raise FloatingPointError("ZERO DERIVATIVE")

        # Newton step: move the point by -J^{-1} f, then recast as AutoDiff
        root -= np.dot(inv_jacobian(num), num)
        num = np.copy([ad(v.val, default_der[j]) for j, v in enumerate(root)])
        f_val = np.copy([f.val for f in func(num)])

        iterations += 1
        if (iterations == 2000):
            return num, False, iterations

    return num, True, iterations
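
# For intuition, the update above is the standard Newton step
# x_{k+1} = x_k - J(x_k)^{-1} f(x_k). Below is a plain-float scalar sketch
# (no AutoDiff machinery) on f(x) = x^2 - 2; `_newton_scalar_demo` is a
# hypothetical helper, not part of the module.
def _newton_scalar_demo():
    x = 1.0
    for _ in range(6):
        x -= (x**2 - 2) / (2 * x)  # f(x) / f'(x)
    return x  # ~1.41421356..., i.e. sqrt(2)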
def fixed_point(func, num, tol=1e-10, max_iter=10000):
    '''
    This function runs fixed point iteration for root finding: it
    repeatedly applies num <- func(num) until the value stops changing.

    INPUTS:
        num: an autodiff number object
        func: the function we are trying to find the fixed point of
        tol: convergence tolerance

    OUTPUT:
        The value of the fixed point.

    NOTE: When the iteration has not converged after max_iter steps, the
    function returns with the converged flag set to False. When the
    starting point is already a root, this function returns immediately.
    Fixed-point iteration converges only when the map is a contraction
    (|g'(x)| < 1) near the fixed point.
    '''
    default_der = np.copy([num[i].der for i in range(len(num))])

    # Started at root case
    f_val = np.copy([f.val for f in func(num)])
    if (np.linalg.norm(f_val) < tol):
        return num

    # Comparison used to determine convergence
    last = np.ones(len(num)) * 999

    # Fixed-point iteration. Declares no convergence after max_iter steps
    iterations = 0
    while (np.linalg.norm(last - np.array([n.val for n in num])) > tol):
        last = np.copy([n.val for n in num])
        num = func(num)
        num = np.copy([ad(n.val, default_der[j]) for j, n in enumerate(num)])
        iterations += 1
        if (iterations == max_iter):
            return num, False, iterations

    return num, True, iterations
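
# Plain-float illustration of the iteration x_{k+1} = g(x_k) performed
# above. g(x) = cos(x) is a contraction near its fixed point (|g'(x)| < 1
# there), so the iterates converge to the solution of x = cos(x)
# (~0.7390851). `_fixed_point_demo` is a hypothetical helper, not part of
# the module.
def _fixed_point_demo():
    x = 1.0
    for _ in range(100):
        x = np.cos(x)
    return x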
def test_newton():
    # Linear system with its root at the origin
    x = ad(1, [1., 0.])
    y = ad(1, [0., 1.])
    fn = lambda x: [x[0], x[1]]
    output = rf.newton(fn, [x, y], tol=1e-10)
    o_vals = [o.val for o in output[0]]
    assert np.linalg.norm(o_vals) < 10e-10

    # Single equation in two unknowns
    x = ad(2, [1., 0.])
    y = ad(3, [0., 1.])
    fn = lambda x: [x[0]**2 + x[1]**2]
    output = rf.newton(fn, [x, y], tol=1e-10)
    o_vals = [o.val for o in output[0]]
    assert np.linalg.norm(o_vals) < 10e-6

    # Root of cos(x) nearest 0.8 is pi/2
    x = ad(0.8, [1.])
    fn = lambda x: [x[0].cos()]
    output = rf.newton(fn, [x], tol=1e-10)
    assert np.isclose(output[0][0].val, np.pi/2)

    # Scalar (non-list) function output should raise a TypeError
    x = ad(1, [1.])
    func = lambda x: x[0]
    try:
        _ = rf.newton(func, [x])
    except TypeError:
        assert True

    # Trace should start at the initial point and end at the root
    x = ad(0.8, [1.])
    fn = lambda x: [x[0].cos()]
    output = rf.newton(fn, [x], tol=1e-10, return_trace=True)
    assert np.isclose(output[0][0].val, np.pi/2)
    assert output[3][0] == x
    assert output[3][-1] == output[0]
def test_conjugate_gradient():
    # Checks easy function
    x = ad(1., [1., 0.])
    y = ad(1., [0., 1.])
    fn = lambda x: x[0]**2 + x[1]**2
    output = opt.conjugate_gradient(fn, [x, y], tol=1e-12)
    assert np.linalg.norm(output[0]) < 10e-12

    # Checks complicated function
    x = ad(1., [1., 0.])
    y = ad(1., [0., 1.])
    fn = lambda x: x[0].cos()**2 + x[1].sin()**2
    output = opt.conjugate_gradient(fn, [x, y], tol=1e-10)
    assert np.linalg.norm(output[0] - np.array([np.pi/2, 0])) < 10e-10

    # Checks trace
    x = ad(1., [1., 0.])
    y = ad(1., [0., 1.])
    fn = lambda x: x[0]**2 + x[1]**2
    output = opt.conjugate_gradient(fn, [x, y], tol=1e-10, return_trace=True)
    assert np.linalg.norm(output[0]) < 10e-10
    assert all(output[3][0] == [x, y])
    assert np.linalg.norm(output[3][-1]) < 10e-10
    while (np.linalg.norm(last - num) > tol):
        last = np.copy(num)

        # Update current value
        num += step_size * s

        # Update gradient
        new_g = f(num).der

        # Fletcher-Reeves update: beta is the scalar ratio of successive
        # gradient norms, and the new search direction mixes the new
        # gradient with the previous direction
        beta = np.dot(new_g, new_g) / np.dot(g, g)
        s = -new_g + beta * s
        g = np.copy(new_g)

        iterations += 1

        # Break out of loop if we have reached maximum iterations
        if (iterations > max_iter):
            return num, False, iterations

    return num, True, iterations


if __name__ == '__main__':
    x = ad(1, [1., 0.])
    y = ad(1, [0., 1.])
    fn = lambda x: (x[0] - 3)**2 + (x[1] + 1)**2
    print(conjugate_gradient(fn, [x, y]))
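
# Standalone Fletcher-Reeves sketch on plain NumPy floats mirroring the loop
# above: beta = (g_new . g_new) / (g . g), then s = -g_new + beta * s.
# `_cg_float_demo` is a hypothetical helper, not part of the module; the
# fixed step size matches the style of the loop above.
def _cg_float_demo():
    import numpy as np
    grad = lambda v: np.array([2 * (v[0] - 3), 2 * (v[1] + 1)])  # gradient of fn above
    num = np.array([1., 1.])
    g = grad(num)
    s = -g
    for _ in range(50):
        num = num + 0.1 * s
        new_g = grad(num)
        beta = np.dot(new_g, new_g) / np.dot(g, g)
        s = -new_g + beta * s
        g = new_g
    return num  # approaches [3, -1]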
        iterations:

    NOTES:
    '''
    # Initialize the Jacobian estimate to a (padded) identity matrix when
    # the caller does not supply one
    if (init_jacobian == 1):
        init_jacobian = np.zeros((len(num), len(num[0].der)))
        smaller = min(init_jacobian.shape)
        init_jacobian[:smaller, :smaller] += np.eye(smaller)
    print(init_jacobian)  # Debug output; the rest of this routine is not yet implemented


if __name__ == '__main__':
    x = ad(1, [1., 0., 0.])
    y = ad(1, [0., 1., 0.])
    z = ad(1, [0., 0., 1.])
    fn = lambda x: [x[0] + 2 * x[1] - 1, x[2]]
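
# The padded-identity default above suggests a quasi-Newton scheme. If the
# intent is Broyden's method (an assumption; the routine above is
# unfinished), the rank-one Jacobian update would be
# J <- J + ((df - J dx) dx^T) / (dx . dx). `_broyden_update` is a
# hypothetical sketch, not part of the module.
def _broyden_update(J, dx, df):
    import numpy as np
    return J + np.outer(df - J @ dx, dx) / np.dot(dx, dx)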
def gradient_descent(func, num, step_size=0.1, tol=10e-8, max_iter=10000, return_trace=False):
    '''
    INPUTS:
        func: callable (function)
            the function we would like to perform gradient descent on
        num: np.ndarray of AutoDiff objects
            vector of inputs; a list of AutoDiff objects
        step_size: float, optional (default = 0.1)
            step size of the algorithm
        tol: float, optional (default = 10e-8)
            convergence criterion; iteration stops once the norm of the
            update falls below this value
        max_iter: int, optional (default = 10000)
            number of iterations allowed before no convergence is declared
        return_trace: boolean, optional (default = False)
            returns the trace of points if True. Useful for plotting

    OUTPUTS:
        min: np.ndarray of AutoDiff objects
            the minimum to which gradient descent converged
        converged: boolean
            True if the algorithm converged, else False
        iterations: int
            the number of iterations performed, whether or not the
            algorithm converged
        trace: np.ndarray, only if return_trace is True
            the sequence of points visited

    NOTE: This algorithm terminates when the gradient is zero, whether or
    not that point is a minimum or a maximum. The algorithm is fairly
    robust and should converge eventually, depending on the step size.
    Default parameters are set so that most input functions should
    converge.
    '''
    # Set up values
    n_func = len(num)  # Number of inputs in the vector
    default_der = np.copy([num[i].der for i in range(n_func)])  # Used for casting back to AD
    last = np.ones(len(num)) * 999
    iterations = 0
    if (return_trace):
        trace = [np.copy(num)]

    # Zero derivative at the starting point: nothing to step along
    if (all(func(num).der == np.zeros(len(func(num).der)))):
        if (return_trace):
            return num, False, 0, np.array(trace)
        return num, False, 0

    while (np.linalg.norm(np.array([n.val for n in num]) - last) > tol):
        last = np.copy([n.val for n in num])

        # Evaluate function to get derivatives
        f_val = func(num)

        # Update values by stepping along derivative
        for j in range(n_func):
            num[j] -= ad(step_size * f_val.der[j])

        # Recast values as AutoDiff objects
        for j in range(n_func):
            num[j] = ad(num[j].val, default_der[j])

        if (return_trace):
            trace.append(np.copy(num))

        iterations += 1
        if (iterations == max_iter):
            if (return_trace):
                return num, False, iterations, np.array(trace)
            return num, False, iterations

    if (return_trace):
        return num, True, iterations, np.array(trace)
    return num, True, iterations
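
# Usage sketch for the return_trace interface (hypothetical, mirroring the
# test setup used elsewhere in this repo):
if __name__ == '__main__':
    x = ad(1., [1., 0.])
    y = ad(1., [0., 1.])
    func = lambda v: v[0]**2 + v[1]**2
    minimum, converged, iterations, trace = gradient_descent(
        func, [x, y], return_trace=True)
    print(len(trace))  # the initial point plus one entry per step taken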
def newton(func, num, tol=1e-10, max_iter=10000, return_trace=False):
    '''
    This function runs Newton's method of root finding.

    INPUTS:
        num: np.ndarray of AutoDiff objects
            an autodiff number object
        func: callable, input is array of AutoDiff objects
            the function we are trying to find the root of
        tol: float, optional (default = 1e-10)
            convergence tolerance
        max_iter: int, optional (default = 10000)
            maximum number of iterations before no convergence is declared
        return_trace: boolean, optional (default = False)
            returns trace of the root-finding procedure if True

    OUTPUT:
        root: AutoDiff object
            the value of the root and derivative at the root
        converged: boolean
            True if the algorithm converged, else False
        iterations: int
            the number of iterations performed
        trace: np.ndarray, only if return_trace is True
            the sequence of points visited

    NOTE: When a function has no root, or a root that cannot be reached,
    the function returns with the converged flag set to False. In the 1D
    case, when the derivative is 0 the function raises a floating point
    error with the message "ZERO DERIVATIVE". When the starting point is
    the root, this function returns the root immediately.

    All of the same failure modes apply here as in the mathematical
    formulation. First, you must start at a good starting point, otherwise
    the algorithm may not converge. Also, the function must approach the
    root smoothly enough to converge.

    Function output must be a list or array, even for scalar valued
    functions. For example:
    >>> lambda x: x[0]    # This throws an error because the output is a scalar
    >>> lambda x: [x[0]]  # This works because the output is a list
    '''
    root = [n.val for n in num]
    default_der = np.copy([num[i].der for i in range(len(num))])
    if (return_trace):
        trace = [np.copy([n for n in num])]

    # Scalar (non-list) function output is not supported
    if (isinstance(func(num), ad)):
        err_str = "Function output must be list, even for scalar functions."
        err_str += "\nTry returning your function output as a list: return [output]."
        raise TypeError(err_str)

    # Started at root case
    f_val = np.copy([f.val for f in func(num)])
    if (np.linalg.norm(f_val) < tol):
        if (return_trace):
            return num, True, 0, np.array(trace)
        return num, True, 0

    # Root-finding loop. Declares no convergence after max_iter steps
    iterations = 0
    while (np.linalg.norm(f_val) > tol):
        num = func(num)

        # Catch zero derivatives (1D case)
        if (len(num) == 1 and np.linalg.norm([n.der for n in num]) == 0):
            raise FloatingPointError("ZERO DERIVATIVE")

        # Newton step: move the point by -J^{-1} f, then recast as AutoDiff
        root -= np.dot(_inv_jacobian(num), num)
        num = np.copy([ad(v.val, default_der[j]) for j, v in enumerate(root)])
        f_val = np.copy([f.val for f in func(num)])

        if (return_trace):
            trace.append(np.copy([n for n in num]))

        iterations += 1
        if (iterations == max_iter):
            if (return_trace):
                return num, False, iterations, np.array(trace)
            return num, False, iterations

    if (return_trace):
        return num, True, iterations, np.array(trace)
    return num, True, iterations
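
# Usage sketch (hypothetical): solve f(x, y) = [x + 2y - 1, y] = 0 starting
# from (1, 1). Note the function returns a list, as required above.
if __name__ == '__main__':
    x = ad(1., [1., 0.])
    y = ad(1., [0., 1.])
    fn = lambda v: [v[0] + 2 * v[1] - 1, v[1]]
    root, converged, iterations = newton(fn, [x, y])
    print([r.val for r in root])  # expect values near [1, 0]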
    # Started at root case
    f_val = np.copy([f.val for f in func(num)])
    if (np.linalg.norm(f_val) < tol):
        return num

    # Comparison used to determine convergence
    last = np.ones(len(num)) * 999

    # Fixed-point iteration. Declares no convergence after max_iter steps
    iterations = 0
    while (np.linalg.norm(last - np.array([n.val for n in num])) > tol):
        last = np.copy([n.val for n in num])
        num = func(num)
        num = np.copy([ad(n.val, default_der[j]) for j, n in enumerate(num)])
        iterations += 1
        if (iterations == max_iter):
            return num, False, iterations

    return num, True, iterations


if __name__ == '__main__':
    x = ad(0.3, [1., 0., 0.])
    y = ad(0.9, [0., 1., 0.])
    #z = ad(0.9, [0., 0., 1.])
    fn = lambda x: [(x[0] - 0.2)**2]  #, (x[1])**2]#, (x[2]+0.1)**2]
    print(fixed_point(fn, [x]))