def test_mul():
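    # Element-wise multiplication: vector * vector, arrays of Scalars, a vector times itself,
    # multiplication by a constant, and products of composite expressions.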
    v1 = ad.create_vector('v', [1, 2])
    v2 = ad.create_vector('w', [3, 5])
    v3 = v1 * v2 
    assert(v3[0].getValue() == 3)
    assert(v3[1].getValue() == 10)
    jacobian = ad.get_jacobian(v3, ['v1', 'v2', 'w1', 'w2'])
    assert(np.array_equal(jacobian, np.array([[3, 0, 1, 0], [0, 5, 0, 2]])))    

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v = ad.Scalar('v', 3)
    v1 = np.array([x, y])
    v2 = np.array([v, 3 * v])
    v3 = v1 * v2
    assert(v3[0].getValue() == 3)
    assert(v3[1].getValue() == 18)  
    jacobian = ad.get_jacobian(v3, ['x', 'y', 'v'])
    assert(np.array_equal(jacobian, np.array([[3, 0, 1], [0, 9, 6]])))    


    v1 = ad.create_vector('v', [2, 3])
    v3 = v1 * v1
    assert(v3[0].getValue() == 4)
    assert(v3[1].getValue() == 9)
    jacobian = ad.get_jacobian(v3, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[4, 0], [0, 6]])))    


    v1 = ad.create_vector('v', [1, 2])
    v2 = v1 * 10
    assert(v2[0].getValue() == 10)
    assert(v2[1].getValue() == 20)
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[10, 0], [0, 10]])))    


    x = ad.Scalar('x', 5)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = np.array([x * y, (x + y)])
    v3 = v1 * v2
    assert(v3[0].getValue() == 50)
    assert(v3[1].getValue() == 14)   
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[20, 25], [2, 9]])))    

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = np.array([y, 10])
    v3 = v1 * v2
    assert(v3[0].getValue() == 2)
    assert(v3[1].getValue() == 20)  
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[2, 1], [0, 10]])))    
def test_add():
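    # Element-wise addition: two vectors sharing variable names, vector + constant,
    # and sums mixing Scalars with created vectors.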
    v1 = ad.create_vector('v', [1, 2])
    v2 = ad.create_vector('v', [1, 5])
    v3 = v1 + v2 
    assert(v3[0].getValue() == 2)
    assert(v3[1].getValue() == 7)
    jacobian = ad.get_jacobian(v3, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[2, 0], [0, 2]])))

    v1 = ad.create_vector('v', [1, 2])
    v2 = v1 + 10
    assert(v2[0].getValue() == 11)
    assert(v2[1].getValue() == 12)
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 1]])))

    v1 = ad.create_vector('v', [1, 2])
    v2 = ad.Scalar('v2', 4)
    v3 = ad.Scalar('v1', 7)
    v4 = v1 + np.array([v2, v3])
    assert(v4[0].getValue() == 5)
    assert(v4[1].getValue() == 9)
    jacobian = ad.get_jacobian(v4, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 1], [1, 1]])))    

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = ad.create_vector('v', [1, 5])
    v3 = v1 + v2
    assert(v3[0].getValue() == 2)
    assert(v3[1].getValue() == 7)   
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 1]])))    

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = np.array([x + y, x])
    v3 = v1 + v2
    assert(v3[0].getValue() == 4)
    assert(v3[1].getValue() == 3)   
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[2, 1], [1, 1]])))    

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = np.array([y, 10])
    v3 = v1 + v2
    assert(v3[0].getValue() == 3)
    assert(v3[1].getValue() == 12)  
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[1, 1], [0, 1]])))    
def test_create_vector():
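    # create_vector: default and custom seed derivatives, seed-length validation,
    # and Jacobians taken over extra or reordered variable names.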
    v = ad.create_vector('v', [1, 2])
    assert(v[0].getValue() == 1)
    assert(v[1].getValue() == 2)
    derivs = ad.get_deriv(v) 
    assert(np.array_equal(np.array([deriv.get('v1', 0) for deriv in derivs]), np.array([1, 0])))
    assert(np.array_equal(np.array([deriv.get('v2', 0) for deriv in derivs]), np.array([0, 1])))
    jacobian = ad.get_jacobian(v, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 1]])))
    jacobian = ad.get_jacobian(v, ['v1', 'v2', 'hello'])
    assert(np.array_equal(jacobian, np.array([[1, 0, 0], [0, 1, 0]])))

    v = ad.create_vector('v', [1, 2], [3, 4])
    assert(v[0].getValue() == 1)
    assert(v[1].getValue() == 2)
    derivs = ad.get_deriv(v) 
    assert(np.array_equal(np.array([deriv.get('v1', 0) for deriv in derivs]), np.array([3, 0])))
    assert(np.array_equal(np.array([deriv.get('v2', 0) for deriv in derivs]), np.array([0, 4])))
    jacobian = ad.get_jacobian(v, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[3, 0], [0, 4]])))
    jacobian = ad.get_jacobian(v, ['v1', 'v2', 'hello'])
    assert(np.array_equal(jacobian, np.array([[3, 0, 0], [0, 4, 0]])))

    with pytest.raises(Exception):    
        v = ad.create_vector('v', [1, 2], [3, 4, 5])
    
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v = np.array([x, y])
    assert(np.array_equal(ad.get_value(v), np.array([1, 2])))
    jacobian = ad.get_jacobian(v, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[0, 0], [0, 0]])))
    jacobian = ad.get_jacobian(v, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 1]])))

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v = np.array([x, 2 * y])
    assert(np.array_equal(ad.get_value(v), np.array([1, 4])))
    jacobian = ad.get_jacobian(v, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 2]])))
    jacobian = ad.get_jacobian(v, ['y', 'x'])
    assert(np.array_equal(jacobian, np.array([[0, 1], [2, 0]])))
    
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v = np.array([x + y, 2 * y])
    assert(np.array_equal(ad.get_value(v), np.array([3, 4])))
    jacobian = ad.get_jacobian(v, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[1, 1], [0, 2]])))
    jacobian = ad.get_jacobian(v, ['y', 'x'])
    assert(np.array_equal(jacobian, np.array([[1, 1], [2, 0]])))
def test_tan():
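    # Element-wise tangent; the derivative of tan(x) is 1 / cos(x)^2.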
    v1 = ad.create_vector('v', [0, 100])
    v2 = ad.tan(v1)
    assert(v2[0].getValue() == 0)
    assert(np.isclose(v2[1].getValue(), np.tan(100)))
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.isclose(jacobian, np.array([[1, 0], [0, 1 / (np.cos(100) ** 2)]])).all())
def test_sin():
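    # Element-wise sine, including sin(x)/sin(x), sin(x)**2, and sine of composite expressions.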
    v1 = ad.create_vector('v', [0, 100])
    v2 = ad.sin(v1)
    assert(v2[0].getValue() == 0)
    assert(np.isclose(v2[1].getValue(), np.sin(100)))
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, np.cos(100)]])))

    v1 = ad.Scalar('x', 4)
    v2 = ad.Scalar('y', 10)
    v3 = ad.sin(np.array([v1, v2])) / ad.sin(np.array([v1, v2]))
    assert(np.isclose(v3[0].getValue(), 1))
    assert(np.isclose(v3[1].getValue(), 1))
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.isclose(jacobian, np.array([[0, 0], [0, 0]])).all())

    v1 = ad.Scalar('x', 4)
    v2 = ad.Scalar('y', 10)
    v3 = ad.sin(np.array([v1, v2])) ** 2
    assert(np.isclose(v3[0].getValue(), np.sin(4) ** 2))
    assert(np.isclose(v3[1].getValue(), np.sin(10) ** 2))
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.isclose(jacobian, np.array([[2 * np.sin(4) * np.cos(4), 0], [0, 2 * np.sin(10) * np.cos(10)]])).all())

    v1 = ad.Scalar('x', 4)
    v2 = ad.Scalar('y', 10)
    v3 = ad.sin(np.array([v1 * v2, v1 + v2])) ** 2
    assert(np.isclose(v3[0].getValue(), np.sin(40) ** 2))
    assert(np.isclose(v3[1].getValue(), np.sin(14) ** 2))
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.isclose(jacobian, np.array([[2 * np.sin(40) * np.cos(40)  * 10, 2 * np.sin(40) * np.cos(40)  * 4], 
            [2 * np.sin(14) * np.cos(14), 2 * np.sin(14) * np.cos(14)]])).all())
def test_pow():
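    # Element-wise powers: constant exponents, nested powers, and vector ** vector exponents.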
    v1 = ad.create_vector('v', [2, 5])
    v2 = v1 ** 2
    assert(v2[0].getValue() == 4)
    assert(v2[1].getValue() == 25)
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[4, 0], [0, 10]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 5)
    v1 = np.array([x, y])
    v2 = v1 ** 2
    assert(v2[0].getValue() == 4)
    assert(v2[1].getValue() == 25)
    jacobian = ad.get_jacobian(v2, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[4, 0], [0, 10]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = np.array([x, y])
    v2 = (v1 ** 2) ** 3
    assert(v2[0].getValue() == 64)
    assert(v2[1].getValue() == 729)
    jacobian = ad.get_jacobian(v2, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[6 * (2 ** 5), 0], [0, 6 * (3 ** 5)]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = np.array([x, y])
    v2 = np.array([y, 2])
    v3 = v1 ** v2
    assert(v3[0].getValue() == 8)
    assert(v3[1].getValue() == 9)
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[12, np.log(2) * 8], [0, 6]])))
def test_cos():
    # Similar to test_sin, but for cosine; the derivative of cos(x) is -sin(x).
    v1 = ad.create_vector('v', [0, 100])
    v2 = ad.cos(v1)
    assert(v2[0].getValue() == 1)
    assert(np.isclose(v2[1].getValue(), np.cos(100)))
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.isclose(jacobian, np.array([[0, 0], [0, -np.sin(100)]])).all())
def test_neg():
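    # Unary negation and double negation of a vector.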
    v1 = ad.create_vector('v', [1, 2])
    v2 = -v1
    assert(v2[0].getValue() == -1)
    assert(v2[1].getValue() == -2)
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[-1, 0], [0, -1]])))
    v3 = -v2
    assert(v3[0].getValue() == 1)
    assert(v3[1].getValue() == 2)
    jacobian = ad.get_jacobian(v3, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 1]])))

    v1 = ad.create_vector('v', [1, 2])
    v2 = -1 * -v1
    assert(v2[0].getValue() == 1)
    assert(v2[1].getValue() == 2)
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 1]])))
def test_rpow():
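    # Reflected power (constant base, vector exponent); d/dx b**u = log(b) * b**u * du/dx.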
    v1 = ad.create_vector('v', [2, 5])
    v2 = 2 ** v1
    assert(v2[0].getValue() == 4)
    assert(v2[1].getValue() == 32)
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[np.log(2) * 4, 0], [0, np.log(2) * 32]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 5)
    v1 = np.array([x, y])
    v2 = 2 ** v1
    assert(v2[0].getValue() == 4)
    assert(v2[1].getValue() == 32)
    jacobian = ad.get_jacobian(v2, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[np.log(2) * 4, 0], [0, np.log(2) * 32]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = np.array([x, y])
    v2 = 2 ** (2 * v1)
    assert(v2[0].getValue() == 16)
    assert(v2[1].getValue() == 64)
    jacobian = ad.get_jacobian(v2, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[np.log(2) * 32, 0], [0, np.log(2) * 128]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = np.array([x, y])
    v2 = (2 ** 2) ** v1
    assert(v2[0].getValue() == 16)
    assert(v2[1].getValue() == 64)
    jacobian = ad.get_jacobian(v2, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[np.log(2) * (2 ** 4) * 2, 0], [0, np.log(2) * (2 ** 6) * 2]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = np.array([x + y, x])
    v2 = (2 ** 2) ** v1
    assert(v2[0].getValue() == 2 ** 10)
    assert(v2[1].getValue() == 16)
    jacobian = ad.get_jacobian(v2, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[np.log(2) * (2 ** 10) * 2, np.log(2) * (2 ** 10) * 2], [np.log(2) * (2 ** 4) * 2, 0]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = np.array([x + y, x])
    v2 = 2 ** (2 * v1)
    assert(v2[0].getValue() == 2 ** 10)
    assert(v2[1].getValue() == 16)
    jacobian = ad.get_jacobian(v2, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[np.log(2) * (2 ** 10) * 2, np.log(2) * (2 ** 10) * 2], [np.log(2) * (2 ** 4) * 2, 0]])))
def test_exp():
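    # Element-wise exponential of vectors and of composite Scalar expressions.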
    v1 = ad.create_vector('v', [2, 5])
    v2 = ad.exp(v1)
    assert(np.isclose(v2[0].getValue(), np.exp(2)))
    assert(np.isclose(v2[1].getValue(), np.exp(5)))
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[np.exp(2), 0], [0, np.exp(5)]])))

    v1 = ad.create_vector('v', [2, 5])
    v2 = ad.exp(2 * v1)
    assert(np.isclose(v2[0].getValue(), np.exp(4)))
    assert(np.isclose(v2[1].getValue(), np.exp(10)))
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, 2 * np.array([[np.exp(4), 0], [0, np.exp(10)]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = ad.exp(np.array([x + y, x * y]))
    assert(np.isclose(v1[0].getValue(), np.exp(5)))
    assert(np.isclose(v1[1].getValue(), np.exp(6)))
    jacobian = ad.get_jacobian(v1, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[np.exp(5), np.exp(5)], [3 * np.exp(6), 2 * np.exp(6)]])))
def test_sub():
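    # Element-wise subtraction: vector - vector, vector - constant, and mixed Scalar/vector operands.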
    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = np.array([y, x])
    v3 = v1 - v2 
    assert(v3[0].getValue() == -1)
    assert(v3[1].getValue() == 1)
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[1, -1], [-1, 1]])))

    v1 = ad.create_vector('v', [1, 2])
    v2 = v1 - 10
    assert(v2[0].getValue() == -9)
    assert(v2[1].getValue() == -8)
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 0], [0, 1]])))

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = ad.create_vector('v', [1, 5])
    v3 = v1 - v2
    assert(v3[0].getValue() == 0)
    assert(v3[1].getValue() == -3)  
    jacobian = ad.get_jacobian(v3, ['x', 'y', 'v1', 'v2'])
    assert(np.array_equal(jacobian, np.array([[1, 0, -1, 0], [0, 1, 0, -1]])))

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 2)
    v1 = np.array([x, y])
    v2 = np.array([y, 10])
    v3 = v1 - v2
    assert(v3[0].getValue() == -1)
    assert(v3[1].getValue() == -8)  
    jacobian = ad.get_jacobian(v3, ['x', 'y'])
    assert(np.array_equal(jacobian, np.array([[1, -1], [0, 1]])))
def gradient_descent(f,
                     initial_guess,
                     step_size=0.01,
                     max_iter=10000,
                     tol=1e-12):
    """
    Implements gradient descent

    INPUTS
    ======= 
    f: function 
    The function that we are trying to find the minimum of. It must take a single list/array with the same dimension as len(initial_guess).
    
    initial_guess: List or array of ints/floats
    The initial position to begin the search for the minimum of the function 'f'.

    step_size: float
    The step size. In this case the step size will be constant
    
    max_iter: int
    The max number of iterations
    
    tol: float
    The tolerance. If the norm of the gradient is less than the tolerance, the algorithm will stop
    
    RETURNS
    ========
    Tuple
    A tuple whose first entry is the position of the minimum and whose second entry is the number of iterations the algorithm took to stop
    """

    x = np.array(initial_guess)
    for i in range(max_iter):
        x_vector = ad.create_vector('x', x)
        fn_at_x = f(x_vector)
        gradient = fn_at_x.getGradient(
            ['x{}'.format(i) for i in range(1,
                                            len(x) + 1)])
        if np.linalg.norm(gradient) < tol:
            break
        x = x - step_size * gradient
    return (x, i + 1)
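# A minimal usage sketch for gradient_descent (illustration only, not part of the library).
# It assumes, as the getGradient call above requires, that 'f' maps the vector of ad Scalars
# it receives to a single ad Scalar expression.
def _example_gradient_descent():
    # f(x) = (x1 - 1)^2 + (x2 + 2)^2 has its minimum at (1, -2)
    def f(v):
        return (v[0] - 1) ** 2 + (v[1] + 2) ** 2

    minimum, n_iter = gradient_descent(f, [0.0, 0.0], step_size=0.1)
    return minimum, n_iter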
def line_search(f, x, p, tau=0.1, c=0.1, alpha=1):
    """
    Implements Backtracking Line Search.  https://en.wikipedia.org/wiki/Backtracking_line_search

    INPUTS
    ======= 
    f: function
    The function that we are trying to find the minimum of. It must take a single list/array with the same dimension as len(x)
    
    x: List or array of ints/floats
    The initial position 
    
    p: numpy array
    Descent direction

    tau: float
    Search control parameter tau (the factor by which alpha is shrunk at each backtracking step)

    c: float
    Search control parameter c

    alpha: float
    Starting alpha
    
    RETURNS
    ========
    float
    The alpha we found through backtracking line search
    """

    x = ad.create_vector('x', x)
    fn_val1 = f(x)
    fn_val2 = f(x + alpha * p)
    gradient = fn_val1.getGradient(
        ['x{}'.format(i) for i in range(1,
                                        len(x) + 1)])
    m = (p * gradient).sum()
    t = -c * m
    while ad.get_value(fn_val1 - fn_val2) < alpha * t:
        alpha = tau * alpha
        fn_val2 = f(x + alpha * p)
    return alpha
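# A minimal usage sketch for line_search (illustration only). It assumes 'f' maps a vector of
# ad Scalars to a single ad Scalar, and uses the negative gradient at the starting point as the
# descent direction 'p'.
def _example_line_search():
    def f(v):
        return (v[0] - 1) ** 2 + (v[1] + 2) ** 2

    x = np.array([0.0, 0.0])
    gradient = f(ad.create_vector('x', x)).getGradient(['x1', 'x2'])
    alpha = line_search(f, x, -gradient)
    return alpha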
def quasi_newtons_method(f,
                         initial_guess,
                         max_iter=10000,
                         method='BFGS',
                         tol=1e-12):
    """
    Implements Quasi-Newton methods with different methods to estimate the inverse of the Hessian.
    Utilizes backtracking line search to determine step size.     
    https://en.wikipedia.org/wiki/Quasi-Newton_method

    INPUTS
    ======= 
    f: function 
    The function that we are trying to find the minimum of. It must take a single list/array with the same dimension as len(initial_guess).
    
    initial_guess: List or array of ints/floats
    The initial position to begin the search for the minimum of the function 'f'.
    
    max_iter: int
    The max number of iterations
    
    method: String
    The update method to update the estimate of the inverse of the Hessian.
    Currently, BFGS, DFP, and Broyden are implemented.
    
    tol: float
    The tolerance. If the norm of the gradient is less than the tolerance, the algorithm will stop
    
    RETURNS
    ========
    Tuple
    A tuple whose first entry is the position of the minimum and whose second entry is the number of iterations the algorithm took to stop
    """

    if method not in ['BFGS', 'DFP', 'Broyden']:
        raise Exception("Not a valid method.")
    x = initial_guess
    H = np.identity(len(x))
    for i in range(max_iter):
        x_vector = ad.create_vector('x', x)
        fn_at_x = f(x_vector)
        gradient = fn_at_x.getGradient(
            ['x{}'.format(i) for i in range(1,
                                            len(x) + 1)])

        p = -H @ gradient

        alpha = line_search(f, x, p)
        delta_x = alpha * p

        x = x + delta_x
        x_vector2 = ad.create_vector('x', x)
        fn_at_x2 = f(x_vector2)
        gradient2 = fn_at_x2.getGradient(
            ['x{}'.format(i) for i in range(1,
                                            len(x) + 1)])
        if np.linalg.norm(gradient2) < tol:
            break
        y = (gradient2 - gradient).reshape(-1, 1)
        delta_x = delta_x.reshape(-1, 1)
        if method == 'BFGS':
            H = (np.identity(len(H)) - (delta_x @ y.T) / (y.T @ delta_x)) @ H \
                @ (np.identity(len(H)) - (y @ delta_x.T) / (y.T @ delta_x)) + (delta_x @ delta_x.T) / (y.T @ delta_x)
        elif method == 'DFP':
            H = H + (delta_x @ delta_x.T) / (delta_x.T @ y) - (
                H @ y @ y.T @ H) / (y.T @ H @ y)
        elif method == 'Broyden':
            H = H + ((delta_x - H @ y) @ delta_x.T @ H) / (delta_x.T @ H @ y)

    return (x, i + 1)
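# A minimal usage sketch for quasi_newtons_method (illustration only); same assumption about 'f'
# as in the gradient_descent example above.
def _example_quasi_newton():
    def f(v):
        return (v[0] - 1) ** 2 + (v[1] + 2) ** 2

    x_min, n_iter = quasi_newtons_method(f, np.array([0.0, 0.0]), method='BFGS')
    return x_min, n_iter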
def newtons_method(f, initial_guess, max_iter=1000, method='exact', tol=1e-12):
    """
    Implements Newton's method for root-finding with different methods to find the step at each iteration
    
    INPUTS
    ======= 
    f: Function 
    The function that we are trying to find a root of. It must take a single list/array with the same dimension as len(initial_guess).
    
    initial_guess: List or array of ints/floats
    The initial position to begin the search for the roots of the function 'f'.
    
    max_iter: int
    The max number of iterations
    
    method: String
    The method to solve Ax=b to find the step 'x' at each iteration.
    Options:
        'inverse' : calculate (A^-1)*b = x
        'exact' : Use np.linalg.solve(A, b)
        'gmres' : Use scipy.sparse.linalg.gmres(A, b), which finds a solution iteratively
        'gmres_action' : Use scipy.sparse.linalg.gmres(L, b), where 'L' is a linear operator that efficiently computes the action A*x. Works well for functions with sparse Jacobian matrices.
    
    tol: float
    The tolerance. If the absolute values of the step components in one iteration are all less than tol, the algorithm stops
    
    RETURNS
    ========
    Tuple
    A tuple whose first entry is the approximate root and whose second entry is the number of iterations the algorithm took to stop.
    
    NOTES
    =====
    POST:
        - Returns a tuple. The first entry is the approximate root, and the second entry is the number of iterations the algorithm took to stop.
        - If convergence is not reached within 'max_iter' iterations, a RuntimeError is raised to alert the user.
    """

    if method not in ['inverse', 'exact', 'gmres', 'gmres_action']:
        raise Exception("Not a valid method.")
    if len(f(initial_guess)) != len(initial_guess):
        raise Exception(
            'Output dimension of f should be the same as the input dimension of f.'
        )
    if method == 'gmres_action':
        return _newtons_method_gmres_action(f, initial_guess, max_iter, tol)
    x0 = ad.create_vector('x0', initial_guess)
    for iter_num in range(max_iter):
        fn = np.array(f(x0))
        # convert the list/array returned by f so that downstream vector autodiff functions work properly
        jacob = ad.get_jacobian(
            fn, ['x0{}'.format(i) for i in range(1,
                                                 len(fn) + 1)])
        if method == 'inverse':
            step = np.linalg.inv(-jacob).dot(ad.get_value(fn))
        elif method == 'exact':
            step = np.linalg.solve(-jacob, ad.get_value(fn))
        elif method == 'gmres':
            step, _ = gmres(jacob, -ad.get_value(fn), tol=tol, atol='legacy')
        xnext = x0 + step

        #check if we have converged
        if np.all(np.abs(ad.get_value(xnext) - ad.get_value(x0)) < tol):
            return (ad.get_value(xnext), iter_num + 1)

        #update x0 because we have not converged yet
        x0 = xnext

    raise RuntimeError(
        "Failed to converge after {0} iterations, value is {1}".format(
            max_iter, ad.get_value(x0)))
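# The 'gmres_action' branch above delegates to _newtons_method_gmres_action, which is not shown
# in this excerpt. The sketch below is a hedged reconstruction (illustration only): it assumes
# scipy.sparse.linalg.LinearOperator is available, that gmres is already imported as used above,
# and that ad.create_vector accepts a seed_vector argument so that summing the seeded derivatives
# of f gives the Jacobian-vector product J_f(x0) @ v without forming the Jacobian explicitly.
from scipy.sparse.linalg import LinearOperator


def _newtons_method_gmres_action(f, initial_guess, max_iter, tol):
    x0 = np.array(initial_guess, dtype=float)
    for iter_num in range(max_iter):

        def L_fun(v):
            """
            Action
            Returns J_f(x0) @ v by setting the entries of 'v' as the initial (seed) derivatives
            for the variables in x0.
            """
            f_x0 = f(ad.create_vector('x0', x0, seed_vector=v))
            f_x0 = np.array(f_x0)  # ensure that f_x0 is an np.array
            # each entry of get_deriv is a dict of seeded partials; their sum is one row of J @ v
            return np.array([sum(d.values()) for d in ad.get_deriv(f_x0)])

        fn_val = ad.get_value(np.array(f(ad.create_vector('x0', x0))))
        L = LinearOperator((len(fn_val), len(x0)), matvec=L_fun)
        step, _ = gmres(L, -fn_val, tol=tol, atol='legacy')

        # check if we have converged
        if np.all(np.abs(step) < tol):
            return (x0 + step, iter_num + 1)
        x0 = x0 + step

    raise RuntimeError(
        "Failed to converge after {0} iterations, value is {1}".format(
            max_iter, x0))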