import numpy as np


def gd(f, theta, gradient, num_iterations=5000, alpha=0.001):
    """Vanilla gradient descent on a function f(x, y)."""
    algorithm = "GD"
    print_head(algorithm, {'num_iterations': num_iterations, 'alpha': alpha})
    # Track the trajectory for later inspection/plotting.
    x_data, y_data, z_data = [theta[0]], [theta[1]], [f(theta[0], theta[1])]
    g = np.zeros(shape=2)
    t = 0
    while t < num_iterations:
        t = t + 1
        # Evaluate the partial derivatives at the current point.
        g[0] = gradient['x'](theta[0], theta[1])
        g[1] = gradient['y'](theta[0], theta[1])
        # Step against the gradient, scaled by the learning rate.
        theta = theta - (alpha * g)
        x_data.append(theta[0])
        y_data.append(theta[1])
        z_data.append(f(theta[0], theta[1]))
        if t % (num_iterations // 10) == 0:
            print_iteration(theta, t)
    print_found_minimum(theta, t)
    return x_data, y_data, z_data
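# A minimal usage sketch for gd(): `gradient` is a dict of partial-derivative
# callables keyed by 'x' and 'y', matching how the loop above indexes it, and
# theta is expected to be an ndarray. The quadratic bowl here is only an
# illustrative test function, and the print_* helpers are assumed to be
# defined elsewhere in this module.
def _example_gd():
    def f(x, y):
        return x ** 2 + y ** 2  # convex bowl with its minimum at (0, 0)

    gradient = {
        'x': lambda x, y: 2 * x,  # df/dx
        'y': lambda x, y: 2 * y,  # df/dy
    }
    theta_0 = np.array([3.0, -2.0])  # arbitrary starting point
    return gd(f, theta_0, gradient, num_iterations=1000, alpha=0.05)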
def nadam(f, theta, gradient, num_iterations=5000, alpha=0.001, beta_1=0.9,
          beta_2=0.999, epsilon=1e-8):
    """Nadam: Adam with Nesterov momentum (Dozat, 2016)."""
    algorithm = "Nadam"
    print_head(
        algorithm, {
            'num_iterations': num_iterations,
            'alpha': alpha,
            'beta_1': beta_1,
            'beta_2': beta_2,
            'epsilon': epsilon
        })
    x_data, y_data, z_data = [theta[0]], [theta[1]], [f(theta[0], theta[1])]
    m = np.zeros(shape=2)  # first-moment (momentum) estimate
    v = np.zeros(shape=2)  # second-moment estimate
    g = np.zeros(shape=2)
    t = 0
    while t < num_iterations:
        t = t + 1
        g[0] = gradient['x'](theta[0], theta[1])
        g[1] = gradient['y'](theta[0], theta[1])
        m = beta_1 * m + (1 - beta_1) * g
        v = beta_2 * v + (1 - beta_2) * np.power(g, 2)
        # Nesterov look-ahead: beta_1 times the bias-corrected momentum,
        # plus the bias-corrected current gradient.
        m_hat = beta_1 * m / (1 - np.power(beta_1, t)) + (1 - beta_1) * g / (
            1 - np.power(beta_1, t))
        v_hat = v / (1 - np.power(beta_2, t))
        theta = theta - alpha * m_hat / (np.sqrt(v_hat) + epsilon)
        x_data.append(theta[0])
        y_data.append(theta[1])
        z_data.append(f(theta[0], theta[1]))
        if t % (num_iterations // 10) == 0:
            print_iteration(theta, t)
    print_found_minimum(theta, t)
    return x_data, y_data, z_data
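# For reference, the update nadam() implements, in the notation of Dozat
# (2016) as summarized in Ruder's optimizer overview:
#   m_t = beta_1 * m_{t-1} + (1 - beta_1) * g_t
#   v_t = beta_2 * v_{t-1} + (1 - beta_2) * g_t^2
#   v_hat_t = v_t / (1 - beta_2^t)
#   theta_{t+1} = theta_t - alpha / (sqrt(v_hat_t) + epsilon)
#                 * (beta_1 * m_t / (1 - beta_1^t) + (1 - beta_1) * g_t / (1 - beta_1^t))
# i.e. Adam's bias-corrected momentum with a Nesterov-style look-ahead that
# mixes in the current gradient g_t.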
def adamax(f, theta, gradient, num_iterations=5000, alpha=0.001, beta_1=0.9,
           beta_2=0.999):
    """AdaMax: Adam variant based on the infinity norm (Kingma & Ba, 2015)."""
    algorithm = "AdaMax"
    print_head(
        algorithm, {
            'num_iterations': num_iterations,
            'alpha': alpha,
            'beta_1': beta_1,
            'beta_2': beta_2
        })
    x_data, y_data, z_data = [theta[0]], [theta[1]], [f(theta[0], theta[1])]
    m = np.zeros(shape=2)  # first-moment (momentum) estimate
    v = np.zeros(shape=2)  # weighted infinity norm (u_t in the paper)
    g = np.zeros(shape=2)
    t = 0
    while t < num_iterations:
        t = t + 1
        g[0] = gradient['x'](theta[0], theta[1])
        g[1] = gradient['y'](theta[0], theta[1])
        m = beta_1 * m + (1 - beta_1) * g
        m_hat = m / (1 - np.power(beta_1, t))
        # The max() keeps the largest (decayed) gradient magnitude seen so
        # far; unlike Adam's second moment, it needs no bias correction.
        v = np.maximum(beta_2 * v, np.abs(g))
        theta = theta - alpha * m_hat / v
        x_data.append(theta[0])
        y_data.append(theta[1])
        z_data.append(f(theta[0], theta[1]))
        if t % (num_iterations // 10) == 0:
            print_iteration(theta, t)
    print_found_minimum(theta, t)
    return x_data, y_data, z_data
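# For reference, AdaMax replaces Adam's L2-based second moment with an
# exponentially weighted infinity norm (Kingma & Ba, 2015, Section 7):
#   m_t = beta_1 * m_{t-1} + (1 - beta_1) * g_t
#   u_t = max(beta_2 * u_{t-1}, |g_t|)
#   theta_{t+1} = theta_t - alpha * m_t / ((1 - beta_1^t) * u_t)
# Because u_t is a running max rather than an average, no bias correction or
# epsilon term is needed; note, though, that the division in adamax() fails
# if the very first gradient is exactly zero.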