def test_plot_decision_boundary(self):
    """Plot the training data and the learned regularized logistic-regression
    decision boundary over degree-6 polynomial features.

    Visual smoke test: renders a figure, makes no assertions.
    """
    # Split rows on the label column (col 2): y == 0 vs y == 1.
    negatives = self.data[self.data[:, 2] == 0]
    positives = self.data[self.data[:, 2] == 1]
    plt.xlabel("Microchip Test 1")
    plt.ylabel("Microchip Test 2")
    plt.scatter(negatives[:, 0], negatives[:, 1],
                c='y', marker='o', s=40, linewidths=1, label="y=0")
    plt.scatter(positives[:, 0], positives[:, 1],
                c='b', marker='+', s=40, linewidths=2, label="y=1")

    # Train on the polynomial feature mapping plus an intercept column.
    dimension = 6
    X_mapped = map_feature(self.X[:, 0], self.X[:, 1], dimension)
    m, n = X_mapped.shape
    X_mapped = np.hstack((np.ones((m, 1)), X_mapped))
    theta = np.zeros((n + 1, 1))
    lamda = 1.0
    theta_optimized, min_cost = regularized_gradient_descent(
        X_mapped, self.y, theta, lamda)

    # Evaluate the hypothesis on a 50x50 grid; the 0.5 level set of h(x)
    # is the decision boundary for logistic regression.
    x1 = np.linspace(-1, 1.5, 50)
    x2 = np.linspace(-1, 1.5, 50)
    X1, X2 = np.meshgrid(x1, x2)
    hypo = np.zeros((len(x1), len(x2)))
    for i in range(len(x1)):
        for j in range(len(x2)):
            mapped = map_feature(
                np.array([X1[i][j]]).reshape((1, 1)),
                np.array([X2[i][j]]).reshape((1, 1)), dimension)
            mapped = np.hstack((np.ones((1, 1)), mapped))
            hypo[i][j] = hypothesis(mapped, theta_optimized)[0]

    # FIX: plt.contour() does not accept a `label` kwarg (matplotlib warns
    # and discards it), so the boundary was missing from the legend.
    # Attach the label to the contour's artist instead. On matplotlib < 3.8
    # the lines live in `cs.collections`; from 3.8 on the ContourSet is
    # itself the artist.
    cs = plt.contour(X1, X2, hypo, [0.5])
    boundary_artists = getattr(cs, 'collections', [cs])
    boundary_artists[0].set_label('Decision Boundary')
    plt.legend()
    plt.show()
def test_regularized_cost_function(self):
    """Both cost implementations return ln(2) at theta = 0, lambda = 1."""
    degree = 6
    features = map_feature(self.X[:, 0], self.X[:, 1], degree)
    rows, cols = features.shape
    # Prepend the intercept column of ones.
    features = np.hstack((np.ones((rows, 1)), features))
    initial_theta = np.zeros((cols + 1, 1))
    reg_param = 1.0
    expected = 0.69314718056  # ln(2): cost of an all-zero theta
    self.assertAlmostEqual(
        regularized_cost_function_loop(features, self.y, initial_theta,
                                       reg_param),
        expected, places=5)
    self.assertAlmostEqual(
        regularized_cost_function(features, self.y, initial_theta,
                                  reg_param),
        expected, places=5)
def test_regularized_cost_gradient(self):
    """Gradient at theta = 0, lambda = 1 matches the reference values."""
    degree = 6
    features = map_feature(self.X[:, 0], self.X[:, 1], degree)
    rows, cols = features.shape
    # Prepend the intercept column of ones.
    features = np.hstack((np.ones((rows, 1)), features))
    initial_theta = np.zeros((cols + 1, 1))
    gradient = regularized_cost_gradient(features, self.y, initial_theta,
                                         1.0)
    # Reference gradient, one entry per mapped feature (incl. intercept).
    expected_gradient = np.array([
        0.00847, 0.01879, 8e-05, 0.05034, 0.0115, 0.03766, 0.01836,
        0.00732, 0.00819, 0.02348, 0.03935, 0.00224, 0.01286, 0.0031,
        0.0393, 0.01997, 0.00433, 0.00339, 0.00584, 0.00448, 0.03101,
        0.03103, 0.0011, 0.00632, 0.00041, 0.00727, 0.00138, 0.03879
    ]).reshape((cols + 1, 1))
    np.testing.assert_almost_equal(gradient, expected_gradient, decimal=5)
def test_regularized_gradient_descent(self):
    """Optimizer reproduces the reference theta and minimum cost."""
    degree = 6
    features = map_feature(self.X[:, 0], self.X[:, 1], degree)
    rows, cols = features.shape
    # Prepend the intercept column of ones.
    features = np.hstack((np.ones((rows, 1)), features))
    initial_theta = np.zeros((cols + 1, 1))
    reg_param = 1.0
    theta_optimized, min_cost = regularized_gradient_descent(
        features, self.y, initial_theta, reg_param)
    # Reference solution for lambda = 1 on this data set.
    expected_theta_optimized = np.array([
        1.27268726, 0.62557024, 1.18096643, -2.01919814, -0.91761464,
        -1.43194196, 0.12375928, -0.36513066, -0.35703386, -0.17485797,
        -1.4584374, -0.05129691, -0.6160397, -0.27464158, -1.19282551,
        -0.24270352, -0.20570051, -0.04499796, -0.27782728, -0.29525866,
        -0.45613268, -1.0437783, 0.02762813, -0.29265655, 0.01543383,
        -0.32759297, -0.14389219, -0.92460139
    ])
    expected_min_cost = 0.5290027422883413
    np.testing.assert_almost_equal(theta_optimized,
                                   expected_theta_optimized, decimal=5)
    self.assertAlmostEqual(min_cost, expected_min_cost, places=3)
import scipy.optimize as opt import pandas as pd from numpy import zeros from gradient_descent import gradient_descent from cost_function import cost_function_regularized if __name__ == "__main__": orig_settings = termios.tcgetattr(sys.stdin) tty.setcbreak(sys.stdin) # load and init data data = pd.read_csv("data/ex2data1.txt", ",") X = data.iloc[:, :-1].to_numpy() y = data.iloc[:, -1].to_numpy() X = utils.map_feature(X[:, 0], X[:, 1], 2) weights = zeros(X.shape[1]) print( 'Menu:\n1) Press G for gradient descent.\n2) Press O for optimized algorithm.\n3) Press Q for quit.' ) key = sys.stdin.read(1)[0] if key == 'g' or key == 'G': print("Run gradient descent") weights = gradient_descent(weights, X, y, 500000, 0.00101, 0.003) utils.calculate_precision(weights, X, y) elif key == 'o' or key == 'O': print("Run optimized algorithm") weights, _, _ = opt.fmin_tnc(func=cost_function_regularized, x0=weights,
def test_map_feature(self):
    """Degree-6 polynomial feature mapping matches the precomputed file."""
    mapped = map_feature(self.X[:, 0], self.X[:, 1], 6)
    reference = np.loadtxt(os.path.join(DATA_DIR, 'ex2data2.mapped.txt'))
    np.testing.assert_almost_equal(mapped, reference, decimal=2)