def plot_Decision_Boundary(X, Y, theta):
    plot_data(X[:, 1:3], Y)
    if X.shape[1] <= 3:
        plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])
        plt.plot(plot_x, plot_y)
        plt.legend(['Decision Boundary', 'Admitted', 'Not admitted'])
        plt.axis([30, 100, 30, 100])
    else:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((u.size, v.size))
        for i in range(u.size):
            for j in range(v.size):
                z[i, j] = np.dot(map_feature(u[i], v[j], 6), theta)
        z = z.T  # transpose so rows index v and columns index u, as contour expects
        # contour takes levels=, not level=, and has no label kwarg;
        # pass the contour's line collection to legend to label the boundary
        cs = plt.contour(u, v, z, levels=[0], colors='b')
        plt.legend([cs.collections[0]], ['Decision Boundary'])
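# The function above assumes a map_feature helper. This is a minimal sketch of
# the usual polynomial feature mapping (the signature and default degree are
# assumptions, not the original module): it returns every term x1^(i-j) * x2^j
# for 0 <= j <= i <= degree, with a leading 1 for the intercept.
import numpy as np

def map_feature(x1, x2, degree=6):
    out = [1.0]
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out.append((x1 ** (i - j)) * (x2 ** j))
    return np.array(out)  # 28 features for degree = 6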
def plot_decision_boundary(theta, x_data, y_data):
    # plotData: x_data is an m x 3 matrix (intercept plus two exam scores)
    pos = np.where(y_data == 1)
    neg = np.where(y_data == 0)
    plt.plot(x_data[pos[0], 1], x_data[pos[0], 2], 'k+')
    plt.plot(x_data[neg[0], 1], x_data[neg[0], 2], 'yo')
    plt.xlabel("exam1 score")
    plt.ylabel("exam2 score")
    plt.legend(["Admitted", "Not admitted"], loc='upper right')
    # nothing to draw when there are only 2 columns;
    # when there are exactly 3 columns, the boundary is a straight line
    if x_data.shape[1] == 3:
        # two points are enough to define a line
        plot_x = np.zeros((2,))
        plot_x[0] = np.min(x_data[:, 1])
        plot_x[1] = np.max(x_data[:, 1])  # max of the same feature, not column 2
        # solve theta0 + theta1*x1 + theta2*x2 = 0 for x2
        plot_y = -(theta[1] * plot_x + theta[0]) / theta[2]
        plt.plot(plot_x, plot_y)
        plt.show()
    elif x_data.shape[1] > 3:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.ones((u.shape[0], v.shape[0]))
        for i in range(u.shape[0]):
            for j in range(v.shape[0]):
                # this map_feature variant does not add the intercept column,
                # so prepend a one before taking the dot product with theta
                tempt = np.column_stack((np.ones((1, 1)),
                                         mapFeature.map_feature(np.array([u[i]]),
                                                                np.array([v[j]]))))
                z[i, j] = np.dot(tempt, theta)
        # z is indexed [i, j] over (u, v), so transpose before contouring
        plt.contour(u, v, z.T, [0], colors='k')
        plt.show()
def plot_decision_boundary(theta, X, y):
    """Plot the classifier's decision boundary on top of the training data."""
    plot_data(X[:, 1:3], y)
    if X.shape[1] <= 3:
        # Only need two points to define a line, so choose two endpoints
        plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])
        # Calculate the decision boundary line
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])
        plt.plot(plot_x, plot_y)
        plt.legend(['Decision Boundary', 'Admitted', 'Not admitted'], loc=1)
        plt.axis([30, 100, 30, 100])
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((u.size, v.size))
        # Evaluate z = theta * x over the grid
        for i in range(u.size):
            for j in range(v.size):
                z[i, j] = np.dot(map_feature(u[i], v[j]), theta)
        z = z.T
        # Plot z = 0; note that you need to specify levels=[0], and that
        # contour ignores a label kwarg, so label the line via its collection
        cs = plt.contour(u, v, z, levels=[0], colors='r')
        plt.legend([cs.collections[0]], ['Decision Boundary'])
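# Why the linear branch draws that particular line: logistic regression
# predicts y = 1 exactly when theta^T x >= 0, so the boundary is the set where
#     theta[0] + theta[1] * x1 + theta[2] * x2 = 0
# Solving for x2 gives the plot_y expression used above:
#     x2 = -(theta[0] + theta[1] * x1) / theta[2]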
def plotDecisionBoundary(theta, X, y):
    # decide which boundary to draw from the number of features,
    # not from a hard-coded row count
    if X.shape[1] <= 3:
        plot_x = np.array([min(X[:, 1]) - 2, max(X[:, 1]) + 2])
        plot_y = (-1.0 / theta[2]) * (theta[1] * plot_x + theta[0])
        plt.plot(plot_x, plot_y)
        # plt.legend(['Decision Boundary', 'Not admitted', 'Admitted'])
        plt.show()
    else:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros(shape=(len(u), len(v)))
        for i in range(len(u)):
            for j in range(len(v)):
                z[i, j] = mapFeature.map_feature(np.array(u[i]),
                                                 np.array(v[j])).dot(np.array(theta))
        z = z.T
        # draw only the z = 0 level -- the decision boundary itself
        plt.contour(u, v, z, levels=[0], colors='green')
        plt.title('lambda = 1.0')
        plt.xlabel('Microchip Test 1')
        plt.ylabel('Microchip Test 2')
        plt.legend(['y = 1', 'y = 0', 'Decision boundary'])
        plt.show()
def predict(X, y, theta):
    n = len(theta)
    pos = np.where(y[:, 0] == 1)
    neg = np.where(y[:, 0] == 0)
    plt.figure(figsize=(6, 6))
    pos_data = X[pos]
    plt.scatter(pos_data[:, 1], pos_data[:, 2], marker='x', linewidths=2,
                color='red', label="Admitted")
    neg_data = X[neg]
    plt.scatter(neg_data[:, 1], neg_data[:, 2], marker='*', linewidths=2,
                color='blue', label="Not admitted")
    if n <= 3:
        x_data = np.linspace(30, 100)
        y_data = np.linspace(30, 100)
        x_data, y_data = np.meshgrid(x_data, y_data)
        z = theta[0][0] + theta[1][0] * x_data + theta[2][0] * y_data  # 50 x 50
        # levels: if an int n, draw n+1 automatically chosen contour lines;
        # if an array, draw lines at exactly those (increasing) values.
        # Intuition for the boundary: z >> 0 predicts 1, z << 0 predicts 0,
        # and z = 0 is the dividing line, so draw only that level.
        plt.contour(x_data, y_data, z, levels=[0])
    else:
        x_data = np.linspace(-1, 1)
        y_data = np.linspace(-1, 1)
        z = np.zeros((len(x_data), len(y_data)))
        for i in range(len(x_data)):
            for j in range(len(y_data)):
                features = map_feature(
                    np.append(x_data[i].reshape(-1, 1),
                              y_data[j].reshape(-1, 1), axis=1))
                z[i, j] = np.dot(features, theta)
        # transpose z so rows correspond to y_data, as contour expects
        plt.contour(x_data, y_data, z.T,
                    levels=[0]).collections[0].set_label("Decision boundary")
    plt.legend(loc='upper right')
    plt.xlabel('Exam1 Score')
    plt.ylabel('Exam2 Score')
    plt.show()
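# For reference, the class prediction that these boundary plots visualize is a
# simple sigmoid threshold. A minimal sketch (the helper name is an assumption,
# not part of the original file):
import numpy as np

def predict_labels(X, theta):
    # y = 1 wherever sigmoid(X @ theta) >= 0.5, i.e. wherever X @ theta >= 0 --
    # exactly the z = 0 surface the contour calls above are drawing
    probs = 1.0 / (1.0 + np.exp(-X.dot(theta)))
    return (probs >= 0.5).astype(int)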
def plot_decision_boundary(theta, x, y):
    # Plot the data
    pos = np.where(y[:, 0] == 1.0)
    neg = np.where(y[:, 0] == 0.0)
    temp_x = x[:, [1, 2]]
    plt.ion()
    plt.figure()
    plt.scatter(temp_x[pos, 0], temp_x[pos, 1], marker="+", label="Admitted")
    plt.scatter(temp_x[neg, 0], temp_x[neg, 1], marker="o", label="Not admitted")
    m, n = x.shape
    theta = theta.reshape(theta.shape[0], 1)
    if n <= 3:
        # Only need 2 points to define a line, so choose two endpoints
        plot_x = np.array([x[:, 1].min(), x[:, 1].max()]).reshape(2, 1)
        plot_y = ((-1 / theta[2]) * (theta[1] * plot_x + theta[0])).reshape(2, 1)
        # Mark the two endpoints, then plot the boundary and adjust the axes
        plt.plot(plot_x[0], plot_y[0], 'rx', markersize=10)
        plt.plot(plot_x[1], plot_y[1], 'rx', markersize=10)
        plt.plot(plot_x, plot_y, '-', label="Decision boundary")
        plt.axis([15, 120, 15, 120])
    else:
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)
        z = np.zeros((len(u), len(v)))
        for i in range(len(u)):
            for j in range(len(v)):
                z[i, j] = np.dot(map_feature(np.array([u[i]]),
                                             np.array([v[j]])), theta)
        # z is indexed [i, j] over (u, v), so transpose before contouring
        plt.contour(u, v, z.T, [0]).collections[0].set_label("Decision boundary")
input('Program paused. Press ENTER to continue')

# ===================== Part 1: Regularized Logistic Regression =====================
# In this part, you are given a dataset with data points that are not
# linearly separable. However, you would still like to use logistic
# regression to classify the data points.
# To do so, you introduce more features -- in particular, you add
# polynomial features to the data matrix (similar to polynomial regression).

# Add polynomial features
# Note that mapFeature also adds a column of ones for us, so the intercept
# term is handled
X = mf.map_feature(X[:, 0], X[:, 1])

# Initialize fitting parameters
initial_theta = np.zeros(X.shape[1])

# Set regularization parameter lambda to 1
lmd = 1

# Compute and display initial cost and gradient for regularized logistic regression
cost, grad = cfr.cost_function_reg(initial_theta, X, y, lmd)

np.set_printoptions(formatter={'float': '{: 0.4f}\n'.format})
print('Cost at initial theta (zeros): {}'.format(cost))
print('Expected cost (approx): 0.693')
print('Gradient at initial theta (zeros) - first five values only: \n{}'.format(
    grad[0:5]))
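# For context, a minimal sketch of what cost_function_reg is expected to return
# (the standard regularized logistic-regression cost and gradient; the actual
# cfr module may differ in details):
#
#   J(theta) = (1/m) * sum(-y*log(h) - (1-y)*log(1-h)) + (lmd/(2m)) * sum(theta[1:]^2)
#
import numpy as np

def cost_function_reg(theta, X, y, lmd):
    m = y.size
    h = 1.0 / (1.0 + np.exp(-X.dot(theta)))            # hypothesis
    cost = (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) / m
    cost += (lmd / (2 * m)) * np.sum(theta[1:] ** 2)   # don't regularize theta[0]
    grad = X.T.dot(h - y) / m
    grad[1:] += (lmd / m) * theta[1:]
    return cost, grad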
import sigmoid
import mapFeature
import multiprocessing
from multiprocessing import Pool, Value
import math
import numpy as np
import pylab as pl
import time

# Load the dataset
data = np.loadtxt('ex2data2.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]
m, n = X.shape

new_data = mapFeature.map_feature(X[:, 0], X[:, 1])

# ---- standardize the data, skipping the intercept column (and without
# ---- shadowing m, which still counts the training examples) ----
for i in range(1, new_data.shape[1]):
    mu = np.mean(new_data[:, i])
    sigma = np.std(new_data[:, i])
    new_data[:, i] = (new_data[:, i] - mu) / sigma

# ---- parameters ----
lamda = 0.1
learning_rate = 0.1
max_iters = 400
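# A sketch of how these parameters are typically consumed -- one regularized
# batch-gradient-descent loop. This is an assumed continuation (the file's
# actual training code is not shown here, and sigmoid.sigmoid is an assumed
# function name inside the module imported above):
theta = np.zeros(new_data.shape[1])
for _ in range(max_iters):
    h = sigmoid.sigmoid(new_data.dot(theta))   # predictions for all m examples
    grad = new_data.T.dot(h - y) / m
    grad[1:] += (lamda / m) * theta[1:]        # leave the intercept unregularized
    theta -= learning_rate * grad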
    return X, y


def normal_data(x):
    data = x.copy()
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0, ddof=1)
    for i in range(len(mean)):
        data[:, i] = (data[:, i] - mean[i]) / std[i]
    return data


if __name__ == '__main__':
    X, y = load_data()
    print(X[0], y[0])
    m = len(X)
    # plot_data(X, y)
    x = map_feature(X)  # polynomial features, including the intercept column
    n = len(x[0])
    theta = np.ones((n, 1))
    # loss = cost_function_reg(x, y, theta, 1)
    # print(loss)
    ret = my_fminunc(x, y, theta, 2)
    print(ret['success'])
    print(np.round(ret['x'], 2))
    theta = ret['x'].reshape(-1, 1)
    predict(x, y, theta)
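# my_fminunc is defined elsewhere in this project. A plausible sketch of such a
# wrapper around scipy.optimize.minimize, returning the dict-style result used
# above (the actual implementation may differ):
import scipy.optimize as op

def my_fminunc(x, y, theta, lmd):
    # minimize passes theta as the first argument, so adapt it to the
    # cost_function_reg(x, y, theta, lmd) signature seen in the comments above
    res = op.minimize(lambda t: cost_function_reg(x, y, t.reshape(-1, 1), lmd),
                      x0=theta.ravel())
    return {'success': res.success, 'x': res.x}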
# linearly separable. However, you would still like to use logistic
# regression to classify the data points.
# To do so, you introduce more features -- in particular, you add
# polynomial features to the data matrix (similar to polynomial regression).

# Add polynomial features
# Note that mapFeature also adds a column of ones for us, so the intercept
# term is handled
cols = data.shape[1]
X = data.iloc[:, :cols - 1]
y = data.iloc[:, cols - 1:]
X = mf.map_feature(X)

# Initialize fitting parameters
initial_theta = np.zeros(X.shape[1])

# Set regularization parameter lambda to 1
lmd = 1

# Compute and display initial cost and gradient for regularized logistic regression
cost, grad = cfr.cost_function_reg(initial_theta, X, y, lmd)

print('Cost at initial theta (zeros): {}'.format(cost))
print('Expected cost (approx): 0.693')
print('Gradient at initial theta (zeros) - first five values only: \n{}'.format(
    grad[0:5]))
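# Here map_feature takes the whole two-column DataFrame at once rather than two
# scalar or vector arguments. A minimal sketch of that single-argument form
# (assumed, mirroring the scalar variant used elsewhere in this section):
import numpy as np

def map_feature(X, degree=6):
    x1, x2 = np.asarray(X)[:, 0], np.asarray(X)[:, 1]
    cols = [np.ones(x1.shape[0])]                      # intercept column
    for i in range(1, degree + 1):
        for j in range(i + 1):
            cols.append((x1 ** (i - j)) * (x2 ** j))
    return np.column_stack(cols)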
line_list = file.readlines()
m = len(line_list)
n = len(line_list[0].split(",")) - 1  # ignore x0
x_data = np.zeros((m, n))
y_data = np.zeros((m, 1))
for i in range(m):
    line_temp = line_list[i].split(",")
    x_data[i, :] = line_temp[:2]
    y_data[i, :] = line_temp[-1]
file.close()

# ================== plotData ===================
plotData_reg.plot_data(x_data, y_data)
# ================== plotData end ===============

# ================== regularized logistic regression ==============
# ================== map feature ================================
map_x = mapFeature.map_feature(x_data[:, 0], x_data[:, 1])
x_data = np.column_stack((np.ones((m, 1)), map_x))

# ================== costFunctionReg ==========================
# set the regularization parameter; try lambda = 0, 1, 10, 100 or other
# values -- on this dataset, lambda = 1 gives the best result
lambd = 10
theta = np.ones(x_data.shape[1])

# ====== test cost and grad (lambda = 10) =========
# cost = costFunctionReg.cost_function(theta, x_data, y_data, lambd)
# print("cost is {}".format(cost))
# print("expected cost is 3.16")
grad = costFunctionReg.gradient(theta, x_data, y_data, lambd)
print("grad is {}".format(grad[0:5]))
print("expected grad (approx): 0.3460\n 0.1614\n 0.1948\n 0.2269\n 0.0922\n")
# ====== test end ====================================

result = op.minimize(costFunctionReg.cost_function, theta,
                     # assumed continuation: args and jac follow the
                     # cost_function/gradient signatures used just above
                     args=(x_data, y_data, lambd), jac=costFunctionReg.gradient)