def d_dimensional_comparison(beta_star, num_points, l):
    # note: the scatter / boundary plot below assumes d == 2 features
    d = len(beta_star) - 1
    X_list = [np.random.uniform(0, 256, num_points) for _ in range(d)]
    X = np.column_stack(X_list)
    X = np.column_stack((np.ones(num_points), X))
    distances = np.array([xi.dot(beta_star) for xi in X])
    Y = [1 if sigmoid(dist) > random.random() else -1 for dist in distances]
    x1_pos, x2_pos = zip(*[xi[1:] for xi, yi in zip(X, Y) if yi == 1])
    x1_neg, x2_neg = zip(*[xi[1:] for xi, yi in zip(X, Y) if yi == -1])
    plt.scatter(x1_pos, x2_pos, marker='+', color='red')
    plt.scatter(x1_neg, x2_neg, marker='o', color='blue')
    x2_star = calculate_x2s(np.column_stack([np.ones(256), np.arange(256)]), beta_star)
    beta_hat = gradient_descent(X, Y, l=l, epsilon=1e-8, step_size=1e-2, max_steps=100)
    x2_hat = calculate_x2s(np.column_stack([np.ones(256), np.arange(256)]), beta_hat)
    plt.plot(np.arange(256), x2_star, color='purple', label='true boundary')
    plt.plot(np.arange(256), x2_hat, color='green', label='predicted boundary')
    plt.legend()
    plt.show()
def cost():
    data = pd.read_csv('student_score.txt', names=['Exam1', 'Exam2', 'admission'])
    x = data[['Exam1', 'Exam2']]
    y = data['admission']
    w = np.zeros(3)
    x = np.hstack((np.ones((y.size, 1)), x))
    p = sigmoid(x.dot(w))
    cost = -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))
    print('Cost %.3f when w is zero' % cost)
def predict_nn(Theta1, Theta2, X):
    p = X.shape[0]
    num_labels = Theta2.shape[0]
    X = np.concatenate((np.ones((p, 1)), X), axis=1)
    prob = lr.sigmoid(np.dot(X, Theta1.T))
    t = prob.shape[0]
    prob = np.concatenate((np.ones((t, 1)), prob), axis=1)
    prob = lr.sigmoid(np.dot(prob, Theta2.T))
    pred = np.zeros((p, 1))  # our prediction vector
    # step through each example and take the column with the highest probability;
    # the column number maps to the digit: 1 is in column 0, 2 in column 1, ...,
    # and 0 in column 9
    for i in range(p):
        col = prob[i, :].argmax()
        if col == 9:
            pred[i] = 0
        else:
            pred[i] = col + 1
    return pred
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.5
    weights = ones(n)  # initialize to all ones
    weightsHistory = zeros((500 * m, n))
    for j in range(500):
        for i in range(m):
            h = logistic_regression.sigmoid(sum(dataMatrix[i] * weights))
            error = classLabels[i] - h
            weights = weights + alpha * error * dataMatrix[i]
            weightsHistory[j * m + i, :] = weights
    return weightsHistory
def predict_one_vs_all(theta, X):
    (p, q) = X.shape
    num_labels = theta.shape[0]
    X = np.concatenate((np.ones((p, 1)), X), axis=1)  # add in the 1-vector
    prob = lr.sigmoid(np.dot(X, theta.T))
    pred = np.zeros((p, 1))
    for i in range(p):
        col = prob[i, :].argmax()
        if col:
            pred[i] = col
        else:
            pred[i] = 10
    return pred
def compute_cost_reg_nn(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lbd):
    m = X.shape[0]
    (Theta1, Theta2) = roll_parameters(nn_params, hidden_layer_size, input_layer_size, num_labels)
    # compute the hypothesis
    X1 = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    ax = lr.sigmoid(np.dot(X1, Theta1.T))
    # prefix a0, and then repeat the two steps if there are additional hidden layers
    ax = np.concatenate((np.ones((ax.shape[0], 1)), ax), axis=1)
    ax = lr.sigmoid(np.dot(ax, Theta2.T))  # now we're at the final / output layer
    # convert y to a collection of vectors in the oneVsAll format:
    # each vector has a 1 for its classification, and a 0 for all else
    y_all = np.zeros((m, num_labels))
    for i in np.arange(1, num_labels + 1):
        pos = np.where(y == i)[0]
        if i == 10:
            y_all[pos, 9] = 1
        else:
            y_all[pos, i - 1] = 1
    # calculate the cost function, retaining the classified value and discarding
    # all the rest: element-wise multiplication by 1s and 0s ensures that only
    # the labelled class contributes to each example's term
    J = sum(sum((-y_all * np.log(ax)) - ((1 - y_all) * np.log(1 - ax)))) / m
    # compute the regularization
    Theta1_Reg = vectorize_theta(Theta1)
    Theta2_Reg = vectorize_theta(Theta2)
    reg = (lbd / (2 * m)) * (sum(sum(Theta1_Reg * Theta1_Reg)) + sum(sum(Theta2_Reg * Theta2_Reg)))
    # update the cost function
    J = J + reg
    return J
def _cal_residual(self, f_values, targets):
    '''
    Compute the residuals.
    f_values: current predictions of all trees for all training samples.
    For regression and binary classification, f_values is a 1-D array of
    length n_samples. For multi-class classification, f_values is a 2-D
    array of shape (n_samples, n_classes).
    '''
    if self.predict_type == 'classification' and self.class_count > 2:
        # multi-class: one tree is built per class in each boosting round,
        # and each tree fits the residual of its own class
        p_hat = softmax(f_values)
        return targets - p_hat
    elif self.predict_type == 'classification':
        p_hat = sigmoid(f_values)  # predicted probability of class 1
        return targets - p_hat
    else:
        return targets - f_values
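# Standalone illustration of the binary-classification branch above (a minimal sketch
# with hypothetical values; assumes the same `sigmoid` helper the class already uses):
# the residual y - sigmoid(f) is the negative gradient of the log loss with respect to
# the raw score f, which is what the next tree in the ensemble is fit against.
import numpy as np

f_values = np.array([-2.0, 0.0, 1.5])  # current raw scores F(x) of the ensemble
targets = np.array([0.0, 1.0, 1.0])    # binary labels
print(targets - sigmoid(f_values))     # residuals: approximately [-0.119, 0.5, 0.182]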
def stocGradAscent1(dataMatrix, classLabels):
    dataMatrix = array(dataMatrix)
    classLabels = array(classLabels)
    m, n = shape(dataMatrix)
    weights = ones(n)  # initialize to all ones
    weightsHistory = zeros((40 * m, n))
    for j in range(40):
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01  # anneal the step size as training goes on
            # pick a random sample from those not yet used in this pass
            randIndex = int(random.uniform(0, len(dataIndex)))
            chosen = dataIndex[randIndex]
            h = logistic_regression.sigmoid(sum(dataMatrix[chosen] * weights))
            error = classLabels[chosen].astype('float64') - h.astype('float64')
            weights = weights + alpha * error * dataMatrix[chosen]
            weightsHistory[j * m + i, :] = weights
            del dataIndex[randIndex]
    print(weights)
    return weightsHistory
def forward_propagation(layer_coefficients, input_data):
    """
    Calculate neural network output based on input data and layer coefficients.
    Forward propagation algorithm.

    :param layer_coefficients: 1 x (L - 1) array of layer coefficients vectors, where L - layers count
    :param input_data: S0 x m input layer vector, where S0 - input layer units count, m - experiments count
    :return: 1 x L vector of layer activation vectors Sl x m, where Sl - l'th layer units count, m - experiments count
    """
    data = [input_data]  # S0 x m
    for theta in layer_coefficients:
        data.append(
            sigmoid(
                np.dot(
                    theta,  # Sl x (S[l-1] + 1)
                    np.vstack(([np.ones(data[-1].shape[1])], data[-1]))  # (S[l-1] + 1) x m
                )
            )  # Sl x m
        )
    return data
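# Minimal usage sketch for forward_propagation (hypothetical shapes; assumes the
# `sigmoid` helper already defined in this module): a 2-input, 3-hidden-unit,
# 1-output network evaluated on 5 examples.
import numpy as np

rng = np.random.default_rng(0)
theta1 = rng.normal(size=(3, 3))  # 3 hidden units x (2 inputs + bias)
theta2 = rng.normal(size=(1, 4))  # 1 output unit x (3 hidden units + bias)
X = rng.normal(size=(2, 5))       # 2 features x 5 examples

activations = forward_propagation([theta1, theta2], X)
print([a.shape for a in activations])  # [(2, 5), (3, 5), (1, 5)]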
print("Calculated GD = \n", grad) # Compute and display cost and gradient with non-zero theta test_theta = np.array([[-24], [0.2], [0.2]]) J = lr.compute_cost(test_theta, X1, y) grad = lr.gradient_descent(test_theta, X1, y) print('Cost at test theta: {:7.3f}'.format(J)) # ans = 0.218 print('Gradient at test theta: \n', grad) # ans = [[0.043], [2.566], [2.647]] # overlay the decision boundary on the data # but, first compute the optimized theta for global min :: ans = [[-25.161], [0.206], [0.201]] theta = lr.optimizer_func(initial_theta, X1, y) print('Computed theta: ', theta) theta = np.vstack(theta) # now compute the decision boundary lr.decision_boundary(theta, X1, y) # test the model by running a prediction :: ans = 0.775 +/- 0.002 # for a student with score 45 on exam 1 and score 85 on exam 2 X_test = np.array([1, 45, 85]) prob = lr.sigmoid(np.dot(X_test, theta)) print("Probability of student with scores {} getting admitted = {}".format( X_test[[1, 2]], prob)) # calculate the overall accuracy of our model :: ans = 89.0 p = lr.predict(theta, X1) accuracy = np.sum(np.equal(p, y)) / m print("Accuracy of the model = {:7.3f}%".format(accuracy * 100))
def gradient_descent_nn(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lbd):
    # back propagation: first run forward propagation, then push the error back
    m = X.shape[0]
    X1 = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    (Theta1, Theta2) = roll_parameters(nn_params, hidden_layer_size, input_layer_size, num_labels)
    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)
    for t in range(m):
        # step 1 :: perform forward propagation, stepping through each layer
        a1 = X1[t, :]  # X1 already has x0 (the ones column) added
        z2 = np.dot(a1, Theta1.T)
        a2 = lr.sigmoid(z2)
        a2 = np.insert(a2, 0, 1)  # insert a 1 at the beginning (bias unit)
        z3 = np.dot(a2, Theta2.T)
        a3 = lr.sigmoid(z3)
        # step 2 :: calculate the resultant error at the output layer
        ytemp = y[t, :]  # save the label
        y3 = np.zeros((1, num_labels))  # one-vs-all encoding of the label
        if ytemp[0] < 10:
            y3[0, ytemp[0] - 1] = 1  # set the column that matches the label
        else:
            y3[0, 9] = 1  # the digit '0' is represented as label 10
        delta_3 = (a3 - y3).T  # error at the final layer, as a column vector
        # step 3 :: propagate back and calculate the gradient error
        # delta(2) = Theta(2)' * delta(3) .* g'(z(2))
        # (no delta_1, since the input layer carries no error)
        # add the bias unit to z2 while computing the gradient, as a column vector
        delta_2 = np.dot(Theta2.T, delta_3)
        sg = np.asarray(sigmoid_gradient(np.insert(z2, 0, 1))).reshape(-1, 1)
        delta_2 = delta_2 * sg  # element-wise product
        # strip out the delta for the 0th (bias) unit that we added above
        delta_2 = delta_2[1:, :]
        # step 4 :: accumulate the gradient
        # Delta(l) = Delta(l) + delta(l+1) * a(l)'
        # a2 carries the bias term; a1 is the input layer and already has the ones column
        Theta2_grad = Theta2_grad + np.dot(delta_3, np.reshape(a2, (1, a2.shape[0])))
        Theta1_grad = Theta1_grad + np.dot(delta_2, np.reshape(a1, (1, a1.shape[0])))
    Theta1_grad = Theta1_grad / m
    Theta2_grad = Theta2_grad / m
    # regularized gradient: add (lambda / m) * Theta(layer) to each term,
    # with the bias column excluded from regularization
    Theta1_Reg = vectorize_theta(Theta1)
    Theta2_Reg = vectorize_theta(Theta2)
    Theta1_grad = Theta1_grad + ((lbd / m) * Theta1_Reg)
    Theta2_grad = Theta2_grad + ((lbd / m) * Theta2_Reg)
    # unroll the gradients
    grad = unroll_parameters(Theta1_grad, Theta2_grad)
    return grad
def sigmoid_gradient(z):
    # g'(z) = d/dz of g(z) = g(z) * (1 - g(z))
    g = lr.sigmoid(z)
    g = g * (1 - g)
    return np.matrix(g)
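# Quick sanity check for sigmoid_gradient (a minimal sketch; assumes the `lr.sigmoid`
# helper used above is the standard logistic function). g'(z) peaks at 0.25 for z = 0
# and decays toward 0 for large |z|.
import numpy as np

z = np.array([-10.0, 0.0, 10.0])
grad = np.asarray(sigmoid_gradient(z)).ravel()
np.testing.assert_allclose(grad, [4.54e-5, 0.25, 4.54e-5], atol=1e-4)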
def test_result_shape(self):
    """
    Test that the function returns the right shape.
    """
    result = sigmoid(self.theta, self.x)
    self.assertEqual(result.shape, self.expected_shape)
res = optimize.minimize(lr.compute_cost_and_grad,
                        theta,
                        (X, y),
                        jac=True,
                        method='TNC',
                        options=options)

# the fun property of the `OptimizeResult` object returns
# the value of costFunction at the optimized theta
cost = res.fun

# the optimized theta is in the x property
theta = res.x

# Print theta to screen
print('Cost at theta found by optimize.minimize: {:.3f}'.format(cost))
print('Expected cost (approx): 0.203\n')
print('theta:')
print('\t[{:.3f}, {:.3f}, {:.3f}]'.format(*theta))
print('Expected theta (approx):\n\t[-25.161, 0.206, 0.201]')

prob = lr.sigmoid(np.dot([1, 45, 85], theta))
print('For a student with scores 45 and 85, '
      'we predict an admission probability of {:.3f}'.format(prob))
print('Expected value: 0.775 +/- 0.002\n')

# Compute accuracy on our training set
p = lr.predict(theta, X)
print('Train Accuracy: {:.2f} %'.format(np.mean(p == y) * 100))
print('Expected accuracy (approx): 89.00 %')
def compute_cost(X, Y, W, b, Lambda):
    # M is the module-level number of training examples
    Z = np.dot(X, W) + b
    A = sigmoid(Z)
    cost = (-1 / M) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    regularisation_cost = (Lambda * np.sum(np.square(W))) / (2 * M)
    return cost + regularisation_cost
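# Toy check of the regularized cost above (a minimal sketch; `M` is assumed to be the
# module-level training-set size that compute_cost reads, and `sigmoid` the logistic
# helper defined elsewhere in this code). With zero weights the hypothesis is 0.5 for
# every example, so the unregularized cost is ln(2) ~ 0.693 and the L2 term adds nothing.
import numpy as np

M = 4
X = np.array([[1.0, 2.0], [2.0, 1.0], [3.0, 4.0], [4.0, 3.0]])
Y = np.array([0.0, 1.0, 0.0, 1.0])
W = np.zeros(2)
b = 0.0

print(compute_cost(X, Y, W, b, Lambda=0.0))  # ~0.693
print(compute_cost(X, Y, W, b, Lambda=1.0))  # still ~0.693, since W is all zeros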
def cross_validation(y, x, k_fold, function_name, lambda_=0, max_iters=0,
                     gamma=0, threshold=0.5, seed=1, print_=False):
    """Run k-fold cross-validation for the chosen method and return the mean
    train and test accuracy."""
    acc_te = []
    acc_tr = []
    k_indices = build_k_indices(y, k_fold, seed)
    initial_w = np.ones((x.shape[1])) * (-0.01)
    # get the k'th subgroup as test data, the others as train data
    for i in range(k_fold):
        x_tr = np.empty((0, x.shape[1]))
        y_tr = np.empty((0))
        # test data is taken from the k'th (i) group
        x_te = x[k_indices[i]]
        y_te = y[k_indices[i]]
        # all the other subgroups form the train data
        for j in range(k_fold):
            if j != i:
                x_tr = np.r_[(x_tr, x[k_indices[j]])]
                y_tr = np.r_[(y_tr, y[k_indices[j]])]
        # select the function to execute
        f = get_function(function_name)
        if function_name == 'least_squares':
            w, loss = f(y_tr, x_tr)
        elif function_name == 'reg_logistic_regression':
            w, loss = f(y_tr, x_tr, lambda_, initial_w, max_iters, gamma)
        elif function_name == 'ridge_regression':
            w, loss = f(y_tr, x_tr, lambda_)
        else:
            w, loss = f(y_tr, x_tr, initial_w, max_iters, gamma, print_)
        # calculate predictions for train and test data
        y_tr_prb = x_tr.dot(w)
        y_te_prb = x_te.dot(w)
        if function_name == 'logistic_regression' or function_name == 'reg_logistic_regression':
            y_tr_prb = sigmoid(y_tr_prb)
            y_te_prb = sigmoid(y_te_prb)
        # convert probabilities to class predictions
        y_tr_pr = probability_to_prediction(y_tr_prb, threshold)
        y_te_pr = probability_to_prediction(y_te_prb, threshold)
        y_te_real = probability_to_prediction(y_te, 0.5)
        y_tr_real = probability_to_prediction(y_tr, 0.5)
        # accumulate the fold accuracies
        acc_tr.append(get_prediction_accuracy(y_tr_real, y_tr_pr))
        acc_te.append(get_prediction_accuracy(y_te_real, y_te_pr))
    return np.mean(acc_tr), np.mean(acc_te)
def classifyVector(inX, weights):
    prob = lr.sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0
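# Minimal usage sketch for classifyVector (hypothetical values; assumes `lr.sigmoid`
# is the logistic helper used above, and that `weights` would normally come from one
# of the gradient-ascent routines in this code).
import numpy as np

inX = np.array([1.0, 0.7, -1.2])
weights = np.array([0.4, 1.1, -0.3])
print(classifyVector(inX, weights))  # 1.0, since sigmoid(1.53) > 0.5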
def test_result_values(self):
    """
    Test the bounds of the result: [should be very small, 0.5, very close to 1].
    """
    result = sigmoid(self.theta, self.x)
    self.assertLess(result[0], self.expected_answer[0])
    self.assertEqual(result[1], self.expected_answer[1])
    self.assertGreater(result[2], self.expected_answer[2])
def costFunctionReg(theta, X, y, lambda_):
    """
    Compute cost and gradient for logistic regression with regularization.

    Parameters
    ----------
    theta : array_like
        Logistic regression parameters. A vector with shape (n, ). n is the
        number of features including any intercept. If we have mapped our
        initial features into polynomial features, then n is the total number
        of polynomial features.

    X : array_like
        The data set with shape (m x n). m is the number of examples, and n is
        the number of features (after feature mapping).

    y : array_like
        The data labels. A vector with shape (m, ).

    lambda_ : float
        The regularization parameter.

    Returns
    -------
    J : float
        The computed value for the regularized cost function.

    grad : array_like
        A vector of shape (n, ) which is the gradient of the cost function
        with respect to theta, at the current values of theta.

    Instructions
    ------------
    Compute the cost `J` of a particular choice of theta. Compute the partial
    derivatives and set `grad` to the partial derivatives of the cost w.r.t.
    each parameter in theta.
    """
    # Initialize some useful values
    m = y.size  # number of training examples

    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)

    # ===================== YOUR CODE HERE ======================
    # X is (m x n) and theta is (n, ), so theta.dot(X.T) gives one score per
    # example; sigmoid(z) = 1 / (1 + e^-z) turns the scores into hypotheses.
    h = sigmoid(theta.dot(X.T))

    # J = 1/m * sum(-y*log(h) - (1 - y)*log(1 - h)) + lambda/(2m) * sum(theta[1:]^2)
    J = 1 / m * (-y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) \
        + lambda_ / (2 * m) * np.sum(theta[1:]**2)

    # grad = 1/m * X'(h - y) + (lambda/m) * theta, with the intercept term
    # theta[0] excluded from regularization
    grad = 1 / m * (h - y).dot(X) \
        + lambda_ / m * np.array([0 if i == 0 else theta[i] for i in range(len(theta))])
    # ============================================================
    return J, grad
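# A quick check of costFunctionReg on toy data (a minimal sketch; assumes the same
# `sigmoid` helper used inside the function). With theta all zeros the hypothesis is
# 0.5 for every example, so J = ln(2) ~ 0.693 regardless of lambda_.
import numpy as np

X = np.array([[1.0, 0.5], [1.0, -1.5], [1.0, 2.0]])  # intercept column plus one feature
y = np.array([1.0, 0.0, 1.0])
theta = np.zeros(2)

J, grad = costFunctionReg(theta, X, y, lambda_=1.0)
print(J)     # ~0.693
print(grad)  # 1/m * X'(h - y); the intercept gradient is never regularized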
def test_sigmoid():
    from logistic_regression import sigmoid
    logits = np.float32([[-3, 0, 3]]).T
    desired = np.float32([[0.04742587317756678, 0.5, 0.9525741268224334]]).T
    actual = sigmoid(logits)
    np.testing.assert_allclose(actual, desired, rtol=1e-3, atol=1e-3)
def test_sigmoid():
    assert abs(lr.sigmoid(0) - 0.5) < 1e-8
    assert abs(lr.sigmoid(-1e5)) < 1e-8
    assert abs(lr.sigmoid(1e5) - 1) < 1e-8
cost = result.fun
# display the cost at the minimum found by scipy.optimize over the specified number of iterations
print('Cost at theta found by scipy.optimize.minimize: %f' % cost)

# plot the decision boundary
plot_data(X_original, y, show=False)
u = np.linspace(-1, 1.5, 50)
v = np.linspace(-1, 1.5, 50)
z = np.zeros((u.size, v.size))
for i in range(u.size):
    for j in range(v.size):
        z[i, j] = map_feature(u[i], v[j]).dot(theta)
plot.contour(u, v, z.T, 0)
plot.show()

# output predictions (from the plot, microchips whose test results are close to 0
# near the centre have a high pass rate)
test_data = [[0.1, -0.1], [-0.7, 0.2], [0.8, -0.1], [1.0, -1.0]]
for data in test_data:
    test1 = data[0]
    test2 = data[1]
    x = map_feature(np.array(test1), np.array(test2))
    prob = sigmoid(np.array(x).dot(theta))
    print('Probability that a microchip with test results {} and {} passes quality assurance: {}'.format(
        test1, test2, prob))

# display the training accuracy
predictions = predict(theta, X)
accuracy = 100 * np.mean(predictions == y)
print('Train accuracy: %0.2f %%' % accuracy)