Example #1
def d_dimensional_comparison(beta_star, num_points, l):
    d = len(beta_star) - 1
    X_list = [np.random.uniform(0, 256, num_points) for _ in range(d)]
    X = np.column_stack(X_list)
    X = np.column_stack((np.ones(num_points), X))

    distances = np.array([xi.dot(beta_star) for xi in X])
    Y = [1 if sigmoid(dist) > random.random() else -1 for dist in distances]

    x1_pos, x2_pos = zip(*[xi[1:] for xi, yi in zip(X, Y) if yi == 1])
    x1_neg, x2_neg = zip(*[xi[1:] for xi, yi in zip(X, Y) if yi == -1])

    plt.scatter(x1_pos, x2_pos, marker='+', color='red')
    plt.scatter(x1_neg, x2_neg, marker='o', color='blue')
    x2_star = calculate_x2s(np.column_stack([np.ones(256),
                                             np.arange(256)]), beta_star)
    beta_hat = gradient_descent(X,
                                Y,
                                l=l,
                                epsilon=1e-8,
                                step_size=1e-2,
                                max_steps=100)
    x2_hat = calculate_x2s(np.column_stack([np.ones(256),
                                            np.arange(256)]), beta_hat)
    plt.plot(np.arange(256), x2_star, color='purple', label='true boundary')
    plt.plot(np.arange(256), x2_hat, color='green', label='predicted boundary')
    plt.legend()
    plt.show()
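
The example above calls helpers that are not shown (sigmoid, calculate_x2s, gradient_descent). A minimal sketch of the first two, assuming a 2-D feature space with beta = [b0, b1, b2] so that the boundary b0 + b1*x1 + b2*x2 = 0 can be solved for x2:

import numpy as np

def sigmoid(z):
    # plain logistic function
    return 1.0 / (1.0 + np.exp(-z))

def calculate_x2s(X, beta):
    # X holds columns [1, x1]; solve b0 + b1*x1 + b2*x2 = 0 for x2
    return -X.dot(beta[:2]) / beta[2]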
Example #2
def cost():
    data = pd.read_csv('student_score.txt',
                       names=['Exam1', 'Exam2', 'admission'])
    x = data[['Exam1', 'Exam2']]
    y = data['admission']
    w = np.zeros(3)
    x = np.hstack((np.ones((y.size, 1)), x))
    p = sigmoid(x.dot(w))
    cost = -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))
    print('Cost %.3f when w is zero' % cost)
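
As a quick sanity check (hypothetical labels, not the student_score.txt file): with w = 0 every prediction is sigmoid(0) = 0.5, so the printed cost is always ln 2 ≈ 0.693 regardless of the data.

import numpy as np

y = np.array([0, 1, 1, 0, 1])                     # hypothetical labels
p = np.full(y.shape, 0.5)                         # sigmoid(x.dot(w)) with w = 0
cost = -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))
print('Cost %.3f when w is zero' % cost)          # Cost 0.693 when w is zero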
Example #3
def predict_nn(Theta1, Theta2, X):
    p = X.shape[0]
    num_labels = Theta2.shape[0]

    X = np.concatenate((np.ones((p, 1)), X), axis=1)
    prob = lr.sigmoid(np.dot(X, Theta1.T))
    t = prob.shape[0]
    prob = np.concatenate((np.ones((t, 1)), prob), axis=1)
    prob = lr.sigmoid(np.dot(prob, Theta2.T))

    pred = np.zeros((p, 1))  # our prediction vector

    # step through each example and take the column with the highest probability;
    # the column index maps to the digit: 1 is in column 0, 2 in column 1, ...,
    # and 0 (encoded as label 10) is in column 9
    for i in range(p):
        col = prob[i, :].argmax()
        if (col == 9): pred[i] = 0
        else: pred[i] = col + 1
    return (pred)
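
The column-to-digit mapping in the loop above can be checked in isolation. A small illustration with made-up probabilities, showing that columns 0..8 stand for digits 1..9 and column 9 stands for digit 0:

import numpy as np

prob_row = np.array([0.1, 0.05, 0.7, 0.02, 0.01, 0.03, 0.02, 0.03, 0.02, 0.02])
col = prob_row.argmax()
digit = 0 if col == 9 else col + 1
print(col, digit)   # 2 3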
Example #4
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.5
    weights = ones(n)  #initialize to all ones
    weightsHistory = zeros((500 * m, n))
    for j in range(500):
        for i in range(m):
            h = logistic_regression.sigmoid(sum(dataMatrix[i] * weights))
            error = classLabels[i] - h
            weights = weights + alpha * error * dataMatrix[i]
            weightsHistory[j * m + i, :] = weights
    return weightsHistory
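
The inner loop above performs one stochastic-gradient-ascent update per sample. A self-contained sketch of a single update with plain numpy (hypothetical sample and learning rate), mirroring the loop body:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

weights = np.ones(3)
xi = np.array([1.0, 0.5, -1.2])       # one sample, bias term included
yi = 1.0                              # its class label
alpha = 0.5
h = sigmoid(np.sum(xi * weights))     # current prediction
weights = weights + alpha * (yi - h) * xi
print(weights)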
Example #5
def predict_one_vs_all(theta, X):
    (p, q) = X.shape
    num_labels = theta.shape[0]

    X = np.concatenate((np.ones((p, 1)), X), axis=1)  # add in the 1-vector
    prob = lr.sigmoid(np.dot(X, theta.T))
    pred = np.zeros((p, 1))
    for i in range(p):
        col = prob[i, :].argmax()
        if (col): pred[i] = col
        else: pred[i] = 10
    return (pred)
def compute_cost_reg_nn(nn_params, input_layer_size, hidden_layer_size,
                        num_labels, X, y, lbd):
    m = X.shape[0]
    (Theta1, Theta2) = roll_parameters(nn_params, hidden_layer_size,
                                       input_layer_size, num_labels)

    # compute the hypothesis
    X1 = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    ax = lr.sigmoid(np.dot(X1, Theta1.T))

    # prefix a0, and then repeat
    # the two steps if there are additional hidden layers
    ax = np.concatenate((np.ones((ax.shape[0], 1)), ax), axis=1)
    ax = lr.sigmoid(np.dot(ax, Theta2.T))

    # now we're at the final / output layer
    # convert y to a collection of vectors... in the oneVsAll format
    # each vector has a 1 for its classification, and a 0 for all else
    y_all = np.zeros((m, num_labels))
    for i in np.arange(1, num_labels + 1):
        pos = np.where(y == i)[0]
        if (i == 10): y_all[pos, 9] = 1
        else: y_all[pos, i - 1] = 1

    # calculate the cost function, by retaining the classified value, and
    # discarding all the rest.... element-wise multiplication by 1s and 0s will ensure that
    J = sum(sum((-y_all * np.log(ax)) - ((1 - y_all) * np.log(1 - ax)))) / m

    # compute the regularization
    Theta1_Reg = vectorize_theta(Theta1)
    Theta2_Reg = vectorize_theta(Theta2)
    reg = (lbd / (2 * m)) * (sum(sum(Theta1_Reg * Theta1_Reg)) +
                             sum(sum(Theta2_Reg * Theta2_Reg)))

    # update the cost function
    J = J + reg
    return (J)
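
The label-to-one-vs-all conversion done in the loop above can also be written in vectorized form. A sketch assuming labels take values 1..num_labels, with 10 standing for the digit 0:

import numpy as np

y = np.array([10, 3, 1, 10, 5])
num_labels = 10
y_all = np.zeros((y.size, num_labels))
y_all[np.arange(y.size), y - 1] = 1   # label 10 ('0') lands in column 9
print(y_all.argmax(axis=1))           # [9 2 0 9 4]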
Example #7
    def _cal_residual(self, f_values, targets):
        '''
        Compute the residuals.
        f_values: the current predictions of all trees for every training sample.
                  For regression and binary classification, f_values is a 1-D array of length n_samples.
                  For multi-class classification, f_values is a 2-D array of shape (n_samples, n_classes).
        '''
        if self.predict_type == 'classification' and self.class_count > 2:  # multi-class: one tree is built per class in each boosting round
            p_hat = softmax(f_values)
            return targets - p_hat
        elif self.predict_type == 'classification':
            p_hat = sigmoid(f_values)  # predicted probability of class 1
            return targets - p_hat
        else:
            return targets - f_values
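
A self-contained illustration of the two classification branches above, with stand-in sigmoid and softmax helpers (the real ones live elsewhere in that project):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def softmax(f):
    e = np.exp(f - f.max(axis=1, keepdims=True))   # shift for numerical stability
    return e / e.sum(axis=1, keepdims=True)

# binary classification: residual = y - sigmoid(f)
f_values = np.array([0.2, -1.3, 2.0])
targets = np.array([1.0, 0.0, 1.0])
print(targets - sigmoid(f_values))

# multi-class: residual per class = one-hot(y) - softmax(f)
f_multi = np.array([[0.5, 0.1, -0.2], [1.0, 2.0, 0.5]])
t_multi = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
print(t_multi - softmax(f_multi))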
Example #8
def stocGradAscent1(dataMatrix, classLabels):
    dataMatrix = array(dataMatrix)
    classLabels = array(classLabels)
    m, n = shape(dataMatrix)
    alpha = 0.4
    weights = ones(n)  #initialize to all ones
    weightsHistory = zeros((40 * m, n))
    for j in range(40):
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = int(random.uniform(0, len(dataIndex)))
            h = logistic_regression.sigmoid(
                sum(dataMatrix[randIndex] * weights))
            error = classLabels[randIndex].astype('float64') - h.astype(
                'float64')  # .astype() converts the value to float64
            # print(error)
            weights = weights + alpha * error * dataMatrix[randIndex]
            weightsHistory[j * m + i, :] = weights
            del (dataIndex[randIndex])
    print(weights)
    return weightsHistory
def forward_propagation(layer_coefficients, input_data):
    """
    Calculate neural network output based on input data and layer coefficients.
    Forward propagation algorithm.

    :param layer_coefficients: 1 x (L - 1) array of layer coefficients vectors, where L - layers count
    :param input_data: S0 x m input layer vector, where S0 - input layer units count, m - experiments count
    :return: 1 x l vector of layer activation vectors Sl x m, where Sl - l'th layer units count,
             m - experiments count
    """
    data = [input_data]  # S0 x m

    for theta in layer_coefficients:
        data.append(
            sigmoid(
                np.dot(
                    theta,  # Sl x (S[l-1] + 1)
                    np.vstack(([np.ones(data[-1].shape[1])],
                               data[-1]))  # (S[l-1] + 1) x m
                ))  # Sl x m
        )

    return data
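
A quick shape check for forward_propagation, assuming a plain numpy sigmoid is in scope and using a made-up 3-4-2 network with 5 examples:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
layer_coefficients = [rng.standard_normal((4, 4)),   # S1 x (S0 + 1)
                      rng.standard_normal((2, 5))]   # S2 x (S1 + 1)
input_data = rng.random((3, 5))                      # S0 x m
activations = forward_propagation(layer_coefficients, input_data)
print([a.shape for a in activations])                # [(3, 5), (4, 5), (2, 5)]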
Example #10
print("Calculated GD = \n", grad)

# Compute and display cost and gradient with non-zero theta
test_theta = np.array([[-24], [0.2], [0.2]])
J = lr.compute_cost(test_theta, X1, y)
grad = lr.gradient_descent(test_theta, X1, y)

print('Cost at test theta: {:7.3f}'.format(J))  # ans = 0.218
print('Gradient at test theta: \n', grad)  # ans = [[0.043], [2.566], [2.647]]

# overlay the decision boundary on the data
# but, first compute the optimized theta for global min :: ans = [[-25.161], [0.206], [0.201]]
theta = lr.optimizer_func(initial_theta, X1, y)
print('Computed theta: ', theta)
theta = np.vstack(theta)

# now compute the decision boundary
lr.decision_boundary(theta, X1, y)

# test the model by running a prediction  :: ans = 0.775 +/- 0.002
# for a student with score 45 on exam 1 and score 85 on exam 2
X_test = np.array([1, 45, 85])
prob = lr.sigmoid(np.dot(X_test, theta))
print("Probability of student with scores {} getting admitted = {}".format(
    X_test[[1, 2]], prob))

# calculate the overall accuracy of our model :: ans = 89.0
p = lr.predict(theta, X1)
accuracy = np.sum(np.equal(p, y)) / m
print("Accuracy of the model = {:7.3f}%".format(accuracy * 100))
def gradient_descent_nn(nn_params, input_layer_size, hidden_layer_size,
                        num_labels, X, y, lbd):
    # and now, on to back propagation
    # but first, let's compute the gradient using forward propagation

    m = X.shape[0]
    X1 = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    (Theta1, Theta2) = roll_parameters(nn_params, hidden_layer_size,
                                       input_layer_size, num_labels)

    Theta1_grad = np.zeros(Theta1.shape)
    Theta2_grad = np.zeros(Theta2.shape)

    for t in range(m):
        # step #1 :: perform forward propagation, stepping thru each layer
        a1 = X1[t, :]  # X1 already has X0 (the ones column) added

        z2 = np.dot(a1, Theta1.T)
        a2 = lr.sigmoid(z2)

        a2 = np.insert(a2, 0, 1)  # insert a 1 at the beginning of the np.array
        z3 = np.dot(a2, Theta2.T)
        a3 = lr.sigmoid(z3)

        # step #2 :: calculate the resultant error
        ytemp = y[t, :]  # save the labels
        y3 = np.zeros(
            (1, num_labels))  # create a column of labels for oneVsAll

        if (ytemp[0] < 10):
            y3[0, ytemp[0] -
               1] = 1  # update the appropriate column based on the label
        else:
            y3[0, 9] = 1  # account for '0' is represented as 10

        delta_3 = a3 - y3  # compute the error at the final layer
        delta_3 = delta_3.T  # transform into a vector

        # step #3 :: propagate back, and calculate the gradient error
        # δ(2) = Θ(2)ᵀ δ(3) .* g'(z(2))
        # no delta_1 is needed since that's the input layer... no error there
        # sigmoid_gradient returns a row vector (np.matrix), so transpose it and
        # multiply element-wise; a plain * would trigger matrix multiplication
        # Add the bias node to z2 while calculating the gradient.
        delta_2 = np.dot(Theta2.T, delta_3)
        delta_2 = np.multiply(delta_2, sigmoid_gradient(np.insert(z2, 0, 1)).T)
        # strip out the delta for the 0th (bias) unit... we had added that unit in
        delta_2 = delta_2[1:, 0]

        # step #4 :: accumulate the gradient
        # Δ(l) = Δ(l) + δ(l+1) · (a(l))ᵀ
        # add in the bias for a2 ... but, not for a1 since it is the input layer
        #Theta2_grad = Theta2_grad + np.dot(delta_3, np.matrix(a2))
        #Theta1_grad = Theta1_grad + np.dot(delta_2, np.matrix(a1))

        Theta2_grad = Theta2_grad + np.dot(delta_3,
                                           np.reshape(a2, (1, a2.shape[0])))
        Theta1_grad = Theta1_grad + np.dot(delta_2,
                                           np.reshape(a1, (1, a1.shape[0])))

    Theta1_grad = Theta1_grad / m
    Theta2_grad = Theta2_grad / m

    # regularized gradient
    # add the term (lambda / m) .* Theta(layer) to each term
    # no need to regularize for the first layer
    Theta1_Reg = vectorize_theta(Theta1)
    Theta2_Reg = vectorize_theta(Theta2)
    Theta1_grad = Theta1_grad + ((lbd / m) * Theta1_Reg)
    Theta2_grad = Theta2_grad + ((lbd / m) * Theta2_Reg)

    # unroll the gradients
    grad = unroll_parameters(Theta1_grad, Theta2_grad)
    return (grad)
def sigmoid_gradient(z):
    # g'(z) = d/dz of g(z) = g(z) . (1 - g(z))
    g = lr.sigmoid(z)
    g = g * (1 - g)
    return (np.matrix(g))
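
A hypothetical finite-difference check of the identity used in sigmoid_gradient, with a stand-in numpy sigmoid; at z = 0 the gradient equals exactly 0.25.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

z, eps = 0.7, 1e-6
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
analytic = sigmoid(z) * (1 - sigmoid(z))
print(abs(numeric - analytic) < 1e-9)     # True
print(sigmoid(0.0) * (1 - sigmoid(0.0)))  # 0.25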
    def test_result_shape(self):
        """ Test that the function returns the right shape. """
        result = sigmoid(self.theta, self.x)
        self.assertEqual(result.shape, self.expected_shape)
Example #14
res = optimize.minimize(lr.compute_cost_and_grad,
                        theta,
                        (X, y),
                        jac=True,
                        method='TNC',
                        options=options)

# the fun property of `OptimizeResult` object returns
# the value of costFunction at optimized theta
cost = res.fun

# the optimized theta is in the x property
theta = res.x

# Print theta to screen
print('Cost at theta found by optimize.minimize: {:.3f}'.format(cost))
print('Expected cost (approx): 0.203\n')

print('theta:')
print('\t[{:.3f}, {:.3f}, {:.3f}]'.format(*theta))
print('Expected theta (approx):\n\t[-25.161, 0.206, 0.201]')

prob = lr.sigmoid(np.dot([1, 45, 85], theta))
print('For a student with scores 45 and 85, '
      'we predict an admission probability of {:.3f}'.format(prob))
print('Expected value: 0.775 +/- 0.002\n')

# Compute accuracy on our training set
p = lr.predict(theta, X)
print('Train Accuracy: {:.2f} %'.format(np.mean(p == y) * 100))
print('Expected accuracy (approx): 89.00 %')
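
The jac=True pattern above requires the objective to return (cost, gradient) in one call. A self-contained sketch on a tiny made-up data set (the real compute_cost_and_grad lives in the lr module):

import numpy as np
from scipy import optimize

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def cost_and_grad(theta, X, y):
    # returns the logistic-regression cost and its gradient together
    m = y.size
    h = sigmoid(X.dot(theta))
    cost = -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))
    grad = X.T.dot(h - y) / m
    return cost, grad

X = np.column_stack([np.ones(6), [-2.0, -1.0, 1.0, 2.0, 0.5, -0.5]])
y = np.array([0.0, 0.0, 1.0, 1.0, 0.0, 1.0])      # not perfectly separable
res = optimize.minimize(cost_and_grad, np.zeros(2), args=(X, y),
                        jac=True, method='TNC')
print(res.fun, res.x)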
Example #15
def compute_cost(X, Y, W, b, Lambda):
    m = X.shape[0]  # number of training examples
    Z = np.dot(X, W) + b
    A = sigmoid(Z)
    cost = (-1 / m) * np.sum(Y * np.log(A) + (1 - Y) * np.log(1 - A))
    regularisation_cost = (Lambda * np.sum(np.square(W))) / (2 * m)
    return cost + regularisation_cost
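
A hypothetical usage check: the ridge penalty only involves W (b is never penalised), so the cost grows with Lambda while the data term stays fixed. Assumes a plain numpy sigmoid in scope:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
X = rng.random((8, 3))
Y = rng.integers(0, 2, size=8)
W = np.array([1.0, -2.0, 0.5])
print(compute_cost(X, Y, W, b=0.0, Lambda=0.0))
print(compute_cost(X, Y, W, b=0.0, Lambda=10.0))  # larger, by 10 * ||W||^2 / (2 m)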
Example #16
def cross_validation(y, x, k_fold, function_name, lambda_=0, max_iters=0, gamma=0, threshold=0.5, seed=1, print_=False):
    """Run k-fold cross-validation for the chosen method and return the mean train and test accuracy."""

    #rmse_tr = []
    #rmse_te = []
    losses_tr = []
    losses_te = []
    acc_te = []
    acc_tr = []
    
    k_indices = build_k_indices(y, k_fold, seed)
    
    initial_w = np.ones((x.shape[1]))*(-0.01)
        
    # get k'th subgroup in test, others in train
    for i in range(k_fold):
        x_tr = np.empty((0,x.shape[1]))
        y_tr = np.empty((0))
    
        # test data is taken from k'th (i) group
        x_te = x[k_indices[i]]
        y_te = y[k_indices[i]]
        
        # all the other subgroups are in train data
        for j in range(k_fold):
            if j != i:
                x_tr = np.r_[(x_tr, x[k_indices[j]])]
                y_tr = np.r_[(y_tr, y[k_indices[j]])]
                        
        # form data with polynomial degree
        #x_tr = build_poly(x_tr, degree)
        #x_te = build_poly(x_te, degree)
    
        # select function to execute
        f = get_function(function_name)
        
        if function_name == 'least_squares':
            w,loss = f(y_tr, x_tr)
        elif function_name == 'reg_logistic_regression':
            w,loss = f(y_tr, x_tr, lambda_, initial_w, max_iters, gamma)
        elif function_name == 'ridge_regression':
            w,loss = f(y_tr, x_tr, lambda_)
        else:
            w,loss = f(y_tr, x_tr, initial_w, max_iters, gamma, print_)
    
        # calculate the error for train and test data
        #rmse_tr.append(2*compute_mse(y_tr, x_tr, w))
        #rmse_te.append(2*compute_mse(y_te, x_te, w))
        
        # calculate predictions for train and test data
        y_tr_prb = x_tr.dot(w)
        y_te_prb = x_te.dot(w)
        
        if function_name == 'logistic_regression' or function_name == 'reg_logistic_regression':
            y_tr_prb = sigmoid(y_tr_prb)
            y_te_prb = sigmoid(y_te_prb)
            
        
        # calculate accuracy for train and test data
        y_tr_pr = probability_to_prediction(y_tr_prb,threshold)
        y_te_pr = probability_to_prediction(y_te_prb,threshold)
        y_te_real = probability_to_prediction(y_te,0.5)
        y_tr_real = probability_to_prediction(y_tr,0.5)
        
        # getting accuracy
        acc_tr.append(get_prediction_accuracy(y_tr_real, y_tr_pr))
        acc_te.append(get_prediction_accuracy(y_te_real, y_te_pr))
        
    return np.mean(acc_tr), np.mean(acc_te)
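
build_k_indices is assumed by the function above but not shown; a minimal sketch of what it could look like (shuffle the row indices with the given seed and split them into k_fold equal groups):

import numpy as np

def build_k_indices(y, k_fold, seed):
    num_rows = y.shape[0]
    interval = num_rows // k_fold
    np.random.seed(seed)
    indices = np.random.permutation(num_rows)
    return np.array([indices[k * interval:(k + 1) * interval]
                     for k in range(k_fold)])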
Example #17
def classifyVector(inX, weights):
    prob = lr.sigmoid(sum(inX * weights))
    if prob > 0.5:
        return 1.0
    else:
        return 0.0
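
A hypothetical usage of classifyVector, with a stand-in for the lr module it expects (defined here only so the snippet runs on its own):

import types
import numpy as np

lr = types.SimpleNamespace(sigmoid=lambda z: 1.0 / (1.0 + np.exp(-z)))

weights = np.array([0.5, -1.0, 2.0])
print(classifyVector(np.array([1.0, 0.2, 0.9]), weights))   # 1.0
print(classifyVector(np.array([1.0, 3.0, 0.1]), weights))   # 0.0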
    def test_result_values(self):
        """ Test the bounds of the result, [should be very small, 0.5, very close to 1] """
        result = sigmoid(self.theta, self.x)
        self.assertLess(result[0], self.expected_answer[0])
        self.assertEqual(result[1], self.expected_answer[1])
        self.assertGreater(result[2], self.expected_answer[2])
def costFunctionReg(theta, X, y, lambda_):
    """
    Compute cost and gradient for logistic regression with regularization.
    
    Parameters
    ----------
    theta : array_like
        Logistic regression parameters. A vector with shape (n, ). n is 
        the number of features including any intercept. If we have mapped
        our initial features into polynomial features, then n is the total 
        number of polynomial features. 
    
    X : array_like
        The data set with shape (m x n). m is the number of examples, and
        n is the number of features (after feature mapping).
    
    y : array_like
        The data labels. A vector with shape (m, ).
    
    lambda_ : float
        The regularization parameter. 
    
    Returns
    -------
    J : float
        The computed value for the regularized cost function. 
    
    grad : array_like
        A vector of shape (n, ) which is the gradient of the cost
        function with respect to theta, at the current values of theta.
    
    Instructions
    ------------
    Compute the cost `J` of a particular choice of theta.
    Compute the partial derivatives and set `grad` to the partial
    derivatives of the cost w.r.t. each parameter in theta.
    """
    # Initialize some useful values
    m = y.size  # number of training examples

    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)

    # ===================== YOUR CODE HERE ======================
    h = sigmoid(theta.dot(X.T))
    """ 
    ##  X : m row n feature(col). => matrix m * n dim. 
    ##  theta: n feature(col) 1 row. theta*X.T remember multiple matrix
    ##  remember sigmoid(z) = 1 / (1 + e ^ -z)
    """
    J = 1 / m * (- y.dot(np.log(h)) - (1 - y).dot(np.log(1 - h))) \
        + lambda_ /(2 * m) * np.sum(theta[1:]**2)
    """ ===============================================================================
    ## J (cost function) = 1/m * (-y * log(sigmoid(h)) - ((1 - y) * log(1 - sigmoid(h)) 
    ##  + lambda_ /(2 * m) * np.sum(theta[1:]**2) ; lamda/(2*m) * sichma(theta[1:] ** 2)
    ## gradients = 1 / m * (h(x) - y) * X
    ## + lambda_ / m * np.array([0 if i == 0 else theta[i] for i in range(len(theta))])
    ## np.array([0 if i == 0 else theta[i] for i in range(len(theta))])
    ================================================================================"""
    grad =  1 / m * (h - y).dot(X) \
        + lambda_ / m * np.array([0 if i == 0 else theta[i] for i in range(len(theta))])
    # =============================================================
    return J, grad
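
A hypothetical finite-difference check that the gradient returned above matches the cost (random theta, made-up data, plain numpy sigmoid assumed in scope):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
X = np.hstack([np.ones((6, 1)), rng.random((6, 2))])
y = np.array([0.0, 0.0, 1.0, 1.0, 0.0, 1.0])
theta = rng.standard_normal(3)
J, grad = costFunctionReg(theta, X, y, lambda_=1.0)

eps = 1e-6
numeric = np.zeros(3)
for k in range(3):
    step = np.zeros(3)
    step[k] = eps
    numeric[k] = (costFunctionReg(theta + step, X, y, 1.0)[0]
                  - costFunctionReg(theta - step, X, y, 1.0)[0]) / (2 * eps)
print(np.allclose(grad, numeric, atol=1e-5))   # True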
Example #21
def test_sigmoid():
    from logistic_regression import sigmoid
    logits = np.float32([[-3, 0, 3]]).T
    desired = np.float32([[0.04742587317756678, 0.5, 0.9525741268224334]]).T
    actual = sigmoid(logits)
    np.testing.assert_allclose(actual, desired, rtol=1e-3, atol=1e-3)
Example #22
def test_sigmoid():
    assert abs(lr.sigmoid(0) - 0.5) < 1e-8
    assert abs(lr.sigmoid(-1e5)) < 1e-8
    assert abs(lr.sigmoid(1e5) - 1) < 1e-8
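
One way to write a sigmoid that satisfies both tests above without overflow warnings at z = ±1e5 (an illustrative sketch; the tested modules may implement it differently):

import numpy as np

def sigmoid(z):
    # split on the sign of z so np.exp only ever sees non-positive arguments
    z = np.asarray(z, dtype=float)
    positive = z >= 0
    e = np.exp(np.where(positive, -z, z))   # always exp of a value <= 0
    return np.where(positive, 1.0 / (1.0 + e), e / (1.0 + e))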
Example #23
    cost = result.fun

    # display the minimum cost found by scipy.optimize over the specified number of iterations
    print('Cost at theta found by scipy.optimize.minimize: %f' % cost)

    # plot the decision boundary
    plot_data(X_original, y, show=False)
    u = np.linspace(-1, 1.5, 50)
    v = np.linspace(-1, 1.5, 50)
    z = np.zeros((u.size, v.size))
    for i in range(u.size):
        for j in range(v.size):
            z[i, j] = map_feature(u[i], v[j]).dot(theta)
    plot.contour(u, v, z.T, 0)
    plot.show()

    # output predictions (from the plot, test results near the centre 0 mean a high pass probability)
    test_data = [[0.1, -0.1], [-0.7, 0.2], [0.8, -0.1], [1.0, -1.0]]
    for data in test_data:
        test1 = data[0]
        test2 = data[1]
        x = map_feature(np.array(test1), np.array(test2))
        prob = sigmoid(np.array(x).dot(theta))
        print('Probability that a microchip with test results {} and {} passes quality assurance: {}'.format(
            test1, test2, prob))

    # display the training accuracy
    predictions = predict(theta, X)
    accuracy = 100 * np.mean(predictions == y)
    print('Train accuracy: %0.2f %%' % accuracy)
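
map_feature is assumed above but not shown; a sketch of a typical implementation (all polynomial terms of the two inputs up to degree 6, 28 columns with a leading 1):

import numpy as np

def map_feature(x1, x2, degree=6):
    x1 = np.atleast_1d(x1).astype(float)
    x2 = np.atleast_1d(x2).astype(float)
    out = [np.ones(x1.shape[0])]
    for i in range(1, degree + 1):
        for j in range(i + 1):
            out.append((x1 ** (i - j)) * (x2 ** j))
    return np.stack(out, axis=1)

print(map_feature(0.1, -0.1).shape)   # (1, 28)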