Example #1
def __compute_cost(theta, feat, label):
    """计算代价函数.

  Args:
    theta: 参数, 维度(特征数).
    feat: 特征, 维度(样本数, 特征数).
    label: 标签, 维度(样本数).

  Returns:
    代价函数数值.
  """
    hypothesis = sigmoid(np.dot(feat, theta))
    cost = np.sum(-np.multiply(label, np.log(hypothesis)) -
                  np.multiply(1 - label, np.log(1 - hypothesis))) / len(feat)
    return cost
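
The snippet above assumes that numpy and a sigmoid helper are already in scope; neither is shown on this page. A minimal usage sketch, with an assumed sigmoid definition and toy data shaped as the docstring describes:

import numpy as np


def sigmoid(z):
    """Element-wise logistic function (assumed helper, not part of the snippet)."""
    return 1.0 / (1.0 + np.exp(-z))


feat = np.array([[1.0, 0.5], [1.0, -1.2], [1.0, 2.3]])  # (num_samples, num_features)
label = np.array([1.0, 0.0, 1.0])                       # (num_samples,)
theta = np.zeros(2)                                     # (num_features,)
# With theta = 0 every hypothesis is 0.5, so the cost is ln(2) ~= 0.6931.
print(__compute_cost(theta, feat, label))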
Example #2
def __compute_grad(theta, feat, label):
    """计算梯度.

  Args:
    theta: 参数, 维度(特征数).
    feat: 特征, 维度(样本数, 特征数).
    label: 标签, 维度(样本数).

  Returns:
    梯度数值.
  """
    hypothesis = sigmoid(np.dot(feat, theta))
    grad = np.sum(np.multiply(
        np.tile(hypothesis - label, (feat.shape[1], 1)).T, feat),
                  axis=0) / len(feat)
    return grad
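
The np.tile/np.multiply construction above is one way to broadcast the residual across feature columns; an equivalent and arguably simpler formulation uses a single matrix product. A sketch under the same shape conventions, with the sigmoid helper written inline:

import numpy as np


def compute_grad_matmul(theta, feat, label):
    """Same gradient as __compute_grad, written as feat.T @ (h - y) / m."""
    hypothesis = 1.0 / (1.0 + np.exp(-np.dot(feat, theta)))
    return np.dot(feat.T, hypothesis - label) / len(feat)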
Example #3
def __compute_cost(theta, feat, label, lambda_factor):
  """计算代价函数.

  Args:
    theta: 参数, 维度(特征数).
    feat: 特征, 维度(样本数, 特征数).
    label: 标签, 维度(样本数).
    lambda_factor: 正则化因子.

  Returns:
    代价函数数值.
  """
  num = len(feat)
  hypothesis = sigmoid(np.dot(feat, theta))
  cost = np.sum(-np.multiply(label, np.log(hypothesis)) -
                np.multiply(1 - label, np.log(1 - hypothesis))) / num
  # L2 penalty; the intercept theta[0] is not regularized.
  penalty = lambda_factor / (2 * num) * np.sum(theta[1:] ** 2)
  return cost + penalty
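
Note that the penalty skips theta[0] and sums the squares of the remaining parameters, which is exactly what the gradient in the next example differentiates. A quick check with hand-picked toy values (not from the dataset):

import numpy as np

theta = np.array([3.0, 1.0, -2.0])
lambda_factor, num = 10.0, 5
penalty = lambda_factor / (2 * num) * np.sum(theta[1:] ** 2)
print(penalty)  # 10 / (2 * 5) * (1 + 4) = 5.0; theta[0] never contributes.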
Example #4
def __compute_grad(theta, feat, label, lambda_factor):
  """计算梯度.

  Args:
    theta: 参数, 维度(特征数).
    feat: 特征, 维度(样本数, 特征数).
    label: 标签, 维度(样本数).
    lambda_factor: 正则化因子.

  Returns:
    梯度数值.
  """
  num = len(feat)
  hypothesis = sigmoid(np.dot(feat, theta))
  grad = np.sum(np.multiply(np.tile(hypothesis - label, (feat.shape[1], 1)).T,
                            feat), axis=0) / num
  penalty = np.zeros(len(theta))
  penalty[1:] = lambda_factor / num * theta[1:]
  return grad + penalty
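
A minimal sketch of how the two regularized functions could be handed to scipy.optimize.minimize, mirroring the unregularized call in the next example; the arrays here are placeholders and the sigmoid helper is assumed to be in scope:

import numpy as np
from scipy.optimize import minimize

feat = np.array([[1.0, 0.3, -1.1], [1.0, 2.0, 0.4], [1.0, -0.7, 1.5]])
label = np.array([0.0, 1.0, 1.0])
init_theta = np.zeros(feat.shape[1])
optimal = minimize(__compute_cost, init_theta,
                   args=(feat, label, 1.0),  # lambda_factor = 1.0
                   method='TNC', jac=__compute_grad)
print(optimal['x'])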
Example #5
def __cmd():
    """命令行函数."""
    feat, label = load_txt(Path(__file__).parent / 'data1.txt')
    num = len(feat)  # 样本数

    # Plot the samples.
    ax = __plot_data(feat, label)

    # Gradient descent.
    init_theta = np.zeros(3)
    feat = np.concatenate([np.ones((num, 1)), feat], axis=-1)  # Add an all-ones column 0 (intercept).
    optimal = minimize(__compute_cost,
                       init_theta,
                       args=(feat, label),
                       method='TNC',
                       jac=__compute_grad)
    best_theta = optimal['x']
    logging.info(f'Optimal parameters from gradient descent: [{best_theta[0]:.5f} '
                 f'{best_theta[1]:.5f} {best_theta[2]:.5f}].')

    # Plot the decision boundary: the line where sigmoid(theta * feature) = 0.5,
    # i.e. where theta * feature = 0.
    boundary_x = np.array([min(feat[:, 1]) - 2, max(feat[:, 1]) + 2])
    boundary_y = -1 / best_theta[2] * (boundary_x * best_theta[1] +
                                       best_theta[0])
    ax.plot(boundary_x, boundary_y, label='Decision Boundary')
    handles, labels = ax.get_legend_handles_labels()
    handles = [handles[0], handles[2], handles[1]]
    labels = [labels[0], labels[2], labels[1]]
    ax.legend(handles, labels)
    plt.show()

    # Prediction.
    prob = sigmoid(np.dot(np.array([[1, 45, 85]]), best_theta))[0]
    logging.info(f'With a subject 1 score of 45 and a subject 2 score of 85, '
                 f'the admission probability is {prob:.3f}.')

    # Training-set accuracy.
    prediction = predict(best_theta, feat)
    acc = np.sum(prediction == label) / num
    logging.info(f'Training-set accuracy: {acc * 100:.0f}%.')
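
The predict helper used for the accuracy check is not shown in these snippets; a plausible sketch, thresholding the hypothesis at 0.5 (equivalently, thresholding theta * feature at 0):

import numpy as np


def predict(theta, feat):
    """Return 1 where sigmoid(feat @ theta) >= 0.5, else 0."""
    return (np.dot(feat, theta) >= 0).astype(int)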