Exemple #1
0
def main():
    """Fit a two-class Fisher LDA on the watermelon data and plot the result.

    ``data[0]`` and ``data[1]`` of the loaded table are used as the two
    features, ``data[3]`` as the class label (``int(label)`` must be 0 or 1).
    The projection vector is ``w = inv(Sw) * (u0 - u1)``, where ``u0``/``u1``
    are the per-class mean vectors and ``Sw`` is the summed within-class
    scatter matrix.  The samples and a line derived from ``w`` are then drawn
    with matplotlib.

    Raises:
        ValueError: if one of the two classes has no samples, since its mean
            vector would be undefined.
    """
    data = readxls.excel_table_byname(
        "/Users/JJjie/Desktop/www/Mechine_Learning/dataset/西瓜3.0.xlsx", 0,
        "Sheet1")
    y = data[3]
    x = mat(data[0:2])
    # Derive the sample count from the data instead of hard-coding 17.
    n_samples = x.shape[1]

    # Per-class mean vectors, one column per class.  The class sizes are
    # counted while accumulating so each mean is divided by the size of its
    # own class (the previous hard-coded 8 / 9 silently depended on the label
    # encoding and on the exact dataset).
    u = mat(zeros((2, 2)))
    counts = [0, 0]
    for i in range(n_samples):
        index = int(y[i])
        u[:, index] = u[:, index] + x[:, i]
        counts[index] += 1
    if 0 in counts:
        raise ValueError("both classes (0 and 1) need at least one sample")
    u[:, 0] /= counts[0]
    u[:, 1] /= counts[1]

    # Sum of the two within-class scatter matrices.
    sw = zeros((2, 2))
    for i in range(n_samples):
        temp = x[:, i] - u[:, int(y[i])]
        sw = sw + temp * temp.T

    # Projection direction; linalg.inv raises LinAlgError if sw is singular.
    w = linalg.inv(sw) * (u[:, 0] - u[:, 1])

    # Plotting.
    plt.title("LDA")
    plt.xlabel("Denisty")
    plt.ylabel("Sguar content")

    # Split the samples by label: 1.0 is drawn as "Good", the rest as "Bad".
    x1, y1 = [], []
    x2, y2 = [], []
    for first, second, label in zip(data[0], data[1], data[3]):
        if label == 1.0:
            x1.append(first)
            y1.append(second)
        else:
            x2.append(first)
            y2.append(second)

    plt.plot(x1, y1, 'ro', label="Good")
    plt.plot(x2, y2, 'og', label="Bad")

    # Line W[0] * x + W[1] * y = 0.01 evaluated at x = 0.2 and x = 0.8.
    # NOTE(review): the 0.01 offset and the [0.2, 0.8] range are hard-coded;
    # their origin is not shown here.
    W = w.T.A[0]
    pl = -(0.2 * W[0] - 0.01) / W[1]
    pr = -(0.8 * W[0] - 0.01) / W[1]

    plt.plot([0.2, 0.8], [pl, pr])

    plt.legend()
    plt.show()
Exemple #2
0
def main():
    """Load the watermelon data, fit ``logarithmic_regression`` and draw it.

    NOTE(review): the ``while (1)`` loop after the ``draw`` call reads ``k``,
    ``old_l`` and ``n``, none of which is assigned anywhere in this function.
    Because ``old_l`` and ``n`` are assigned later in the loop they are local
    names, so the first read would raise ``UnboundLocalError``.  The loop
    looks like it was pasted from another function (presumably the body of
    the regression routine) -- confirm against the original file.
    """
    # Data preparation
    data = readxls.excel_table_byname(
        "/Users/JJjie/Desktop/www/Mechine_Learning/dataset/西瓜3.0.xlsx", 0,
        "Sheet1")
    x = mat(data[0:3]).T
    y = mat(data[3]).T
    b = logarithmic_regression(x, y, 2)

    draw(data, b, data[3], "Logarithmic regression", "Denisty",
         "Sguar content")
    while (1):
        cur_l = 0
        bx = np.zeros((17, 1))
        # The zero array above is immediately replaced by b^T . k.
        bx = np.dot(b.T, k)
        # Sum over the first 17 entries of -y * bx + log(1 + exp(bx))
        # (presumably the logistic-regression negative log-likelihood).
        cur_l = sum((-y * bx[0][:17]) + np.log(1 + np.exp(bx[0][:17])))

        # Stop once the objective changed by less than 0.001 since the
        # previous iteration (signed difference, not absolute value).
        if cur_l - old_l < 0.001:
            break

        n += 1
        old_l = cur_l
        p1 = np.zeros((17, 1))
        dl = 0
        d2l = 0

        # Accumulate dl and d2l over the 17 samples
        # (presumably the first and second derivatives of the objective).
        for i in range(17):
            p1[i] = 1 - 1 / (1 + np.exp(bx[0][i]))
            dl -= k[:, i] * (y[i] - p1[i])
            d2l += np.dot(k[:, i], k[:, i].T) * p1[i] * (1 - p1[i])

        # NOTE(review): a Newton step would normally be b - inv(d2l) . dl;
        # this divides d2l by dl instead -- confirm the intended update.
        b = b - d2l / dl


if __name__ == '__main__':
    # Script entry point: read the spreadsheet, convert entries 0-1 and
    # entry 3 of the table to NumPy arrays (presumably the features and the
    # labels) and pass them to run().
    data = readxls.excel_table_byname(
        "/Users/JJjie/Desktop/www/Mechine_Learning/dataset/西瓜3.0.xlsx",
        0,
        "Sheet1",
    )
    x, y = np.array(data[:2]), np.array(data[3])
    run(x, y)
# @Author  : UNE
# @Project : Mechine_learning
# @File    : K_means_pro.py
# @Software: PyCharm
# 《机器学习》(周志华)第九章9.10
"""
实现一种能自动确定聚类数的改进k均值算法,编程实现并在西瓜数据集上运行。
"""

from tool import readxls
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Script entry point for the improved k-means exercise.  Only the start
    # of the search loop is visible in this excerpt; the sample-assignment
    # and convergence logic that should follow is cut off.
    data = readxls.excel_table_byname(
        "/Users/JJjie/Desktop/Projects/Mechine_Learning/dataset/西瓜4.xlsx", 0,
        "Sheet1")
    data = np.array(data)
    (m, n) = data.shape

    old_ts = 100  # lowest squared error so far; initialised to a large value
    old_c = 0
    old_nums = 0

    # Try every cluster count k from 2 to 9.
    for k in range(2, 10):
        # Pick k rows at random indices in [0, 30) as the initial means.
        # NOTE(review): 30 is hard-coded although m is computed above, and
        # randint may return the same index twice -- confirm intent.
        u = data[np.random.randint(30, size=k), :]  # generate random means

        while 1:
            c = np.zeros((k, 30), dtype="int64")  # clear every cluster's member set
            nums = np.zeros((k, 1), dtype="int64")
            # Iterate over all samples, choosing the nearest cluster
            # (the loop this comment introduces is not visible in this excerpt)
Exemple #5
0
# @Author  : UNE
# @Site    :
# @File    : AdaBoosw.py
# @Software: PyCharm
# 《机器学习》(周志华)第八章8.3
"""
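Implement AdaBoost with unpruned decision trees as the base learner, train an AdaBoost ensemble on watermelon dataset 3.0α, and compare the result with Figure 8.4.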
"""

from tool import readxls
import numpy as np
import pandas as pd
from dTree import dTree

if __name__ == '__main__':
    # Script entry point for the AdaBoost exercise.  Only the data loading
    # and initialisation are visible in this excerpt.
    data = readxls.excel_table_byname(
        "/Users/JJjie/Desktop/Projects/dataset/西瓜3.xlsx", 0, "Sheet1")
    x = pd.DataFrame(data[6:8])
    y = pd.DataFrame(data[8])
    y = y.T
    y_index = y - 1
    # Map y to 1 / -1: a label of 1 stays 1 and a label of 2 becomes -1
    # (assumes the raw labels are 1 and 2 -- TODO confirm).
    y = -2 * y + 3  # map y to 1, -1

    # NOTE(review): the bare except also swallows unrelated errors; a
    # narrower exception type (e.g. ValueError) would be safer.
    try:  # fallback below handles the one-dimensional case
        m, n = y.shape
    except:
        m = 1
        n = len(y)

    # NOTE(review): this name shadows the built-in ``set``.
    set = np.arange(0, n)
    sy = np.zeros((1, 17))  # records the accumulated classifier's output
    sw = np.ones((1, 17)) / 17  # sample weights, initially all equal