Ejemplo n.º 1
0
def wine_example():
    """Demonstrate filter-style and embedded-style feature selection.

    Loads the wine data via ``get_wine()``, keeps only the samples whose
    label is 0 or 1, and prints the shape of the matrix returned by each
    selector's ``fit_transform``. The GBDT-based selector is then run on a
    small randomly generated binary matrix instead of the wine data.

    The literal ``3`` handed to every selector is presumably the number of
    features to keep -- confirm against ``Filter`` / ``Embedded``.
    """
    features, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]

    # Run the three filter criteria one after another with identical settings.
    for criterion in (FilterType.corr, FilterType.var, FilterType.entropy):
        selector = Filter(criterion, 3)
        print(selector.fit_transform(features, labels).shape)

    lasso_selector = Embedded(3, EmbeddedType.Lasso)
    lasso_selected = lasso_selector.fit_transform(features, labels)
    # Original author's note: after Lasso only two weights stay non-zero,
    # so only two features are output here.
    print(lasso_selected.shape)

    # Original author's note: GBDT currently supports discrete features only,
    # hence the random 0/1 matrix with random continuous targets below.
    gbdt_selector = Embedded(3, EmbeddedType.GBDT)
    random_bits = np.random.choice([0, 1], 50).reshape(10, 5)
    random_targets = np.random.rand(10)
    gbdt_selected = gbdt_selector.fit_transform(random_bits, random_targets)
    print(gbdt_selected, random_targets)
Ejemplo n.º 2
0
def wine_example():
    """Compare three Bayes classifiers on the two-class subset of the wine data.

    Keeps only samples labelled 0 or 1, splits them with
    ``train_test_split(x, y, 0.3, 918)`` (presumably test ratio and random
    seed -- confirm against the helper), then for each classifier fits on the
    training part, prints its test score and draws its classification plot.
    """
    features, labels = get_wine()

    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]

    train_x, train_y, test_x, test_y = train_test_split(features, labels, 0.3, 918)

    # Bayes decision rule with minimum error rate.
    min_error_clf = BayesMinimumError()
    min_error_clf.fit(train_x, train_y)
    print(min_error_clf.score(test_x, test_y))
    min_error_clf.classify_plot(test_x, test_y)

    # Bayes decision rule with minimum risk; a risk matrix must be supplied.
    # Original author's note: [[0, 100], [10, 0]] means that classifying a 0
    # as 1 (false acceptance) costs 100 and classifying a 1 as 0 (false
    # rejection) costs 10.
    risk_matrix = np.array([[0, 100], [10, 0]])
    min_risk_clf = BayesMinimumRisk(risk_matrix)
    min_risk_clf.fit(train_x, train_y)
    min_risk_clf.predict(test_x)  # return value intentionally unused, as in the original demo
    print(min_risk_clf.score(test_x, test_y))
    min_risk_clf.classify_plot(test_x, test_y)

    # Naive Bayes.
    naive_clf = NaiveBayes()
    naive_clf.fit(train_x, train_y)
    naive_clf.predict(test_x)  # return value intentionally unused, as in the original demo
    print(naive_clf.score(test_x, test_y))
    naive_clf.classify_plot(test_x, test_y)
Ejemplo n.º 3
0
def moon_example():
    """
    AdaBoost example with three different base classifiers.

    NOTE: despite the function name (and the original docstring, which
    mentioned the moon dataset), the data is loaded with ``get_wine()`` and
    restricted to the samples labelled 0 or 1.

    For each base classifier the ensemble is fitted on the training split,
    its test score is printed and a classification plot is drawn with the
    given title suffix.
    :return:
    """
    features, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]
    train_x, train_y, test_x, test_y = train_test_split(features, labels, 0.3, 918)

    # Base classifiers in demo order: logistic regression, KNN, CART tree.
    runs = (
        (ClassifierType.LR, ", LR"),
        (ClassifierType.KNN, ", KNN"),
        (ClassifierType.CART, ", CART"),
    )
    for base_classifier, plot_suffix in runs:
        booster = AdaBoost(classifier=base_classifier)
        booster.fit(train_x, train_y)
        print(booster.score(test_x, test_y))
        booster.classify_plot(test_x, test_y, plot_suffix)
Ejemplo n.º 4
0
def watermelon_example():
    """
    GBDT example, first on the watermelon dataset and then on the wine dataset.

    Original author's note: GBDT currently supports regression only, not
    classification. For that reason the labels returned by each loader are
    discarded and the last feature column is used as the regression target.
    :return:
    """
    for load_dataset in (get_watermelon, get_wine):
        data, _ = load_dataset()
        # Original author's notes: the target is continuous, the remaining
        # columns are discrete. The same note was attached to the wine data;
        # whether it holds there should be verified.
        targets = data[:, -1]
        inputs = data[:, :-1]
        train_x, train_y, test_x, test_y = train_test_split(inputs, targets, 0.3, 918)

        regressor = GBDT(learning_rate=1)
        regressor.fit(train_x, train_y)
        print(regressor.predict(test_x), test_y)
        print("R square: %.4f" % regressor.score(test_x, test_y))
Ejemplo n.º 5
0
def random_forest_example():
    """Fit a random forest on the wine data and plot its test predictions.

    Every sample returned by ``get_wine()`` is used; no label filtering is
    applied. The test score is printed before the plot is drawn.
    """
    features, labels = get_wine()

    train_x, train_y, test_x, test_y = train_test_split(features, labels, 0.3, 918)

    # Positional hyper-parameters; their meaning is defined by RandomForest.
    forest = RandomForest(4, 50)
    forest.fit(train_x, train_y)
    test_score = forest.score(test_x, test_y)
    print(test_score)
    forest.classify_plot(test_x, test_y)
Ejemplo n.º 6
0
def AdaBoost_multi_class_example():
    """Run AdaBoost on the full (unfiltered) wine data with three base classifiers.

    For each base classifier the ensemble is fitted on the training split,
    its predictions on the test split are printed, and a classification plot
    is drawn with a suffix naming the base classifier.

    Fix: the KNN run previously passed ``", CART"`` to ``classify_plot``, so
    its plot was labelled as CART; it now passes ``", KNN"``.
    """
    x, y = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # Same order as the original demo: LR, CART, then KNN.
    runs = (
        (ClassifierType.LR, ", LR"),
        (ClassifierType.CART, ", CART"),
        (ClassifierType.KNN, ", KNN"),
    )
    for base_classifier, plot_suffix in runs:
        ada = AdaBoost(classifier=base_classifier)
        ada.fit(x_train, y_train)
        print(ada.predict(x_test))
        ada.classify_plot(x_test, y_test, plot_suffix)
Ejemplo n.º 7
0
def wine_example():
    """CART demo on the wine data, run twice with different targets.

    First pass: fit on the labels returned by ``get_wine()`` (all samples,
    no label filtering), print the test score and draw a classification plot.
    Second pass: use the last feature column as the target and the remaining
    columns as inputs, then print the test score only.
    """
    features, labels = get_wine()

    train_x, train_y, test_x, test_y = train_test_split(features, labels, 0.3, 918)

    label_tree = CART()
    label_tree.fit(train_x, train_y)
    print(label_tree.score(test_x, test_y))
    label_tree.classify_plot(test_x, test_y)

    # Re-purpose the last feature column as the target for the second run.
    column_target = features[:, -1]
    remaining = features[:, :-1]
    train_x, train_y, test_x, test_y = train_test_split(remaining, column_target, 0.3, 918)

    column_tree = CART()
    column_tree.fit(train_x, train_y)
    print(column_tree.score(test_x, test_y))
Ejemplo n.º 8
0
def wine_example():
    """Fit plain, Lasso and Ridge logistic models on the two-class wine subset.

    Keeps only samples labelled 0 or 1, splits them into train and test
    parts, and for each model prints the fitted ``w`` attribute and draws a
    classification plot on the test part. The plain model (constructed with
    ``has_intercept=False``) additionally draws an AUC plot.
    """
    features, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]
    train_x, train_y, test_x, test_y = train_test_split(features, labels, 0.3, 918)

    # Plain logistic regression without an intercept term.
    plain_model = BaseLogisticRegression(has_intercept=False)
    plain_model.fit(train_x, train_y)
    print(plain_model.w)
    plain_model.classify_plot(test_x, test_y)
    plain_model.auc_plot(test_x, test_y)

    # Lasso and Ridge use their default constructors.
    lasso_model = Lasso()
    lasso_model.fit(train_x, train_y)
    print(lasso_model.w)
    lasso_model.classify_plot(test_x, test_y)

    ridge_model = Ridge()
    ridge_model.fit(train_x, train_y)
    print(ridge_model.w)
    ridge_model.classify_plot(test_x, test_y)