Exemple #1
0
def watermelon_example():
    """
    GBDT regression demo, run on the watermelon dataset and then on wine.

    For each dataset the labels returned by the loader are discarded; the
    last feature column becomes the continuous regression target and the
    remaining columns are the inputs. (Original author's note: GBDT
    currently supports regression only, not classification.)

    :return: None; predictions, true targets and the score are printed.
    """
    for load_dataset in (get_watermelon, get_wine):
        features, _ = load_dataset()
        target = features[:, -1]   # continuous target: last column
        inputs = features[:, :-1]  # every other column is an input
        split = train_test_split(inputs, target, 0.3, 918)
        x_train, y_train, x_test, y_test = split

        booster = GBDT(learning_rate=1)
        booster.fit(x_train, y_train)
        print(booster.predict(x_test), y_test)
        r_square = booster.score(x_test, y_test)
        print("R square: %.4f" % r_square)
Exemple #2
0
def moon_example():
    """
    AdaBoost demo that tries three different base classifiers in turn.

    NOTE(review): the function name and the original description refer to
    the moon dataset, but the code loads the wine dataset and keeps only
    the samples labelled 0 or 1.

    :return: None; each booster's test score is printed and its
             classify_plot is invoked on the test split.
    """
    x, y = get_wine()
    binary_mask = (y == 0) | (y == 1)
    x, y = x[binary_mask], y[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # (base classifier, string handed to classify_plot) in the order they run:
    # logistic regression, then KNN, then a CART tree.
    base_learners = (
        (ClassifierType.LR, ", LR"),
        (ClassifierType.KNN, ", KNN"),
        (ClassifierType.CART, ", CART"),
    )
    for learner, plot_tag in base_learners:
        booster = AdaBoost(classifier=learner)
        booster.fit(x_train, y_train)
        print(booster.score(x_test, y_test))
        booster.classify_plot(x_test, y_test, plot_tag)
Exemple #3
0
def iris_example():
    """
    Fit LogisticRegression, Lasso and Ridge on the iris samples labelled 0 or 1.

    Each model is fitted on the training split, its ``w`` attribute is
    printed, and predict / score / classify_plot are called on the test
    split (the predict and score results are not used). Only the
    LogisticRegression model additionally gets an auc_plot.

    :return: None
    """
    x, y = get_iris()
    keep = (y == 0) | (y == 1)
    x, y = x[keep], y[keep]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # (model class, whether to also draw the AUC plot)
    schedule = (
        (LogisticRegression, True),
        (Lasso, False),
        (Ridge, False),
    )
    for model_cls, with_auc in schedule:
        model = model_cls()
        model.fit(x_train, y_train)
        print(model.w)
        model.predict(x_test)
        model.score(x_test, y_test)
        model.classify_plot(x_test, y_test)
        if with_auc:
            model.auc_plot(x_test, y_test)
Exemple #4
0
def ID3_example():
    """
    Train an ID3 model on the first four watermelon feature columns.

    :return: None; the score on the test split is printed.
    """
    features, labels = get_watermelon()
    features = features[:, :4]  # only the first four columns are used
    split = train_test_split(features, labels, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    tree = ID3()
    tree.fit(x_train, y_train)
    test_score = tree.score(x_test, y_test)
    print(test_score)
def multi_svm_example():
    """
    Fit a polynomial-kernel SVM (d=2) on the full wine dataset.

    No label filtering is applied, so every class returned by get_wine is
    used.

    :return: None; predictions are printed next to the true test labels.
    """
    data, labels = get_wine()
    split = train_test_split(data, labels, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    classifier = SVM(kernel=KernelType.polynomial, d=2)
    classifier.fit(x_train, y_train)
    predicted = classifier.predict(x_test)
    print(predicted, list(y_test))
Exemple #6
0
def wine_example():
    """
    Run three Bayes classifiers on the wine samples labelled 0 or 1.

    BayesMinimumError, BayesMinimumRisk and NaiveBayes are each fitted on
    the training split; their test score is printed and classify_plot is
    called on the test split.

    :return: None
    """
    x, y = get_wine()
    two_class = (y == 0) | (y == 1)
    x, y = x[two_class], y[two_class]

    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # Bayes decision with minimum error rate.
    min_error = BayesMinimumError()
    min_error.fit(x_train, y_train)
    print(min_error.score(x_test, y_test))
    min_error.classify_plot(x_test, y_test)

    # Bayes decision with minimum risk; a risk matrix must be supplied.
    # [[0, 100], [10, 0]] means: classifying a 0 as 1 (false acceptance)
    # costs 100, classifying a 1 as 0 (false rejection) costs 10.
    risk_matrix = np.array([[0, 100], [10, 0]])
    min_risk = BayesMinimumRisk(risk_matrix)
    min_risk.fit(x_train, y_train)
    min_risk.predict(x_test)
    print(min_risk.score(x_test, y_test))
    min_risk.classify_plot(x_test, y_test)

    # Naive Bayes.
    naive = NaiveBayes()
    naive.fit(x_train, y_train)
    naive.predict(x_test)
    print(naive.score(x_test, y_test))
    naive.classify_plot(x_test, y_test)
def iris_svm_example():
    """
    Fit an SVM constructed with c=0.1 on the iris dataset.

    All classes are kept; the restriction to labels 0 and 1 that other
    examples apply is not active here.

    :return: None; predictions and the true test labels are printed.
    """
    features, labels = get_iris()
    split = train_test_split(features, labels, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    classifier = SVM(c=0.1)
    classifier.fit(x_train, y_train)
    print(classifier.predict(x_test), y_test)
Exemple #8
0
def wine_example():
    """
    KNN demo on the full wine dataset.

    The original author notes that KNN can handle multi-class problems, so
    no filtering down to labels 0 and 1 is applied.

    :return: None; the test score is printed and classify_plot is called
             on the test split.
    """
    features, labels = get_wine()
    split = train_test_split(features, labels, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    neighbours = KNN()
    neighbours.fit(x_train, y_train)
    print(neighbours.score(x_test, y_test))
    neighbours.classify_plot(x_test, y_test)
def iris_example():
    """
    MultiRegression demo: predict iris column 0 from the remaining columns.

    The labels returned by the loader are discarded. train_test_split is
    called with its default split arguments.

    :return: None; ``beta``, ``r_square`` and the test score are printed,
             then regression_plot is called with col_id=1.
    """
    data, _ = sc.get_iris()
    target = data[:, 0]       # first column is the regression target
    predictors = data[:, 1:]  # the rest are the explanatory columns
    x_train, y_train, x_test, y_test = sd.train_test_split(predictors, target)

    model = MultiRegression()
    model.fit(x_train, y_train)
    print(model.beta)
    print(model.r_square)
    print(model.score(x_test, y_test))
    model.regression_plot(x_test, y_test, col_id=1)
Exemple #10
0
def multi_class_example():
    """
    NeuralNetwork demo on the full (multi-class) wine dataset.

    The network is built with alpha=0.5 and the square cost function, its
    layers are cleared, and layers are added via add_some_layers(2, 3)
    with ReLU activation before fitting.

    :return: None; classify_plot is called on the test split.
    """
    features, labels = get_wine()

    split = train_test_split(features, labels, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    network = NeuralNetwork(alpha=0.5, cost_func=CostFunction.square)
    network.clear_all()
    network.add_some_layers(2, 3, active_func=ActiveFunction.relu)
    network.fit(x_train, y_train)
    # NOTE: predict_prob(x_test) is deliberately not called here; per the
    # original author's comment it raises an error in this setting.
    network.classify_plot(x_test, y_test)
Exemple #11
0
def wine_example():
    """
    CART demo on the wine dataset, run twice with different targets.

    First run: the dataset labels are the target; the test score is printed
    and classify_plot is called. Second run: the last feature column
    becomes the target and the remaining columns the inputs; only the test
    score is printed.

    :return: None
    """
    features, labels = get_wine()

    # All classes are kept; no filtering to labels 0 and 1.
    split = train_test_split(features, labels, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    label_tree = CART()
    label_tree.fit(x_train, y_train)
    print(label_tree.score(x_test, y_test))
    label_tree.classify_plot(x_test, y_test)

    # Second run: treat the last feature column as the target.
    column_target = features[:, -1]
    remaining = features[:, :-1]
    split = train_test_split(remaining, column_target, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    column_tree = CART()
    column_tree.fit(x_train, y_train)
    print(column_tree.score(x_test, y_test))
Exemple #12
0
def random_forest_example():
    """
    RandomForest demo on the full wine dataset.

    The forest is constructed as RandomForest(4, 50); all classes are kept
    (no filtering to labels 0 and 1).

    :return: None; the test score is printed and classify_plot is called
             on the test split.
    """
    features, labels = get_wine()

    split = train_test_split(features, labels, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    forest = RandomForest(4, 50)
    forest.fit(x_train, y_train)
    print(forest.score(x_test, y_test))
    forest.classify_plot(x_test, y_test)
def iris_svr_example():
    """
    SVR demo: predict iris column 0 from the remaining feature columns.

    The labels returned by the loader are discarded. The model is a
    Gaussian-kernel SVR built with c=1.0, eps=0.9 and sigma=2.0.

    :return: None; the correlation matrix between predictions and targets
             is printed, followed by one "prediction target" line per test
             sample, and regression_plot is called with column_id=0.
    """
    data, _ = get_iris()
    inputs = data[:, 1:]
    target = data[:, 0]
    split = train_test_split(inputs, target, 0.3, 918)
    x_train, y_train, x_test, y_test = split

    regressor = SVR(c=1.0, eps=0.9, kernel=KernelType.gaussian, sigma=2.0)
    regressor.fit(x_train, y_train)
    predictions = regressor.predict(x_test)

    # ravel() flattens the predictions before correlating with the targets.
    print(np.corrcoef(predictions.ravel(), y_test))
    for idx, predicted in enumerate(predictions):
        print(predicted, y_test[idx])
    regressor.regression_plot(x_test, y_test, column_id=0)
Exemple #14
0
def wine_example():
    """
    NeuralNetwork demo on the wine samples labelled 0 or 1.

    The network is built with alpha=0.5 and the square cost function, its
    layers are cleared, and layers are added via add_some_layers(2, 3)
    with ReLU activation before fitting.

    :return: None; predict_prob output for the test split is printed, then
             classify_plot and auc_plot are called on the test split.
    """
    x, y = get_wine()

    binary_mask = (y == 0) | (y == 1)
    x, y = x[binary_mask], y[binary_mask]

    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    network = NeuralNetwork(alpha=0.5, cost_func=CostFunction.square)
    network.clear_all()
    network.add_some_layers(2, 3, active_func=ActiveFunction.relu)
    network.fit(x_train, y_train)

    probabilities = network.predict_prob(x_test)
    print(probabilities)
    network.classify_plot(x_test, y_test)
    network.auc_plot(x_test, y_test)
Exemple #15
0
def wine_example():
    """
    Stacking demo on the full wine dataset.

    A LogisticRegression and a NaiveBayes model are stacked with
    k_folder=5. Note that this example splits with 91 as the last
    train_test_split argument, not the 918 used by the other examples.

    :return: None; the test score and the ``score_mat`` attribute are
             printed, then classify_plot is called on the test split.
    """
    features, labels = get_wine()
    # All classes are kept; no label filtering is applied.
    split = train_test_split(features, labels, 0.3, 91)
    x_train, y_train, x_test, y_test = split

    # Base learners handed to the stacker (no SVM is included).
    base_models = [
        logistic.LogisticRegression(),
        bayes.NaiveBayes(),
    ]

    stacker = ensemble.Stacking(base_models, k_folder=5)
    stacker.fit(x_train, y_train)
    print(stacker.score(x_test, y_test))
    print(stacker.score_mat)
    stacker.classify_plot(x_test, y_test)
Exemple #16
0
def multi_class_example():
    """
    Multi-class demo for LogisticRegression, Lasso and Ridge on wine.

    The full wine dataset is split with 0.5 as the split-ratio argument.
    Each model is created with has_intercept=True, fitted, its test
    predictions printed, and classify_plot called on the test split.

    :return: None
    """
    features, labels = get_wine()
    split = train_test_split(features, labels, 0.5, 918)
    x_train, y_train, x_test, y_test = split

    for model_cls in (LogisticRegression, Lasso, Ridge):
        model = model_cls(has_intercept=True)
        model.fit(x_train, y_train)
        print(model.predict(x_test))
        model.classify_plot(x_test, y_test)
Exemple #17
0
def AdaBoost_multi_class_example():
    """
    Multi-class AdaBoost demo on the full wine dataset.

    An AdaBoost model is trained with each base classifier in turn
    (logistic regression, CART, KNN). For every model the test predictions
    are printed and classify_plot is called with a tag naming the base
    classifier.

    :return: None
    """
    x, y = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # (base classifier, tag handed to classify_plot). The KNN run used to be
    # tagged ", CART" through a copy-paste slip, which mislabelled its plot.
    base_learners = (
        (ClassifierType.LR, ", LR"),
        (ClassifierType.CART, ", CART"),
        (ClassifierType.KNN, ", KNN"),
    )
    for learner, plot_tag in base_learners:
        ada = AdaBoost(classifier=learner)
        ada.fit(x_train, y_train)
        print(ada.predict(x_test))
        ada.classify_plot(x_test, y_test, plot_tag)
Exemple #18
0
def cross_validation(model,
                     x,
                     y,
                     method=CrossValidationType.holdout,
                     test_size=0.3,
                     cv=5,
                     seed=918):
    """
    Score a model over repeated hold-out splits or over k folds.

    :param model:         estimator; its ``fit(x, y)`` and ``score(x, y)``
                          methods are called once per round
    :param x:             features, must be a ``np.ndarray``
    :param y:             labels, one per row of ``x``
    :param method:        ``CrossValidationType.holdout`` or
                          ``CrossValidationType.k_folder``
    :param test_size:     split ratio forwarded to ``train_test_split``;
                          only used by the holdout method (by its name the
                          test-set share -- confirm against
                          ``train_test_split``)
    :param cv:            number of rounds; for the k_folder method it is
                          the ``k`` passed to ``get_k_folder_idx``
    :param seed:          random seed; forwarded to ``get_k_folder_idx``
                          only, the holdout branch does not use it
    :return:              ``np.ndarray`` of length ``cv`` holding the score
                          of each round
    :raises FeatureTypeError:            ``x`` is not a ``np.ndarray``
    :raises SampleNumberMismatchError:   ``x`` and ``y`` differ in length
    :raises CrossValidationTypeError:    ``method`` is not a supported type
    """
    if not isinstance(x, np.ndarray):
        raise FeatureTypeError

    if x.shape[0] != len(y):
        raise SampleNumberMismatchError

    result = np.zeros(cv)
    if method == CrossValidationType.holdout:
        for i in range(cv):
            # The project's train_test_split is unpacked everywhere else as
            # (x_train, y_train, x_test, y_test). The previous sklearn-style
            # order here swapped y_train with x_test, so the model was
            # fitted on features against features.
            x_train, y_train, x_test, y_test = train_test_split(
                x, y, test_size)
            model.fit(x_train, y_train)
            result[i] = model.score(x_test, y_test)
        return result
    elif method == CrossValidationType.k_folder:
        # NOTE(review): assumes get_k_folder_idx yields (test_ids, train_ids)
        # pairs in that order -- confirm against its definition.
        for i, (test_ids,
                train_ids) in enumerate(get_k_folder_idx(x.shape[0], cv,
                                                         seed)):
            x_test, y_test = x[test_ids], y[test_ids]
            x_train, y_train = x[train_ids], y[train_ids]
            model.fit(x_train, y_train)
            result[i] = model.score(x_test, y_test)
        return result
    else:
        raise CrossValidationTypeError
Exemple #19
0
def wine_example():
    """
    LogisticRegression, Lasso and Ridge on the wine samples labelled 0 or 1.

    The data is split with 0.5 as the split-ratio argument. The logistic
    model is created with has_intercept=True and score is called on it
    (result unused); Lasso and Ridge use their default constructors. For
    every model the ``w`` attribute is printed, then classify_plot and
    auc_plot are called on the test split.

    :return: None
    """
    x, y = get_wine()
    binary_mask = (y == 0) | (y == 1)
    x, y = x[binary_mask], y[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.5, 918)

    plain = LogisticRegression(has_intercept=True)
    plain.fit(x_train, y_train)
    plain.score(x_test, y_test)
    print(plain.w)
    plain.classify_plot(x_test, y_test)
    plain.auc_plot(x_test, y_test)

    # The two regularised variants follow the same steps, minus the score call.
    for regularised_cls in (Lasso, Ridge):
        regularised = regularised_cls()
        regularised.fit(x_train, y_train)
        print(regularised.w)
        regularised.classify_plot(x_test, y_test)
        regularised.auc_plot(x_test, y_test)