Python train_test_split Examples, simple_ml.data_handle.train_test_split Python Examples

Example #1

0

Show file

File: ensemble_example.py Project: ykukey/simple_ml

def watermelon_example():
    """
    GBDT example, run first on the watermelon dataset and then on the wine
    dataset.

    GBDT currently supports regression only, not classification, so the last
    feature column is used as the target in both runs.
    :return:
    """
    for load_dataset in (get_watermelon, get_wine):
        # The labels returned by the loader are discarded; the regression
        # target is taken from the feature matrix instead.
        data, _ = load_dataset()
        target = data[:, -1]  # continuous target (last column)
        features = data[:, :-1]  # remaining columns (called "discrete" in the original note)
        x_train, y_train, x_test, y_test = train_test_split(
            features, target, 0.3, 918)

        booster = GBDT(learning_rate=1)
        booster.fit(x_train, y_train)
        print(booster.predict(x_test), y_test)
        print("R square: %.4f" % booster.score(x_test, y_test))

Example #2

0

Show file

File: ensemble_example.py Project: ykukey/simple_ml

def moon_example():
    """
    AdaBoost example using three different base classifiers.

    NOTE(review): the original docstring names the moon dataset, but the code
    loads the wine dataset via get_wine() and keeps only classes 0 and 1.
    :return:
    """
    features, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    # Base classifiers tried in turn (logistic regression, KNN, CART tree),
    # each paired with the extra string handed to classify_plot.
    base_learners = (
        (ClassifierType.LR, ", LR"),
        (ClassifierType.KNN, ", KNN"),
        (ClassifierType.CART, ", CART"),
    )
    for learner, plot_suffix in base_learners:
        booster = AdaBoost(classifier=learner)
        booster.fit(x_train, y_train)
        print(booster.score(x_test, y_test))
        booster.classify_plot(x_test, y_test, plot_suffix)

Example #3

0

Show file

File: logistic_example.py Project: ykukey/simple_ml

def iris_example():
    """
    Fit LogisticRegression, Lasso and Ridge on iris classes 0 and 1.

    Each model is fitted, its weights printed, then predict, score and
    classify_plot are called on the test split. Only the plain logistic
    model additionally draws the AUC plot.
    :return:
    """
    features, labels = get_iris()
    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    def _fit_and_report(model):
        # Steps shared by all three models; predict/score results are unused.
        model.fit(x_train, y_train)
        print(model.w)
        model.predict(x_test)
        model.score(x_test, y_test)
        model.classify_plot(x_test, y_test)
        return model

    _fit_and_report(LogisticRegression()).auc_plot(x_test, y_test)
    _fit_and_report(Lasso())
    _fit_and_report(Ridge())

Example #4

0

Show file

File: tree_example.py Project: ykukey/simple_ml

def ID3_example():
    """
    Train an ID3 tree on the first four feature columns of the watermelon
    dataset and print its score on the test split.
    :return:
    """
    features, labels = get_watermelon()
    first_four = features[:, :4]
    x_train, y_train, x_test, y_test = train_test_split(
        first_four, labels, 0.3, 918)
    tree = ID3()
    tree.fit(x_train, y_train)
    print(tree.score(x_test, y_test))

Example #5

0

Show file

File: support_vector_eaxmple.py Project: ykukey/simple_ml

def multi_svm_example():
    """
    Fit an SVM with a polynomial kernel (d=2) on the full wine dataset and
    print the predictions next to the true test labels.
    :return:
    """
    # The original note mentions get moon() as an alternative data source.
    features, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)
    classifier = SVM(kernel=KernelType.polynomial, d=2)
    classifier.fit(x_train, y_train)
    predicted = classifier.predict(x_test)
    print(predicted, list(y_test))

Example #6

0

Show file

File: bayes_example.py Project: ykukey/simple_ml

def wine_example():
    """
    Compare three Bayes classifiers on wine classes 0 and 1.

    For each of BayesMinimumError, BayesMinimumRisk and NaiveBayes the test
    score is printed and the classification plot is drawn.
    :return:
    """
    features, labels = get_wine()

    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]

    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    # Bayes minimum error rate
    min_error = BayesMinimumError()
    min_error.fit(x_train, y_train)
    print(min_error.score(x_test, y_test))
    min_error.classify_plot(x_test, y_test)

    # Bayes minimum risk; a risk matrix must be supplied.
    # Risk matrix [[0, 100], [10, 0]]: classifying a 0 as 1 (false accept)
    # costs 100, classifying a 1 as 0 (false reject) costs 10.
    risk_matrix = np.array([[0, 100], [10, 0]])
    min_risk = BayesMinimumRisk(risk_matrix)
    min_risk.fit(x_train, y_train)
    min_risk.predict(x_test)
    print(min_risk.score(x_test, y_test))
    min_risk.classify_plot(x_test, y_test)

    # Naive Bayes
    naive = NaiveBayes()
    naive.fit(x_train, y_train)
    naive.predict(x_test)
    print(naive.score(x_test, y_test))
    naive.classify_plot(x_test, y_test)

Example #7

0

Show file

File: support_vector_eaxmple.py Project: ykukey/simple_ml

def iris_svm_example():
    """
    Fit an SVM (c=0.1) on the full iris dataset and print the predictions
    together with the true test labels.
    :return:
    """
    # All classes are used; the original kept a disabled filter that would
    # restrict the data to classes 0 and 1.
    features, labels = get_iris()
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)
    classifier = SVM(c=0.1)
    classifier.fit(x_train, y_train)
    print(classifier.predict(x_test), y_test)

Example #8

0

Show file

def wine_example():
    """
    Fit KNN on the full wine dataset, print the test score and draw the
    classification plot.
    :return:
    """
    # KNN can handle multi-class problems, so no filtering down to
    # classes 0 and 1 is applied here.
    features, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    neighbours = KNN()
    neighbours.fit(x_train, y_train)
    print(neighbours.score(x_test, y_test))
    neighbours.classify_plot(x_test, y_test)

Example #9

0

Show file

File: multi_regression_example.py Project: ykukey/simple_ml

def iris_example():
    """
    Multiple regression on iris: predict the first feature column from the
    remaining columns, then print beta, r_square and the test score and draw
    the regression plot for col_id=1.
    :return:
    """
    # The class labels returned by the loader are not used.
    data, _ = sc.get_iris()
    target = data[:, 0]
    predictors = data[:, 1:]
    x_train, y_train, x_test, y_test = sd.train_test_split(predictors, target)
    regression = MultiRegression()
    regression.fit(x_train, y_train)
    print(regression.beta)
    print(regression.r_square)
    print(regression.score(x_test, y_test))
    regression.regression_plot(x_test, y_test, col_id=1)

Example #10

0

Show file

def multi_class_example():
    """
    Train a NeuralNetwork (square cost, relu layers) on the full wine dataset
    and draw the classification plot.
    :return:
    """
    features, labels = get_wine()

    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    network = NeuralNetwork(alpha=0.5, cost_func=CostFunction.square)
    network.clear_all()
    # NOTE(review): the meaning of the positional arguments (2, 3) is not
    # visible here; confirm against add_some_layers.
    network.add_some_layers(2, 3, active_func=ActiveFunction.relu)
    network.fit(x_train, y_train)
    # The original author noted that printing network.predict_prob(x_test)
    # raises an error here, so it is not called.
    network.classify_plot(x_test, y_test)

Example #11

0

Show file

File: tree_example.py Project: ykukey/simple_ml

def wine_example():
    """
    Run CART twice on the wine dataset.

    First with the class labels as target (score printed, plot drawn), then
    with the last feature column as target and the remaining columns as
    features (score printed only).
    :return:
    """
    features, labels = get_wine()

    # All classes are kept; no filtering to classes 0 and 1.
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    classifier_tree = CART()
    classifier_tree.fit(x_train, y_train)
    print(classifier_tree.score(x_test, y_test))
    classifier_tree.classify_plot(x_test, y_test)

    # Second run: the last feature column becomes the target.
    column_target = features[:, -1]
    other_columns = features[:, :-1]
    x_train, y_train, x_test, y_test = train_test_split(
        other_columns, column_target, 0.3, 918)

    column_tree = CART()
    column_tree.fit(x_train, y_train)
    print(column_tree.score(x_test, y_test))

Example #12

0

Show file

File: tree_example.py Project: ykukey/simple_ml

def random_forest_example():
    """
    Fit a RandomForest on the full wine dataset, print the test score and
    draw the classification plot.
    :return:
    """
    features, labels = get_wine()

    # All classes are kept; no filtering to classes 0 and 1.
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    # NOTE(review): the meaning of the positional arguments (4, 50) is not
    # visible here; confirm against the RandomForest constructor.
    forest = RandomForest(4, 50)
    forest.fit(x_train, y_train)
    print(forest.score(x_test, y_test))
    forest.classify_plot(x_test, y_test)

Example #13

0

Show file

File: support_vector_eaxmple.py Project: ykukey/simple_ml

def iris_svr_example():
    """
    Support vector regression on iris: predict the first feature column from
    the remaining columns with a gaussian-kernel SVR.

    Prints the correlation matrix between predictions and true values, then
    each prediction beside its true value, and finally draws the regression
    plot for column_id=0.
    :return:
    """
    # The class labels returned by the loader are not used.
    data, _ = get_iris()
    predictors = data[:, 1:]
    target = data[:, 0]
    x_train, y_train, x_test, y_test = train_test_split(
        predictors, target, 0.3, 918)
    regressor = SVR(c=1.0, eps=0.9, kernel=KernelType.gaussian, sigma=2.0)
    regressor.fit(x_train, y_train)
    predicted = regressor.predict(x_test)
    print(np.corrcoef(predicted.ravel(), y_test))
    for position, estimate in enumerate(predicted):
        print(estimate, y_test[position])
    regressor.regression_plot(x_test, y_test, column_id=0)

Example #14

0

Show file

def wine_example():
    """
    Train a NeuralNetwork (square cost, relu layers) on wine classes 0 and 1,
    print the predicted probabilities for the test split and draw the
    classification and AUC plots.
    :return:
    """
    features, labels = get_wine()

    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]

    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 918)

    network = NeuralNetwork(alpha=0.5, cost_func=CostFunction.square)
    network.clear_all()
    # NOTE(review): the meaning of the positional arguments (2, 3) is not
    # visible here; confirm against add_some_layers.
    network.add_some_layers(2, 3, active_func=ActiveFunction.relu)
    network.fit(x_train, y_train)
    print(network.predict_prob(x_test))
    network.classify_plot(x_test, y_test)
    network.auc_plot(x_test, y_test)

Example #15

0

Show file

File: stacking_example.py Project: ykukey/simple_ml

def wine_example():
    """
    Stack LogisticRegression and NaiveBayes with ensemble.Stacking
    (k_folder=5) on the full wine dataset; print the test score and the
    score_mat attribute, then draw the classification plot.
    :return:
    """
    # All classes are kept; the original held a disabled filter for
    # classes 1 and 2.
    features, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.3, 91)

    # A gaussian-kernel SVM was listed as a disabled third base model in the
    # original.
    base_models = [
        logistic.LogisticRegression(),
        bayes.NaiveBayes(),
    ]

    stacker = ensemble.Stacking(base_models, k_folder=5)
    stacker.fit(x_train, y_train)
    print(stacker.score(x_test, y_test))
    print(stacker.score_mat)
    stacker.classify_plot(x_test, y_test)

Example #16

0

Show file

File: logistic_example.py Project: ykukey/simple_ml

def multi_class_example():
    """
    Fit LogisticRegression, Lasso and Ridge (each with has_intercept=True) on
    the full wine dataset using a 0.5 split; print each model's test
    predictions and draw its classification plot.
    :return:
    """
    features, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.5, 918)

    for model_class in (LogisticRegression, Lasso, Ridge):
        model = model_class(has_intercept=True)
        model.fit(x_train, y_train)
        print(model.predict(x_test))
        model.classify_plot(x_test, y_test)

Example #17

0

Show file

File: ensemble_example.py Project: ykukey/simple_ml

def AdaBoost_multi_class_example():
    """
    AdaBoost on the full (multi-class) wine dataset with three different base
    classifiers: LR, CART and KNN, in that order.

    For each base classifier the test predictions are printed and the
    classification plot is drawn with a suffix naming the base classifier.
    :return:
    """
    x, y = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # Each base classifier is paired with the suffix passed to classify_plot.
    # The KNN run previously reused ", CART" (copy-paste slip), which
    # mislabelled its plot.
    base_learners = (
        (ClassifierType.LR, ", LR"),
        (ClassifierType.CART, ", CART"),
        (ClassifierType.KNN, ", KNN"),
    )
    for classifier, plot_suffix in base_learners:
        ada = AdaBoost(classifier=classifier)
        ada.fit(x_train, y_train)
        print(ada.predict(x_test))
        ada.classify_plot(x_test, y_test, plot_suffix)

Example #18

0

Show file

def cross_validation(model,
                     x,
                     y,
                     method=CrossValidationType.holdout,
                     test_size=0.3,
                     cv=5,
                     seed=918):
    """
    Cross-validation helper: fit and score a model on repeated splits.

    :param model:         model object providing fit and score methods
    :param x:             features, must be a numpy ndarray
    :param y:             labels, same number of samples as x
    :param method:        cross-validation method (holdout or k_folder)
    :param test_size:     split fraction forwarded to train_test_split; only
                          used by the holdout method (presumably the held-out
                          test fraction -- confirm against train_test_split)
    :param cv:            number of rounds; for the k_folder method, k = cv
    :param seed:          random seed; only used by the k_folder method
    :return:              numpy array of length cv holding the score of each
                          round
    :raises FeatureTypeError:           x is not a numpy ndarray
    :raises SampleNumberMismatchError:  x and y differ in sample count
    :raises CrossValidationTypeError:   method is not a supported type
    """
    if not isinstance(x, np.ndarray):
        raise FeatureTypeError

    if x.shape[0] != len(y):
        raise SampleNumberMismatchError

    result = np.zeros(cv)
    if method == CrossValidationType.holdout:
        # NOTE(review): seed is not forwarded on this path; only the k_folder
        # branch uses it.
        for i in range(cv):
            # This project's train_test_split is unpacked by its other
            # callers as (x_train, y_train, x_test, y_test). The previous
            # (x_train, x_test, y_train, y_test) order fitted the model on
            # training labels swapped with test features.
            x_train, y_train, x_test, y_test = train_test_split(
                x, y, test_size)
            model.fit(x_train, y_train)
            result[i] = model.score(x_test, y_test)
        return result
    elif method == CrossValidationType.k_folder:
        for i, (test_ids,
                train_ids) in enumerate(get_k_folder_idx(x.shape[0], cv,
                                                         seed)):
            x_test, y_test = x[test_ids], y[test_ids]
            x_train, y_train = x[train_ids], y[train_ids]
            model.fit(x_train, y_train)
            result[i] = model.score(x_test, y_test)
        return result
    else:
        raise CrossValidationTypeError

Example #19

0

Show file

File: logistic_example.py Project: ykukey/simple_ml

def wine_example():
    """
    Fit LogisticRegression, Lasso and Ridge on wine classes 0 and 1 using a
    0.5 split; print each model's weights and draw its classification and
    AUC plots.
    :return:
    """
    features, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    features, labels = features[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(
        features, labels, 0.5, 918)

    # Plain logistic regression: built with an explicit intercept and scored
    # (result unused) before the weights are printed.
    plain = LogisticRegression(has_intercept=True)
    plain.fit(x_train, y_train)
    plain.score(x_test, y_test)
    print(plain.w)
    plain.classify_plot(x_test, y_test)
    plain.auc_plot(x_test, y_test)

    # The two regularised variants follow the same steps with default
    # constructor arguments.
    for model_class in (Lasso, Ridge):
        regularised = model_class()
        regularised.fit(x_train, y_train)
        print(regularised.w)
        regularised.classify_plot(x_test, y_test)
        regularised.auc_plot(x_test, y_test)