def watermelon_example():
    """
    GBDT example, run first on the watermelon data set and then on the wine data set.

    GBDT currently supports regression only, not classification, so the
    label vector returned by each loader is ignored and the last feature
    column is used as the continuous target instead.

    :return: None; predictions and the R-square score are printed
    """
    for load_data in (get_watermelon, get_wine):
        table, _ = load_data()
        target = table[:, -1]      # last column is the continuous target
        features = table[:, :-1]   # every other column is a feature
        # 0.3 and 918 are passed positionally (presumably test share and seed)
        x_train, y_train, x_test, y_test = train_test_split(features, target, 0.3, 918)

        booster = GBDT(learning_rate=1)
        booster.fit(x_train, y_train)
        print(booster.predict(x_test), y_test)
        print("R square: %.4f" % booster.score(x_test, y_test))
def moon_example():
    """
    AdaBoost example with three different sub-classifiers.

    NOTE: despite the function name, the data is loaded with ``get_wine()``
    and restricted to the samples labelled 0 or 1.

    :return: None; each score is printed and a classification plot is drawn
    """
    data, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    data, labels = data[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    # (sub-classifier type, title suffix passed to classify_plot)
    runs = (
        (ClassifierType.LR, ", LR"),      # Logistic regression as sub-classifier
        (ClassifierType.KNN, ", KNN"),    # KNN as sub-classifier
        (ClassifierType.CART, ", CART"),  # CART tree as sub-classifier
    )
    for classifier_type, suffix in runs:
        booster = AdaBoost(classifier=classifier_type)
        booster.fit(x_train, y_train)
        print(booster.score(x_test, y_test))
        booster.classify_plot(x_test, y_test, suffix)
def iris_example():
    """
    Fit LogisticRegression, Lasso and Ridge on the iris samples labelled 0 or 1.

    For each model the fitted weights ``w`` are printed and a classification
    plot is drawn; the AUC plot is drawn for LogisticRegression only.

    :return: None
    """
    data, labels = get_iris()
    binary_mask = (labels == 0) | (labels == 1)
    data, labels = data[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    for model_cls in (LogisticRegression, Lasso, Ridge):
        model = model_cls()
        model.fit(x_train, y_train)
        print(model.w)
        # predict/score results are intentionally discarded, as in the original
        model.predict(x_test)
        model.score(x_test, y_test)
        model.classify_plot(x_test, y_test)
        if model_cls is LogisticRegression:
            model.auc_plot(x_test, y_test)
def ID3_example():
    """
    ID3 decision-tree example on the first four columns of the watermelon data set.

    :return: None; the test-set score is printed
    """
    data, labels = get_watermelon()
    first_four_columns = data[:, :4]
    x_train, y_train, x_test, y_test = train_test_split(
        first_four_columns, labels, 0.3, 918
    )

    tree = ID3()
    tree.fit(x_train, y_train)
    test_score = tree.score(x_test, y_test)
    print(test_score)
def multi_svm_example():
    """
    SVM example with a degree-2 polynomial kernel on the full wine data set.

    :return: None; predictions are printed next to the true test labels
    """
    # The original author's note mentions the moon data set as an alternative.
    data, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    classifier = SVM(kernel=KernelType.polynomial, d=2)
    classifier.fit(x_train, y_train)
    predicted = classifier.predict(x_test)
    print(predicted, list(y_test))
def wine_example():
    """
    Bayes classifier examples on the wine samples labelled 0 or 1.

    Runs, in order: the minimum-error Bayes classifier, the minimum-risk
    Bayes classifier and naive Bayes. Each score is printed and a
    classification plot is drawn.

    :return: None
    """
    data, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    data, labels = data[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    # Bayes minimum error rate
    min_error = BayesMinimumError()
    min_error.fit(x_train, y_train)
    print(min_error.score(x_test, y_test))
    min_error.classify_plot(x_test, y_test)

    # Bayes minimum risk; a risk matrix has to be supplied.
    # [[0, 100], [10, 0]] means: classifying a 0 as 1 (false acceptance)
    # costs 100, classifying a 1 as 0 (false rejection) costs 10.
    risk_matrix = np.array([[0, 100], [10, 0]])
    min_risk = BayesMinimumRisk(risk_matrix)
    min_risk.fit(x_train, y_train)
    min_risk.predict(x_test)
    print(min_risk.score(x_test, y_test))
    min_risk.classify_plot(x_test, y_test)

    # Naive Bayes
    naive = NaiveBayes()
    naive.fit(x_train, y_train)
    naive.predict(x_test)
    print(naive.score(x_test, y_test))
    naive.classify_plot(x_test, y_test)
def iris_svm_example():
    """
    SVM example (``c=0.1``) on the iris data set.

    All labels are kept; no filtering down to classes 0 and 1 is applied.

    :return: None; predictions are printed next to the true test labels
    """
    data, labels = get_iris()
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    classifier = SVM(c=0.1)
    classifier.fit(x_train, y_train)
    predicted = classifier.predict(x_test)
    print(predicted, y_test)
def wine_example():
    """
    KNN example on the full wine data set.

    KNN can handle multi-class problems, so the labels are not filtered
    down to classes 0 and 1.

    :return: None; the test-set score is printed and a classification plot is drawn
    """
    data, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    neighbours = KNN()
    neighbours.fit(x_train, y_train)
    test_score = neighbours.score(x_test, y_test)
    print(test_score)
    neighbours.classify_plot(x_test, y_test)
def iris_example():
    """
    Multiple-regression example on the iris features.

    The label vector from ``sc.get_iris()`` is ignored: the first feature
    column is the regression target and the remaining columns are the
    predictors.

    :return: None; ``beta``, ``r_square`` and the test-set score are printed
             and a regression plot is drawn for ``col_id=1``
    """
    samples, _ = sc.get_iris()
    target = samples[:, 0]
    predictors = samples[:, 1:]
    x_train, y_train, x_test, y_test = sd.train_test_split(predictors, target)

    regression = MultiRegression()
    regression.fit(x_train, y_train)
    for fitted_value in (regression.beta, regression.r_square):
        print(fitted_value)
    print(regression.score(x_test, y_test))
    regression.regression_plot(x_test, y_test, col_id=1)
def multi_class_example():
    """
    Neural-network example on the full (unfiltered) wine data set.

    :return: None; a classification plot of the test set is drawn
    """
    data, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    network = NeuralNetwork(alpha=0.5, cost_func=CostFunction.square)
    network.clear_all()
    network.add_some_layers(2, 3, active_func=ActiveFunction.relu)
    network.fit(x_train, y_train)
    # NOTE: predict_prob(x_test) is deliberately not printed here; the
    # original author marked that call as raising an error in this example.
    network.classify_plot(x_test, y_test)
def wine_example():
    """
    CART example on the wine data set, run twice.

    First run: the wine labels are the target (all classes kept); the score
    is printed and a classification plot is drawn.
    Second run: the last feature column becomes the target and the
    remaining columns the features; only the score is printed.

    :return: None
    """
    data, labels = get_wine()

    # Run 1: wine labels as target
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)
    label_tree = CART()
    label_tree.fit(x_train, y_train)
    print(label_tree.score(x_test, y_test))
    label_tree.classify_plot(x_test, y_test)

    # Run 2: last feature column as target
    column_target = data[:, -1]
    other_columns = data[:, :-1]
    x_train, y_train, x_test, y_test = train_test_split(
        other_columns, column_target, 0.3, 918
    )
    column_tree = CART()
    column_tree.fit(x_train, y_train)
    print(column_tree.score(x_test, y_test))
def random_forest_example():
    """
    Random-forest example on the full (unfiltered) wine data set.

    The forest is built as ``RandomForest(4, 50)``; the meaning of the two
    positional arguments is defined by ``RandomForest`` itself.

    :return: None; the test-set score is printed and a classification plot is drawn
    """
    data, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    forest = RandomForest(4, 50)
    forest.fit(x_train, y_train)
    test_score = forest.score(x_test, y_test)
    print(test_score)
    forest.classify_plot(x_test, y_test)
def iris_svr_example():
    """
    Support-vector regression example on the iris features.

    The iris labels are ignored: the first feature column is the regression
    target and the remaining columns are the inputs.

    :return: None; prints the correlation matrix between predictions and
             targets, then every (prediction, target) pair, and finally
             draws a regression plot for ``column_id=0``
    """
    samples, _ = get_iris()
    inputs = samples[:, 1:]
    target = samples[:, 0]
    x_train, y_train, x_test, y_test = train_test_split(inputs, target, 0.3, 918)

    regressor = SVR(c=1.0, eps=0.9, kernel=KernelType.gaussian, sigma=2.0)
    regressor.fit(x_train, y_train)
    predicted = regressor.predict(x_test)

    print(np.corrcoef(predicted.ravel(), y_test))
    for estimate, actual in zip(predicted, y_test):
        print(estimate, actual)
    regressor.regression_plot(x_test, y_test, column_id=0)
def wine_example():
    """
    Neural-network example on the wine samples labelled 0 or 1.

    :return: None; predicted probabilities are printed, then a
             classification plot and an AUC plot are drawn
    """
    data, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    data, labels = data[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 918)

    network = NeuralNetwork(alpha=0.5, cost_func=CostFunction.square)
    network.clear_all()
    network.add_some_layers(2, 3, active_func=ActiveFunction.relu)
    network.fit(x_train, y_train)

    probabilities = network.predict_prob(x_test)
    print(probabilities)
    network.classify_plot(x_test, y_test)
    network.auc_plot(x_test, y_test)
def wine_example():
    """
    Stacking example on the full (unfiltered) wine data set.

    The base models are a logistic regression and a naive Bayes classifier,
    stacked with ``k_folder=5``. The original author left an SVM with a
    gaussian kernel as a disabled third candidate.

    :return: None; the test-set score and ``score_mat`` are printed and a
             classification plot is drawn
    """
    data, labels = get_wine()
    # Note: the fourth argument here is 91 (other examples in this code base use 918).
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.3, 91)

    base_models = [
        logistic.LogisticRegression(),
        bayes.NaiveBayes(),
    ]
    stacked = ensemble.Stacking(base_models, k_folder=5)
    stacked.fit(x_train, y_train)

    print(stacked.score(x_test, y_test))
    print(stacked.score_mat)
    stacked.classify_plot(x_test, y_test)
def multi_class_example():
    """
    Multi-class example: LogisticRegression, Lasso and Ridge on the full wine data set.

    Each model is built with ``has_intercept=True``; its test-set
    predictions are printed and a classification plot is drawn.

    :return: None
    """
    data, labels = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.5, 918)

    for model_cls in (LogisticRegression, Lasso, Ridge):
        model = model_cls(has_intercept=True)
        model.fit(x_train, y_train)
        print(model.predict(x_test))
        model.classify_plot(x_test, y_test)
def AdaBoost_multi_class_example():
    """
    Multi-class AdaBoost example on the full (unfiltered) wine data set.

    AdaBoost is fitted three times, with LR, CART and KNN as the
    sub-classifier. For every run the test-set predictions are printed and
    a classification plot is drawn whose title suffix names the
    sub-classifier that was actually used.

    :return: None
    """
    x, y = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # (sub-classifier type, title suffix passed to classify_plot).
    # The KNN run used to be plotted with the suffix ", CART" (copy-paste
    # slip), which mislabelled the figure; it now reads ", KNN".
    runs = (
        (ClassifierType.LR, ", LR"),
        (ClassifierType.CART, ", CART"),
        (ClassifierType.KNN, ", KNN"),
    )
    for classifier_type, suffix in runs:
        ada = AdaBoost(classifier=classifier_type)
        ada.fit(x_train, y_train)
        print(ada.predict(x_test))
        ada.classify_plot(x_test, y_test, suffix)
def cross_validation(model, x, y, method=CrossValidationType.holdout,
                     test_size=0.3, cv=5, seed=918):
    """
    Cross-validate a model and return one score per round.

    :param model: model object; its ``fit`` and ``score`` methods are called
    :param x: features, must be a ``np.ndarray``
    :param y: labels; ``len(y)`` must equal ``x.shape[0]``
    :param method: cross-validation method (``CrossValidationType.holdout``
                   or ``CrossValidationType.k_folder``)
    :param test_size: split proportion forwarded to ``train_test_split``;
                      only used by the holdout method (the original note
                      calls it the training-set share, the name suggests
                      the test-set share -- TODO confirm)
    :param cv: number of rounds; for the k_folder method, k = cv
    :param seed: random seed, forwarded to ``get_k_folder_idx``
    :return: ``np.ndarray`` of length ``cv`` holding ``model.score`` of each round
    :raises FeatureTypeError: if ``x`` is not a ``np.ndarray``
    :raises SampleNumberMismatchError: if ``x`` and ``y`` differ in sample count
    :raises CrossValidationTypeError: if ``method`` is not a supported type
    """
    if not isinstance(x, np.ndarray):
        raise FeatureTypeError
    if x.shape[0] != len(y):
        raise SampleNumberMismatchError

    result = np.zeros(cv)

    if method == CrossValidationType.holdout:
        for i in range(cv):
            # train_test_split returns (x_train, y_train, x_test, y_test),
            # as every other caller in this code base unpacks it. The
            # previous scikit-learn-style unpacking
            # (x_train, x_test, y_train, y_test) swapped the two middle
            # arrays, so the model was fitted and scored on mismatched data.
            # NOTE(review): `seed` is not forwarded here, so it has no
            # effect on the holdout method -- confirm whether intended.
            x_train, y_train, x_test, y_test = train_test_split(
                x, y, test_size)
            model.fit(x_train, y_train)
            result[i] = model.score(x_test, y_test)
    elif method == CrossValidationType.k_folder:
        folds = get_k_folder_idx(x.shape[0], cv, seed)
        for i, (test_ids, train_ids) in enumerate(folds):
            x_test, y_test = x[test_ids], y[test_ids]
            x_train, y_train = x[train_ids], y[train_ids]
            model.fit(x_train, y_train)
            result[i] = model.score(x_test, y_test)
    else:
        raise CrossValidationTypeError

    return result
def wine_example():
    """
    Fit LogisticRegression, Lasso and Ridge on the wine samples labelled 0 or 1.

    For every model the fitted weights ``w`` are printed and both a
    classification plot and an AUC plot are drawn. Only the logistic
    regression is built with ``has_intercept=True`` and has ``score``
    called on it (the result is discarded).

    :return: None
    """
    data, labels = get_wine()
    binary_mask = (labels == 0) | (labels == 1)
    data, labels = data[binary_mask], labels[binary_mask]
    x_train, y_train, x_test, y_test = train_test_split(data, labels, 0.5, 918)

    logistic_model = LogisticRegression(has_intercept=True)
    logistic_model.fit(x_train, y_train)
    logistic_model.score(x_test, y_test)
    print(logistic_model.w)
    logistic_model.classify_plot(x_test, y_test)
    logistic_model.auc_plot(x_test, y_test)

    for penalised_cls in (Lasso, Ridge):
        penalised = penalised_cls()
        penalised.fit(x_train, y_train)
        print(penalised.w)
        penalised.classify_plot(x_test, y_test)
        penalised.auc_plot(x_test, y_test)