import pandas as pd from sklearn.ensemble import BaggingClassifier dataset = pd.read_csv('dataset.csv') target = dataset.iloc[:,30].values data = dataset.iloc[:,0:30].values print(dataset) ## n_estimators is how many trees you want ## Bagging estimator do not use all the features(classification line in gini impurity). it only use some subset of the feature machine = BaggingClassifier(n_estimators = 21) r2, confusion_matrix, accu_rate = kfold_module.run_kfold(3,data,target,machine,1,1) print(r2, accu_rate) for i in confusion_matrix: print(i) """ [0.8245609755336845, 0.8309294179398095, 0.8268148424855563] [0.9216253918730406, 0.9245053774731127, 0.9230942309423095] [[ 532 380 10 0] [ 119 43969 1616 3] [ 1 2203 11162 362] [ 0 10 521 5779]] [[ 591 365 3 0] [ 129 44284 1383 3] [ 0 2242 11077 421] [ 0 2 485 5682]] [[ 558 366 9 0]
# Spot-check the previously fitted `machine` on three random rows of six
# integer features. `machine`, `data`, `target`, `data_source` and the imports
# used here are defined earlier in the script, outside this view.
test_x = np.random.randint(0, 10, (3, 6))
prediction = machine.predict(test_x)
# print(prediction)
# Recorded output plus the author's note, which reads: "clearly very
# inaccurate; our target only takes the values 0 and 1".
"""[5.58515802 3.63327177 2.89742717] 显然恨不准确,我们的target只有0和1"""

# Same spot-check with a logistic-regression classifier fitted on all rows.
machine1 = linear_model.LogisticRegression()
machine1.fit(data, target)
test_x = np.random.randint(0, 10, (3, 6))
prediction1 = machine1.predict(test_x)
# print(prediction1)
""" [1 1 1] """

# 4-fold cross-validation of a fresh logistic regression on the same data.
r2 = kfold_module.run_kfold(4, data, target, linear_model.LogisticRegression())
# print(r2)
""" [0.5933534356123221, 0.583945682347001, 0.586508569823394, 0.5872767696511276] """

# k-nearest-neighbours with default settings, predicting the last test_x.
result = KNeighborsClassifier()
result.fit(data, target)
prediction2 = result.predict(test_x)
# print(prediction2)

# how about predict y2
y2 = data_source.iloc[:, 2].values
# Fix: the original passed `target` here, so y2 was never used and r2_y2 just
# repeated the evaluation above. Cross-validate against y2 instead.
r2_y2 = kfold_module.run_kfold(4, data, y2, linear_model.LogisticRegression())
# print(r2_y2)
from sklearn import linear_model import pandas as pd import numpy as np import kfold_module from sklearn import linear_model from sklearn.neighbors import KNeighborsClassifier import matplotlib.pyplot as plt from sklearn.metrics import confusion_matrix data_source = pd.read_csv('logistic_dataset.csv') # here, y1 is target target = data_source.iloc[:, 2].values data = data_source.iloc[:, 3:9].values target_for_real = data_source.iloc[:, 1].values r2, confusion_matri, accuracy_score = kfold_module.run_kfold( 4, data, target, linear_model.LogisticRegression(), 1, 1) """打印的矩阵很难看,没有对齐""" print(r2) for confu_ma in confusion_matri: print(confu_ma) """此时发现打印出来的矩阵很好看""" """对角线数字越大,说明预测的越准""" print(accuracy_score) """[0.6924, 0.7292, 0.7184, 0.7124]""" """if you think your model is good, then you create a new machine, and fit the machine with all of the data and target, and use real_world_X for prediction""" real_world_x = [[24, 55, 31, 3, 0, 7], [ 5,
from sklearn import svm
import kfold_module

# Build a two-cluster toy dataset, plot it, and cross-validate an SVM on it.
# `make_blobs` and `plt` are presumably imported above this view.

# Earlier variant, kept for reference:
# data, target = make_blobs(n_samples = 400, centers = 2, cluster_std=0.7)
# random_state is same as np.random.seed()
# cluster_std sets how dispersed the two clusters are: the larger the number,
# the more spread out the points.
blob_settings = dict(n_samples=400, centers=2, cluster_std=1, random_state=0)
data, target = make_blobs(**blob_settings)

# Relabel class 0 as -1 in place, so the labels become -1 / 1.
zero_label_mask = target == 0
target[zero_label_mask] = -1

# Four-cluster variant, kept by the author as a bare string:
""" data, target = make_blobs(n_samples = 400, centers = 4, cluster_std=0.9) """
# print(data)
# print(target)

# Scatter the two feature columns, coloured by label, and save the figure.
x_coords, y_coords = data[:, 0], data[:, 1]
plt.scatter(x_coords, y_coords, c=target, alpha=0.3)
plt.savefig('sample.png')
#plt.show()

# 5-fold evaluation of an SVC, also requesting confusion matrices and accuracy.
classifier = svm.SVC(gamma='auto')
result_r2, result_confusion_matrix, result_accu_rate = kfold_module.run_kfold(
    5,
    data,
    target,
    classifier,
    confusion=1,
    use_accuracy=1,
)
print(result_r2)
print(result_accu_rate)
for fold_matrix in result_confusion_matrix:
    print(fold_matrix)
# Recorded output of a previous run:
"""([1.0, 1.0, 1.0, 1.0, 1.0], [], []) """
plt.show()