Example #1
import pandas as pd
from sklearn.ensemble import BaggingClassifier

import kfold_module  # local helper module providing run_kfold()

dataset = pd.read_csv('dataset.csv')

target = dataset.iloc[:, 30].values    # column 30 holds the class label
data = dataset.iloc[:, 0:30].values    # columns 0-29 are the features
print(dataset)

##  n_estimators is the number of trees you want in the ensemble
##  A bagging estimator does not have to use all of the features when building
##  each tree (e.g. for the Gini-impurity splits); it can train each one on a
##  subset of the features (see the sketch after the output below)
machine = BaggingClassifier(n_estimators=21)
r2, confusion_matrices, accu_rate = kfold_module.run_kfold(3, data, target, machine, confusion=1, use_accuracy=1)
print(r2, accu_rate)
for i in confusion_matrices:
    print(i)

"""
[0.8245609755336845, 0.8309294179398095, 0.8268148424855563] [0.9216253918730406, 0.9245053774731127, 0.9230942309423095]
[[  532   380    10     0]
 [  119 43969  1616     3]
 [    1  2203 11162   362]
 [    0    10   521  5779]]
[[  591   365     3     0]
 [  129 44284  1383     3]
 [    0  2242 11077   421]
 [    0     2   485  5682]]
[[  558   366     9     0]
 ...
"""
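## The feature subsetting mentioned in the comment above is exposed through
## BaggingClassifier's max_features argument (max_samples does the same for
## rows). A hedged sketch; the 0.8/0.5 values are illustrative, not from the source:
machine_subset = BaggingClassifier(n_estimators=21, max_samples=0.8, max_features=0.5)
machine_subset.fit(data, target)
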
Example #2

from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
import kfold_module

# The beginning of this example is missing from the source; the lines below
# are a guess that is consistent with the rest of the code: the same
# logistic_dataset.csv as in Example #3, with column 1 assumed as the label.
data_source = pd.read_csv('logistic_dataset.csv')
target = data_source.iloc[:, 1].values
data = data_source.iloc[:, 3:9].values

# Judging from the continuous predictions recorded below, a plain linear
# regression was presumably fit first.
machine = linear_model.LinearRegression()
machine.fit(data, target)

test_x = np.random.randint(0, 10, (3, 6))
prediction = machine.predict(test_x)
# print(prediction)
"""
[5.58515802 3.63327177 2.89742717]
Obviously very inaccurate; our target only takes the values 0 and 1.
"""

# Logistic regression, by contrast, predicts discrete class labels.
machine1 = linear_model.LogisticRegression()
machine1.fit(data, target)
test_x = np.random.randint(0, 10, (3, 6))
prediction1 = machine1.predict(test_x)
# print(prediction1)
"""
[1 1 1]
"""

# Without the confusion/use_accuracy flags, run_kfold returns only the r2 list.
r2 = kfold_module.run_kfold(4, data, target, linear_model.LogisticRegression())
# print(r2)
"""
[0.5933534356123221, 0.583945682347001, 0.586508569823394, 0.5872767696511276]
"""

knn_machine = KNeighborsClassifier()
knn_machine.fit(data, target)
prediction2 = knn_machine.predict(test_x)
# print(prediction2)

# How about predicting y2 instead?
y2 = data_source.iloc[:, 2].values
r2_y2 = kfold_module.run_kfold(4, data, y2,
                               linear_model.LogisticRegression())
# print(r2_y2)
Example #3
import pandas as pd
import numpy as np
import kfold_module
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

data_source = pd.read_csv('logistic_dataset.csv')
# here, y1 (column 2) is the target
target = data_source.iloc[:, 2].values
data = data_source.iloc[:, 3:9].values
target_for_real = data_source.iloc[:, 1].values

r2, confusion_matrices, accuracy_scores = kfold_module.run_kfold(
    4, data, target, linear_model.LogisticRegression(), confusion=1, use_accuracy=1)
"""打印的矩阵很难看,没有对齐"""
print(r2)

for confu_ma in confusion_matrices:
    print(confu_ma)
"""此时发现打印出来的矩阵很好看"""
"""对角线数字越大,说明预测的越准"""
print(accuracy_scores)
"""[0.6924, 0.7292, 0.7184, 0.7124]"""
"""if you think your model is good, then you create a new machine,
and fit the machine with all of the data and target,
and use real_world_X for prediction"""

# The real_world_x literal below is cut off in the source:
# real_world_x = [[24, 55, 31, 3, 0, 7], [
#     5,
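## A hedged sketch of the recipe described in the comment above (refit on all
## of the data, then predict); the second row of real_world_x is invented for
## illustration because the original literal is cut off:
final_machine = linear_model.LogisticRegression()
final_machine.fit(data, target)
real_world_x = [[24, 55, 31, 3, 0, 7],
                [5, 40, 28, 1, 1, 3]]  # second row is hypothetical
print(final_machine.predict(real_world_x))
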
Example #4

from sklearn import svm
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import kfold_module

# data, target = make_blobs(n_samples=400, centers=2, cluster_std=0.7)
## random_state plays the same role as np.random.seed()
## cluster_std sets the spread of the two clusters: the larger the value, the more dispersed they are
data, target = make_blobs(n_samples=400,
                          centers=2,
                          cluster_std=1,
                          random_state=0)
target[target == 0] = -1  # relabel the classes as -1/+1, the usual SVM convention
"""
data, target = make_blobs(n_samples = 400, centers = 4, cluster_std=0.9)
"""

# print(data)
# print(target)

plt.scatter(data[:, 0], data[:, 1], c=target, alpha=0.3)
plt.savefig('sample.png')
#plt.show()

result_r2, result_confusion_matrix, result_accu_rate = kfold_module.run_kfold(
    5, data, target, svm.SVC(gamma='auto'), confusion=1, use_accuracy=1)
print(result_r2)
print(result_accu_rate)
for i in result_confusion_matrix:
    print(i)
"""([1.0, 1.0, 1.0, 1.0, 1.0], [], [])
"""
plt.show()
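
## Since all five fold scores above are 1.0, the two blobs are cleanly
## separated; one can visualize the SVC's decision boundary with matplotlib.
## A sketch using the objects defined above ('boundary.png' is our name):
import numpy as np

final_svm = svm.SVC(gamma='auto')
final_svm.fit(data, target)

# evaluate the decision function on a grid covering the data range
xx, yy = np.meshgrid(np.linspace(data[:, 0].min() - 1, data[:, 0].max() + 1, 200),
                     np.linspace(data[:, 1].min() - 1, data[:, 1].max() + 1, 200))
zz = final_svm.decision_function(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.contour(xx, yy, zz, levels=[0], colors='k')  # the decision boundary
plt.scatter(data[:, 0], data[:, 1], c=target, alpha=0.3)
plt.savefig('boundary.png')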