예제 #1
0
def lrisTrain():
    """Train a decision tree on the selected columns and export it as a PDF.

    Steps, in order:

    1. Load ``../data/data.csv`` through ``dataUtil.load_data`` and drop the
       first column.
    2. Keep a fixed list of columns (ending with ``label``).
    3. Split into features/target with ``dataUtil.get_x_y`` and into
       train/test parts with ``dataUtil.k_fold``.
    4. Fit a decision-tree classifier and print its accuracy on the test part.
    5. Render the fitted tree with Graphviz and write it to ``treetwo.pdf``.

    Every intermediate data set is printed to stdout for inspection.
    The function takes no arguments and returns ``None``.
    """
    raw = dataUtil.load_data('../data/data.csv')
    # NOTE(review): the first column is dropped — presumably a saved index
    # column; confirm against the CSV.
    data = raw.iloc[:, 1:]
    # Named ``columns`` (not ``str``) so the builtin ``str`` is not shadowed.
    columns = [
        'number', 'cql', 'vitalCapacity', 'a', 'zxqdcs', 'age', 'BMI', 'label'
    ]
    data = data[columns]
    # Standardization is currently disabled:
    # data = dataUtil.standardization(data)
    print('*' * 10, 'data(标准化之后)')
    print(data.head(10))

    # Split the table into features (x) and target (y).
    x, y = dataUtil.get_x_y(data)
    print('*' * 10, 'x')
    print(x)
    print('*' * 10, 'y')
    print(y)

    # Split into training and test sets.
    train_data, test_data, train_target, test_target = dataUtil.k_fold(x, y)
    for title, part in (
        ('train_data', train_data),
        ('test_data', test_data),
        ('train_target', train_target),
        ('test_target', test_target),
    ):
        print('*' * 10, title)
        print(part)

    # Model: a decision-tree *classifier*. The result is scored with
    # accuracy_score, which is a classification metric and rejects the
    # continuous (leaf-mean) outputs a DecisionTreeRegressor can produce.
    clf = tree.DecisionTreeClassifier()
    # Fit on the training split.
    clf.fit(train_data, train_target)
    # Predict on the held-out split.
    y_pred = clf.predict(test_data)
    # Validate by accuracy.
    print(metrics.accuracy_score(y_true=test_target, y_pred=y_pred))

    # Export the fitted tree as DOT source and render it to a PDF file.
    # (To keep the DOT source instead, write ``dot_data`` to a ``.dot`` file.)
    dot_data = tree.export_graphviz(clf,
                                    out_file=None,
                                    filled=True,
                                    rounded=True,
                                    special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    graph.write_pdf("treetwo.pdf")
예제 #2
0
"""
import matplotlib.pyplot as plt
from sko.GA import GA
from math_model.py import ANN
import numpy as np
import pandas as pd
from math_model.util import dataUtil

# Load the data set used by this script and report its shape.
data = dataUtil.load_data('../data/data_22.csv')
print('data shape:', data.shape)
# Disabled: drop the first two columns.
# data = data.iloc[:, 2:]
# Disabled: standardize the whole table.
# data = dataUtil.standardization(data)
# Multiplied by 27 in schaffer() below to find the index at which its
# parameter vector is split.
# NOTE(review): presumably the number of input features — confirm.
input_num = 14
# Split the table into x and y.
x, y = dataUtil.get_x_y(data)
print('x:', x.shape)
print('y:', y.shape)
# Disabled: normalize x.
# x = dataUtil.normalization(x)
# Standardize x (the only preprocessing step currently enabled).
x = dataUtil.standardization2(x)
# Disabled: rescale x.
# x = dataUtil.scale(x)
# Module-level float initialised to 0.0; where it is updated is not visible
# in this part of the file.
best = float(0.0)


def schaffer(p):  # 求a,b为何值时误差最小(a=1,b=0)
    p = pd.DataFrame(p)
    a = p.iloc[:input_num * 27]
    b = p.iloc[input_num * 27:]
예제 #3
0
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.semi_supervised import LabelSpreading

from math_model.util import dataUtil

# Load the data.
data0 = dataUtil.load_data('../data/data.csv')
# Relabel: every row whose label is 0 becomes -1, the value scikit-learn's
# semi-supervised estimators use to mark a sample as unlabeled.
data0.loc[data0['label'] == 0, 'label'] = -1
# Rows whose 'number' is returned by find_92() are confirmed negatives:
# set their label back to 0.
num_list = dataUtil.find_92()
data0.loc[data0['number'].isin(num_list), 'label'] = 0
print(data0[data0['number'].isin(num_list)])
print(data0)
# Keep the columns from index 5 onwards, then split into x and y.
data0 = data0.iloc[:, 5:]
data_x, data_y = dataUtil.get_x_y(data0)
# Training data (a copy of the labels was previously taken here):
# data_y_train = np.copy(data_y)
print('data_y_train:', data_y)
# Models: LabelSpreading is the semi-supervised model; an RBF-kernel SVC is
# fitted on the same data alongside it. Each is stored as a
# (fitted estimator, labels) pair.
clf = LabelSpreading(max_iter=100, kernel='rbf', gamma=0.1)
ls = (clf.fit(data_x, data_y), data_y)
rbf_svc = (svm.SVC(kernel='rbf', gamma=.5).fit(data_x, data_y), data_y)
# Predict a label for every sample with the fitted LabelSpreading model.
y_pre = clf.predict(data_x)

print('预测:', y_pre)
# Persist the predictions as text.
np.savetxt('pre.txt', y_pre)
# Disabled evaluation / plotting code:
# print('真实:', y_test)
# print('正确个数:', len((y_pre == y_test)))
# h = 0.02
# x_min, x_max = data_x[:, 0].min() - 1, data_x[:, 0].max() + 1