import pydotplus
from sklearn import metrics, tree

from math_model.util import dataUtil


def lrisTrain():
    data0 = dataUtil.load_data('../data/data.csv')
    data = data0.iloc[:, 1:]
    cols = ['number', 'cql', 'vitalCapacity', 'a', 'zxqdcs', 'age', 'BMI', 'label']
    data = data[cols]
    # Standardization
    # data = dataUtil.standardization(data)
    print('*' * 10, 'data (after standardization)')
    print(data.head(10))

    # Split the data into test and validation parts
    # Split into x, y
    x, y = dataUtil.get_x_y(data)
    print('*' * 10, 'x')
    print(x)
    print('*' * 10, 'y')
    print(y)

    # Split into training and test sets
    train_data, test_data, train_target, test_target = dataUtil.k_fold(x, y)
    print('*' * 10, 'train_data')
    print(train_data)
    print('*' * 10, 'test_data')
    print(test_data)
    print('*' * 10, 'train_target')
    print(train_target)
    print('*' * 10, 'test_target')
    print(test_target)

    # Model: decision tree
    # clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf = tree.DecisionTreeRegressor()
    # Note: metrics.accuracy_score below needs discrete predictions, so switch to the
    # classifier above if the regressor returns non-integer values.
    # AdaBoost lives in sklearn.ensemble, not sklearn.tree (see the sketch after this function):
    # clf = AdaBoostClassifier(
    #     base_estimator=tree.DecisionTreeClassifier(max_depth=5, min_samples_split=30, min_samples_leaf=5),
    #     n_estimators=10, learning_rate=0.2)

    # Train on the training set
    clf.fit(train_data, train_target)
    # Predict
    y_pred = clf.predict(test_data)
    # Method 1: validate via accuracy
    print(metrics.accuracy_score(y_true=test_target, y_pred=y_pred))

    # Plotting method 1: write a .dot file
    # with open('treeone.dot', 'w') as f:
    #     dot_data = tree.export_graphviz(clf, out_file=None)
    #     f.write(dot_data)

    # Plotting method 2: render a PDF
    # (feature_names expects the column names, not clf.feature_importances_)
    # dot_data = tree.export_graphviz(clf, out_file=None, feature_names=clf.feature_importances_,
    #                                 filled=True, rounded=True, special_characters=True)
    dot_data = tree.export_graphviz(clf, out_file=None,
                                    filled=True, rounded=True, special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_data)
    # Save the tree drawing to a PDF file
    graph.write_pdf("treetwo.pdf")
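# --------------------------------------------------------------------------
# Sketch (assumption): the AdaBoost variant referenced in the commented-out
# block above, wired through sklearn.ensemble (AdaBoost is not part of
# sklearn.tree). Hyperparameters are copied from that comment; the function
# name `adaboostTrain` and its standalone shape are illustrative, not part of
# the original project.
# --------------------------------------------------------------------------
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier


def adaboostTrain(train_data, train_target, test_data, test_target):
    # Weak learner: the shallow tree described in the original comment.
    base = DecisionTreeClassifier(max_depth=5, min_samples_split=30, min_samples_leaf=5)
    # `base_estimator` was renamed to `estimator` in newer scikit-learn releases.
    clf = AdaBoostClassifier(base_estimator=base, n_estimators=10, learning_rate=0.2)
    clf.fit(train_data, train_target)
    y_pred = clf.predict(test_data)
    return accuracy_score(y_true=test_target, y_pred=y_pred)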
""" import matplotlib.pyplot as plt from sko.GA import GA from math_model.py import ANN import numpy as np import pandas as pd from math_model.util import dataUtil data = dataUtil.load_data('../data/data_22.csv') print('data shape:', data.shape) # data = data.iloc[:, 2:] # 标准化 # data = dataUtil.standardization(data) input_num = 14 # 划分x,y x, y = dataUtil.get_x_y(data) print('x:', x.shape) print('y:', y.shape) # 正则化 # x = dataUtil.normalization(x) # 标准化 x = dataUtil.standardization2(x) # 归一化 # x = dataUtil.scale(x) best = float(0.0) def schaffer(p): # 求a,b为何值时误差最小(a=1,b=0) p = pd.DataFrame(p) a = p.iloc[:input_num * 27] b = p.iloc[input_num * 27:]
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.semi_supervised import LabelSpreading

from math_model.util import dataUtil

# Load the data
data0 = dataUtil.load_data('../data/data.csv')
# Change labels: the original 0 labels become -1 (unlabelled for LabelSpreading)
data0.loc[(data0['label'] == 0), ['label']] = -1
# Samples confirmed negative get their label changed to 0
num_list = dataUtil.find_92()
data0.loc[data0['number'].isin(num_list), ['label']] = 0
print(data0[data0['number'].isin(num_list)])
print(data0)

# Split into x, y
data0 = data0.iloc[:, 5:]
data_x, data_y = dataUtil.get_x_y(data0)

# Training data
# data_y_train = np.copy(data_y)
print('data_y_train:', data_y)

# Models
clf = LabelSpreading(max_iter=100, kernel='rbf', gamma=0.1)
ls = (clf.fit(data_x, data_y), data_y)
rbf_svc = (svm.SVC(kernel='rbf', gamma=.5).fit(data_x, data_y), data_y)

y_pre = clf.predict(data_x)
print('prediction:', y_pre)
np.savetxt('pre.txt', y_pre)
# print('ground truth:', y_test)
# print('number correct:', (y_pre == y_test).sum())
# h = 0.02
# x_min, x_max = data_x[:, 0].min() - 1, data_x[:, 0].max() + 1
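# --------------------------------------------------------------------------
# Sketch (assumption): a quick sanity check of the label-spreading result,
# comparing the transduced labels against the samples that kept a real label
# (everything not flipped to -1 above). `clf` and `data_y` are the objects
# built above; the agreement score only covers the labelled subset.
# --------------------------------------------------------------------------
from sklearn.metrics import accuracy_score

y_arr = np.asarray(data_y)
labelled_mask = y_arr != -1        # samples that entered training with a label
transduced = clf.transduction_     # labels LabelSpreading assigned to every sample
print('agreement on originally labelled samples:',
      accuracy_score(y_arr[labelled_mask], transduced[labelled_mask]))
print('samples labelled by spreading (were -1):', int((~labelled_mask).sum()))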