def normalize_data_2d():
    """Standardize the data, project it onto two principal components and plot it.

    The labelled samples come from ``ia_n2020.ioValues()`` and the samples to
    classify from ``ia_n2020.newData()``. The scaler and the PCA are fitted on
    the labelled samples only; the samples to classify are then transformed
    with those same fitted objects so both outputs share one coordinate system.

    A scatter plot of the labelled samples in the PC1/PC2 plane is written to
    ``graphs/pca_graph.png`` (label 0 as green crosses, label 1 as blue dots).

    Returns:
        A tuple ``(input_values_2d, predict_values_2d)`` holding the
        two-component projections of the labelled samples and of the samples
        to classify.
    """
    # Local import keeps this function self-contained.
    import numpy as np

    input_values, contaminated = ia_n2020.ioValues()
    predict_values = ia_n2020.newData()

    scaler = StandardScaler()
    input_values_norm = scaler.fit_transform(input_values)
    # Reuse the mean/variance learned above; refitting on the new samples
    # would scale them differently from the data the models are trained on.
    predict_values_norm = scaler.transform(predict_values)

    pca = PCA(n_components=2)
    input_values_2d = pca.fit_transform(input_values_norm)
    # Same reasoning: project onto the axes found for the labelled samples.
    predict_values_2d = pca.transform(predict_values_norm)

    labels = np.asarray(contaminated).ravel()
    points = np.asarray(input_values_2d)

    # (label value, marker, color, legend text) for each plotted class.
    classes = (
        (0, 'x', 'g', 'Não contaminado'),
        (1, 'o', 'b', 'Contaminado'),
    )

    fig, ax = plt.subplots()
    try:
        # One scatter call per class: always yields a valid legend entry,
        # even when a class has no samples.
        for value, marker, color, legend_text in classes:
            mask = labels == value
            ax.scatter(
                points[mask, 0],
                points[mask, 1],
                marker=marker,
                color=color,
                label=legend_text,
            )
        ax.set_xlabel('PC1')
        ax.set_ylabel('PC2')
        ax.grid(True)
        ax.legend()
        fig.savefig('graphs/pca_graph.png')
    finally:
        # Release the figure even if saving fails.
        plt.close(fig)

    return input_values_2d, predict_values_2d
def normalize_data_3d():
    """Standardize the data and project it onto three principal components.

    The labelled samples come from ``ia_n2020.ioValues()`` and the samples to
    classify from ``ia_n2020.newData()``. The scaler and the PCA are fitted on
    the labelled samples only; the samples to classify are then transformed
    with those same fitted objects so both outputs share one coordinate system.

    Returns:
        A tuple ``(input_values_3d, predict_values_3d)`` holding the
        three-component projections of the labelled samples and of the
        samples to classify.
    """
    # The labels returned alongside the inputs are not needed here.
    input_values, _ = ia_n2020.ioValues()
    predict_values = ia_n2020.newData()

    scaler = StandardScaler()
    input_values_norm = scaler.fit_transform(input_values)
    # Reuse the mean/variance learned above; refitting on the new samples
    # would scale them differently from the data the models are trained on.
    predict_values_norm = scaler.transform(predict_values)

    pca = PCA(n_components=3)
    input_values_3d = pca.fit_transform(input_values_norm)
    # Same reasoning: project onto the axes found for the labelled samples.
    predict_values_3d = pca.transform(predict_values_norm)

    return input_values_3d, predict_values_3d
from sklearn import svm
import pandas as pd

import ia_n2020
import pca_graph

# Output spreadsheets that this script reads, extends with one column and
# writes back in place.
SCORE_CSV = 'output_data/score.csv'
PREDICT_CSV = 'output_data/predict_data.csv'

# Labelled samples (inputs + contamination labels) and the samples to classify.
input_values, contaminated = ia_n2020.ioValues()
predict_values = ia_n2020.newData()

# PCA projections of both data sets, in two and three components.
input_values_2d, predict_values_2d = pca_graph.normalize_data_2d()
input_values_3d, predict_values_3d = pca_graph.normalize_data_3d()

# ---------------------------------------------------------------------------
# SVM classifier with the 4 input values
# ---------------------------------------------------------------------------
# Alternative configurations kept for reference:
#   sv_clf = svm.SVC(kernel='linear', C=0.5)
#   sv_clf = svm.SVC(kernel='poly', degree=3, C=0.5)
sv_clf = svm.SVC(kernel='rbf', gamma=1, C=0.5)
sv_clf.fit(input_values, contaminated)

# Score is computed on the same samples the classifier was fitted on.
sv_score = sv_clf.score(input_values, contaminated)
print(sv_score)

# Store the score in the 'sv_score' column of the score sheet.
score_sheet = pd.read_csv(SCORE_CSV)
score_sheet['sv_score'] = sv_score
score_sheet.to_csv(SCORE_CSV, index=False)

# Classify the new samples and store the result in the 'svm_predict' column.
sv_predict = sv_clf.predict(predict_values)
print(sv_predict)

spreadsheet = pd.read_csv(PREDICT_CSV)
spreadsheet['svm_predict'] = sv_predict
spreadsheet.to_csv(PREDICT_CSV, index=False)

# ---------------------------------------------------------------------------
# SVM classifier with 2D data
# ---------------------------------------------------------------------------
# Alternative configuration kept for reference:
#   sv_clf_2d = svm.SVC(kernel='rbf', gamma=1, C=0.5)
sv_clf_2d = svm.SVC(kernel='linear', C=0.5)