import data import helper as h from sklearn.metrics import classification_report, confusion_matrix, \ accuracy_score, precision_score, f1_score, recall_score from sklearn.neighbors import KNeighborsClassifier from sklearn.model_selection import train_test_split # Adição das classes mediante o volume de vendas data.classification('../files/input/') # Criação do data frame a partir do ficheiro csv df = h.get_data('../files/output/classification.data.csv') # Criação de data frame com base no data frame train, # sem a coluna 'Weekly_Sales', visto que é o valor que se pretende prever X = df.drop(columns=['class', 'Unnamed: 0', 'Dept', 'Type', 'Size']) # Criação de uma lista com os valores da coluna que se pretende prever y = df['class'].values.tolist() # Utilização de um módulo da sklearn para dividir o # data frame train, em train e test, por forma a avaliar a accuracy X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) # Obtenção do melhor valor de K acc_arr = [] predict_arr = [] conf_matrix = [] class_report = [] prec_score = [] f1 = [] recall = [] for K in range(25):
import numpy as np
import neuralNetwork as nn
import data

# -------------------
# 0. Hyperparameter settings
dataType = 5     # which dataset variant to generate
activeType = 2   # activation function type
hDim = 20        # number of hidden-layer nodes
alpha = 1        # learning rate
rate = 0.5       # node keep probability (dropout)
# -------------------

# -------------------
# 1. Create the data
myData = data.classification(negLabel=0, posLabel=1)
myData.makeData(dataType=dataType)
# -------------------

# -------------------
# 2. Split the data into training and evaluation sets
dtrNum = int(len(myData.X) * 0.9)  # number of training samples

# Training data (first 90% of the whole set)
Xtr = myData.X[:dtrNum]
Ytr = myData.Y[:dtrNum]

# Evaluation data (remaining 10%)
Xte = myData.X[dtrNum:]
Yte = myData.Y[dtrNum:]
# -------------------
# -*- coding: utf-8 -*-
import numpy as np
import kernelFunc as kf
import kernelSVM as svm
import data

# -------------------
# 1. Create the data
myData = data.classification(negLabel=-1.0, posLabel=1.0)
myData.makeData(dataType=5)
# -------------------

# -------------------
# 2. Split the data into training and evaluation sets
dtrNum = int(len(myData.X) * 0.9)  # number of training samples

# Training data (first 90% of the whole set)
Xtr = myData.X[:dtrNum]
Ytr = myData.Y[:dtrNum]

# Evaluation data (remaining 10%)
Xte = myData.X[dtrNum:]
Yte = myData.Y[dtrNum:]
# -------------------

# -------------------
# 3. Standardization
# Mean and standard deviation are computed on the training split only,
# then applied to both splits (avoids leaking test statistics into training).
xMean = np.mean(Xtr, axis=0)
xStd = np.std(Xtr, axis=0)
Xtr = (Xtr - xMean) / xStd
Xte = (Xte - xMean) / xStd
# -------------------
import data
import helper as h
from sklearn import tree
from sklearn.preprocessing import Imputer

# Assign class labels based on sales volume, then load the generated CSVs.
data.classification()
train = h.get_data('../files/output/classification.data.csv')
test = h.get_data('../files/output/test.csv')

# The columns that we will be making predictions with.
x_columns = [
    'Store',
    'Dept',
    'week_number',
    'IsHoliday',
    'Type',
    'Size',
    'Temperature',
    'Fuel_Price',
    'CPI',
    'Unemployment',
]

# The column that we want to predict.
y_column = ["class"]

# Replace the null values that would break the model with the column mean.
# This strategy was adopted because the NaNs occur in the Unemployment
# and CPI columns.
imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
imp = imp.fit(test[x_columns])
X_test_imp = imp.transform(test[x_columns])

clf = tree.DecisionTreeClassifier()
clf.fit(X=train[x_columns], y=train[y_column].values.ravel())

# BUGFIX: the original bare `clf.feature_importances_` expression statement
# had no effect; capture the fitted importances so they are actually usable.
feature_importances = clf.feature_importances_

# Predict the class for the (imputed) test rows and attach it to the frame.
predictions = clf.predict(X_test_imp)
test['class'] = predictions