예제 #1
0
# KNN classification over the weekly-sales data set: build features/labels,
# split train/test, then search for the best K.
# NOTE(review): this chunk is truncated — the body of the final `for K` loop
# lies outside the visible source.
import data
import helper as h
from sklearn.metrics import classification_report, confusion_matrix, \
                            accuracy_score, precision_score, f1_score, recall_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
# Add the class labels derived from the sales volume
data.classification('../files/input/')

# Build the data frame from the generated csv file
df = h.get_data('../files/output/classification.data.csv')

# Feature matrix: drop the target column ('class') and the columns that are
# not used as predictors
X = df.drop(columns=['class', 'Unnamed: 0', 'Dept', 'Type', 'Size'])

# List with the values of the column we want to predict
y = df['class'].values.tolist()
# Split into train (70%) and test (30%) in order to evaluate accuracy
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Accumulators for the search over K values
acc_arr = []        # accuracy per K
predict_arr = []    # predictions per K
conf_matrix = []    # confusion matrix per K
class_report = []   # classification report per K
prec_score = []     # precision per K
f1 = []             # f1-score per K
recall = []         # recall per K
# NOTE(review): loop body continues beyond this chunk
for K in range(25):
예제 #2
0
# Neural-network training example: hyperparameter settings, data creation,
# and train/evaluation split.
import numpy as np
import neuralNetwork as nn
import data

#-------------------
# 0. Hyperparameter settings
dataType = 5  # which synthetic data set to generate
activeType = 2  # which activation function to use
hDim = 20  # number of hidden-layer nodes
alpha = 1  # learning rate
rate = 0.5  # node keep probability (dropout)
#-------------------

#-------------------
# 1. Data creation
myData = data.classification(negLabel=0, posLabel=1)
myData.makeData(dataType=dataType)
#-------------------

#-------------------
# 2. Split the data into training and evaluation sets
dtrNum = int(len(myData.X) * 0.9)  # number of training samples
# Training data (90% of the whole set)
Xtr = myData.X[:dtrNum]
Ytr = myData.Y[:dtrNum]

# Evaluation data (remaining 10%)
Xte = myData.X[dtrNum:]
Yte = myData.Y[dtrNum:]
#-------------------
예제 #3
0
# -*- coding: utf-8 -*-
# Kernel-SVM example: data creation, train/evaluation split, and
# standardization of the features.
import numpy as np
import kernelFunc as kf
import kernelSVM as svm
import data

#-------------------
# 1. Data creation
myData = data.classification(negLabel=-1.0, posLabel=1.0)
myData.makeData(dataType=5)
#-------------------

#-------------------
# 2. Split the data into training and evaluation sets
dtrNum = int(len(myData.X) * 0.9)  # number of training samples
# Training data (90% of the whole set)
Xtr = myData.X[:dtrNum]
Ytr = myData.Y[:dtrNum]

# Evaluation data (remaining 10%)
Xte = myData.X[dtrNum:]
Yte = myData.Y[dtrNum:]
#-------------------

#-------------------
# 3. Standardization (z-score) using training-set statistics only, so no
# information from the evaluation set leaks into the scaling.
xMean = np.mean(Xtr, axis=0)
xStd = np.std(Xtr, axis=0)
# NOTE(review): divides by zero if any feature is constant in Xtr — confirm
# the generated data always has nonzero per-feature variance.
Xtr = (Xtr - xMean) / xStd
Xte = (Xte - xMean) / xStd
#-------------------
예제 #4
0
# Decision-tree classification over the weekly-sales data set: impute
# missing test values, train on the labelled data, predict test classes.
import data
import helper as h
import numpy as np
from sklearn import tree
# Imputer was deprecated in scikit-learn 0.20 and removed in 0.22;
# SimpleImputer is its drop-in replacement.
from sklearn.impute import SimpleImputer

# Add the class labels derived from the sales volume
data.classification()

train = h.get_data('../files/output/classification.data.csv')
test = h.get_data('../files/output/test.csv')

# The columns that we will be making predictions with.
x_columns = [
    'Store', 'Dept', 'week_number', 'IsHoliday', 'Type', 'Size', 'Temperature',
    'Fuel_Price', 'CPI', 'Unemployment'
]

# The column that we want to predict.
y_column = ["class"]

# Replace the null values that would break the model with the column mean.
# This strategy was adopted because the missing values occur in the
# Unemployment and CPI columns.
# SimpleImputer imputes column-wise by default, matching the removed
# Imputer's axis=0 behavior; np.nan replaces the old 'NaN' string marker.
# NOTE(review): the imputer is fitted on the test set only — fitting on the
# training columns would avoid using test statistics; confirm the training
# set is NaN-free before changing this.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp = imp.fit(test[x_columns])
X_test_imp = imp.transform(test[x_columns])

clf = tree.DecisionTreeClassifier()
clf.fit(X=train[x_columns], y=train[y_column].values.ravel())
clf.feature_importances_  # NOTE(review): computed but discarded

predictions = clf.predict(X_test_imp)
test['class'] = predictions