# Example #1
# (d) Train/test split: years through 2008 for training, 2009-2010 held out,
#     keeping a single lag variable as the only predictor (mirrors the test block below).
train   = Weekly[Weekly['Year'] <= 2008]
x_train = train.iloc[:,3]
x_train = x_train.values.reshape(len(x_train),1)
y_train = train.loc[:,'Direction']

test    = Weekly[Weekly['Year'] >= 2009] 
x_test  = test.iloc[:,3]
x_test  = x_test.values.reshape(len(x_test),1)
y_test  = test.loc[:,'Direction']

glm2      = LogisticRegression()
glm2_pred = glm2.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, glm2_pred), index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
print('accuracy: ', accuracy_score(y_test, glm2_pred)) # 62.5%
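
# Optional sketch: a per-class breakdown can make the confusion matrix above easier
# to read; this assumes sklearn's classification_report (not imported elsewhere in
# this script) and the glm2_pred array defined above.
from sklearn.metrics import classification_report
print(classification_report(y_test, glm2_pred, digits=3))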

# (e) Linear discriminant analysis on the same train/test split
lda      = LinearDiscriminantAnalysis()
lda_pred = lda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, lda_pred), index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
print('accuracy: ', accuracy_score(y_test, lda_pred)) # 62.5%
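
# Quick check (sketch): with a single predictor the logistic and LDA decision rules
# are very similar, so their test predictions may be identical here; this simply
# compares the two prediction arrays defined above.
print('LDA predictions match logistic predictions:', (lda_pred == glm2_pred).all())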

# (f) Quadratic discriminant analysis on the same train/test split
qda      = QuadraticDiscriminantAnalysis()
qda_pred = qda.fit(x_train, y_train).predict(x_test)
print(pd.DataFrame(confusion_matrix(y_test, qda_pred), index=['y=0', 'y=1'], columns=['y_pred=0', 'y_pred=1']))
print('accuracy: ', accuracy_score(y_test, qda_pred)) # 58.7%
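
# The QDA confusion matrix suggests it may be predicting a single class on this test
# set; a quick count of its predicted labels (sketch using pandas value_counts):
print(pd.Series(qda_pred).value_counts())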

# (g) K-nearest neighbors with K = 1
knn      = KNeighborsClassifier(n_neighbors=1)
knn_pred = knn.fit(x_train, y_train).predict(x_test)
print('accuracy: ', accuracy_score(y_test, knn_pred)) # 49%
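
# Sketch (K values chosen for illustration, not part of the original output): the
# K = 1 fit is very flexible, so it can help to see how test accuracy moves as K grows.
for k in [1, 3, 5, 10, 50]:
    knn_k      = KNeighborsClassifier(n_neighbors=k)
    knn_k_pred = knn_k.fit(x_train, y_train).predict(x_test)
    print('K =', k, 'accuracy:', accuracy_score(y_test, knn_k_pred))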

# (h): Logistic regression and LDA give the best test accuracy (62.5%), ahead of QDA (58.7%) and KNN with K = 1 (49%).
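
# Summary sketch: collect the four test accuracies in a single table; this assumes
# the *_pred arrays defined above are still in scope.
results = pd.DataFrame({
    'model':    ['Logistic', 'LDA', 'QDA', 'KNN (K=1)'],
    'accuracy': [accuracy_score(y_test, p) for p in [glm2_pred, lda_pred, qda_pred, knn_pred]],
})
print(results)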