/
validation_pilotaz.py
116 lines (76 loc) · 3.49 KB
/
validation_pilotaz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#python3
import numpy as np
from sklearn import cross_validation
import pandas as pd
def _out_of_fold_predictions(model, x_train, y_train, x_test, transform=None):
    """Fit *model* on one training fold and return flat test-fold predictions.

    When *transform* is given it is applied to both the training and the test
    inputs before fitting / predicting.
    """
    if transform is not None:
        x_train, x_test = transform(x_train), transform(x_test)
    return np.ravel(model.fit(x_train, y_train).predict(x_test))


def cross_validation(X, Y, n_samples=None, n_folds=10):
    """Return pooled out-of-fold MSEs of several simple predictors of Y from X.

    The first ``n_samples`` observations are split into ``n_folds`` contiguous
    folds (no shuffling). For every fold each model is fitted on the remaining
    observations and used to predict the held-out ones; the held-out
    predictions of all folds are pooled and scored once at the end.

    Args:
        X: one-dimensional sequence of numeric inputs (list, ndarray or
            pandas Series); it is converted to a float ndarray, so elements
            are always selected by position.
        Y: one-dimensional sequence of numeric targets, same length as ``X``.
        n_samples: number of leading observations to use; ``None`` (default)
            uses all of them.
        n_folds: number of folds, 10 by default.

    Returns:
        A list of seven mean squared errors, in this order:
        identity (``X`` compared directly with ``Y``), linear regression on
        ``X``, linear regression on ``exp(X)``, linear regression on
        ``log(X)``, and k-nearest-neighbours regression with k = 1, 2, 3.

    Raises:
        ValueError: if ``X`` and ``Y`` differ in length, if ``n_samples``
            exceeds the amount of data, or if ``n_folds`` is smaller than 2
            or larger than ``n_samples``.
    """
    # Validate first so bad input fails fast, before any heavy imports.
    X = np.asarray(X, dtype=float).ravel()
    Y = np.asarray(Y, dtype=float).ravel()
    if X.shape[0] != Y.shape[0]:
        raise ValueError(
            "X and Y must have the same length, got %d and %d"
            % (X.shape[0], Y.shape[0]))
    if n_samples is None:
        n_samples = X.shape[0]
    if n_samples > X.shape[0]:
        raise ValueError(
            "n_samples=%d exceeds the %d available observations"
            % (n_samples, X.shape[0]))
    if n_folds < 2 or n_folds > n_samples:
        raise ValueError(
            "n_folds=%d must be between 2 and n_samples=%d"
            % (n_folds, n_samples))

    # Function-scope imports: this function's own name hides the module-level
    # ``cross_validation`` import, so KFold is imported directly here instead
    # of rebinding that name locally. The legacy ``sklearn.cross_validation``
    # API is kept because the file's top-level import already depends on it.
    from sklearn import linear_model
    from sklearn.cross_validation import KFold
    from sklearn.metrics import mean_squared_error as mse
    from sklearn.neighbors import KNeighborsRegressor as KNR

    pooled_x, pooled_y = [], []
    pooled_pred = [[] for _ in range(6)]

    for train_index, test_index in KFold(n_samples, n_folds=n_folds):
        # Estimators expect a 2-D (n_samples, n_features) input.
        x_train = X[train_index].reshape(-1, 1)
        x_test = X[test_index].reshape(-1, 1)
        y_train = Y[train_index]

        pooled_x.extend(X[test_index])
        pooled_y.extend(Y[test_index])

        # Fresh estimators per fold; the order fixes the order of the result.
        # NOTE(review): the log model presumably needs strictly positive X.
        models = [
            (linear_model.LinearRegression(), None),
            (linear_model.LinearRegression(), np.exp),
            (linear_model.LinearRegression(), np.log),
            (KNR(n_neighbors=1, algorithm='ball_tree'), None),
            (KNR(n_neighbors=2, algorithm='ball_tree'), None),
            (KNR(n_neighbors=3, algorithm='ball_tree'), None),
        ]
        for bucket, (model, transform) in zip(pooled_pred, models):
            bucket.extend(_out_of_fold_predictions(
                model, x_train, y_train, x_test, transform))

    # Pooling flat values keeps the score well defined when folds differ in
    # size (e.g. 31 observations in 10 folds).
    return [mse(pooled_x, pooled_y)] + [mse(pooled_y, bucket)
                                        for bucket in pooled_pred]
import getdata_script as gd
from os import listdir
from os.path import isfile, join
# Driver: run the cross-validation on every pilot data file and print the
# per-model MSE averaged over the files.

# Placeholders that are never read afterwards; kept as in the original script.
MSEId = MSERegr = MSERegr_ey = MSE_log = MSE1NN = MSE2NN = MSE3NN = 0

# Running element-wise sum of the seven MSE values returned for each file.
MSE = [0] * 7

path = 'data_all/'
# Alternative kept for reference: take every regular file found under `path`
# instead of the fixed list of pilot files built below.
onlyfiles1 = ['aaa', 'data_test.csv']
onlyfiles = ['pilotaz' + str(number) + '.csv' for number in range(1, 6)]
print(onlyfiles)

for file_name in onlyfiles:
    frame = gd.get_data(path + file_name)
    scores = cross_validation(frame['stimulus'], frame['converted'])
    MSE2 = MSE
    MSE = [MSE2[position] + score for position, score in enumerate(scores)]

file_count = len(onlyfiles)
msn_labels = ['MSEId:', 'MSERegr:', 'MSERegr_ey:', 'MSE_log:', 'MSE1NN:', 'MSE2NN:', 'MSE3NN:']
for label, total in zip(msn_labels, MSE):
    print(label + " ", total / file_count)