# runExperiment.py
# --- File locations used by this script (relative paths) ---

# Raw inputs handed to readData: training images, test images and the
# training solutions CSV.
f_in_trn, f_in_tst, f_in_sol = (
    'Data/images_train',
    'Data/images_test',
    'Data/train_solutions.csv',
)

# Flat-file inputs; only referenced by the disabled loadData section.
# NOTE(review): these names do not match the f_out_* files written below
# ('train_.csv' vs 'train_32_deskew.csv') — confirm which files are intended.
f_in_flat_trn, f_in_flat_tst = 'Data/train_.csv', 'Data/test_.csv'

# Outputs: the processed train/test matrices and the final submission file.
f_out_trn, f_out_tst = 'Data/train_32_deskew.csv', 'Data/test_32_deskew.csv'
f_out_subm = 'Submissions/ls_32_deskew.csv'
# Process images
# readData is given the two image locations plus the solutions file and
# returns three values, bound here to Xtrn, Ytrn and Xtst. saveData is then
# handed the two X values together with their output paths and a column
# format string.
# NOTE(review): augmenting=False presumably turns off data augmentation in
# readData — confirm against that module.
from readData import readData
from saveData import saveData

Xtrn, Ytrn, Xtst = readData(f_in_trn, f_in_tst, f_in_sol, augmenting=False)
saveData((Xtrn, Xtst), (f_out_trn, f_out_tst), colfmt='%.18e')
# Load processed images from flat file, on disk
# DISABLED: the statements below sit inside a bare string literal, so they
# never execute. If enabled they would bind Xtrn, Xtst and Ytrn through
# loadData instead of readData.
# NOTE(review): `tst` loads the test file a second time and is not read
# anywhere else in this script; rowskip=1 on the solutions file presumably
# skips a header row — confirm.
'''
from loadData import loadData
Xtrn = loadData(f_in_flat_trn, rowskip=0)
Xtst = loadData(f_in_flat_tst, rowskip=0)
tst = loadData(f_in_flat_tst, rowskip=0)
Ytrn = loadData(f_in_sol, rowskip=1)
'''
# Fit OLS
# DISABLED (string literal). Fits a LinearRegression on every column of
# Ytrn after the first and predicts on Xtst.
'''
from sklearn import linear_model
model = linear_model.LinearRegression()
model.fit(Xtrn, Ytrn[::, 1:])
Ytst = model.predict(Xtst)
'''
# Fit RF
# DISABLED (string literal). Same fit/predict pattern with a default
# RandomForestRegressor.
'''
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
model.fit(Xtrn, Ytrn[::, 1:])
Ytst = model.predict(Xtst)
'''
# Fit Extra Trees (More random)
# DISABLED (string literal). Same fit/predict pattern with an
# ExtraTreesRegressor seeded by `seed`.
# NOTE(review): the '#' lines inside the string look like a log of
# n_estimators / max_features settings and the score each produced —
# presumably an error metric where lower is better; confirm.
'''
seed = 0
from sklearn.ensemble import ExtraTreesRegressor
# n_estimators=10 : 0.14191 [sqrt] [32]
# n_estimators=10 : 0.14185 [sqrt] [32c]
# n_estimators=10 : 0.13998 [sqrt] [64]
# n_estimators=50 : 0.12859 [None] [64c]
# n_estimators=50 : 0.13590 [sqrt]
# n_estimators=50 : 0.13081 [None]
# n_estimators=500 : 0.12954 [None]
# n_estimators=500 : 0.12935 [None]
model = ExtraTreesRegressor(n_estimators=500, max_features=None,
random_state=seed, verbose=True,
oob_score=True, bootstrap=True,
n_jobs=1)
model.fit(Xtrn, Ytrn[::, 1:])
Ytst = model.predict(Xtst)
'''
# Fit gradient boosting
# GradientBoostingRegressor.fit only accepts a 1-D target, whereas
# Ytrn[::, 1:] is a 2-D slice (every column after the first), so fitting it
# directly raises as soon as there is more than one target column.
# MultiOutputRegressor fits one independent booster per target column and
# predict() returns one column per target.
# NOTE(review): assumes the solutions file carries several target columns,
# as the other (multi-output) estimators in this script suggest — confirm.
# Requires a scikit-learn release that ships sklearn.multioutput.
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
model = MultiOutputRegressor(GradientBoostingRegressor())
model.fit(Xtrn, Ytrn[::, 1:])
Ytst = model.predict(Xtst)
# Plot learning curve
# DISABLED: wrapped in a bare string literal, so it never executes. If
# enabled it would pass the training data and the currently fitted `model`
# to plotLearningCurve.
'''
from plotLearningCurve import plotLearningCurve
plotLearningCurve(Xtrn, Ytrn, model)
'''
# Fit Ridge
# DISABLED (string literal). Runs RidgeCV over the listed alphas, reads the
# selected alpha_ and refits a plain Ridge with it before predicting on Xtst.
# NOTE: `linear_model` is only imported inside the disabled OLS section, so
# enabling this section on its own also needs
# `from sklearn import linear_model`.
'''
model = linear_model.RidgeCV(alphas = [0, 25, 50, 100])
model.fit(Xtrn, Ytrn[::, 1:])
my_alpha = model.alpha_
model = linear_model.Ridge(alpha = my_alpha)
model.fit(Xtrn, Ytrn[::, 1:])
Ytst = model.predict(Xtst)
'''
# Save submission to disk
# Import the single name used here explicitly instead of via a wildcard, so
# nothing else exported by the saveSubmission module can silently overwrite
# the variables defined earlier in this script.
# NOTE(review): f_in_tst (the test image location) is passed alongside the
# predictions, presumably so saveSubmission can recover row identifiers —
# confirm against that module.
from saveSubmission import saveSubmission
saveSubmission(Ytst, f_in_tst, f_out_subm)