-
Notifications
You must be signed in to change notification settings - Fork 0
/
grid.py
96 lines (72 loc) · 2.66 KB
/
grid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# NOTE(review): this module has no docstring, so this prints "None" --
# presumably a module docstring was intended above this line; confirm.
print __doc__
# Author: Olivier Grisel <olivier.grisel@ensta.org>
#         Peter Prettenhofer <peter.prettenhofer@gmail.com>
#         Mathieu Blondel <mathieu@mblondel.org>
# License: Simplified BSD
# Standard-library imports.
from pprint import pprint
from time import time
import logging
import pickle
# scikit-learn imports (Python 2 era API: `sklearn.grid_search` was later
# renamed to `sklearn.model_selection`).
# NOTE(review): fetch_20newsgroups, CountVectorizer, TfidfTransformer,
# SGDClassifier and Pipeline are imported but never used below -- likely
# leftovers from the example this script was adapted from.
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
# Project-local helpers: feature generation and file/label utilities.
import featureGeneration as fg
import utils
from sklearn.decomposition import FastICA
import numpy as np
# Display progress logs on stdout
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(levelname)s %(message)s')
###############################################################################
# Data loading: read cropped AFLW face images and their mouth-opening labels,
# then extract ICA components as features.
# NOTE(review): hard-coded machine-local paths; this script only runs on the
# original author's machine as-is.
path = '/local/attale00/AFLW_ALL/'
path_ea = '/local/attale00/AFLW_cropped/cropped2/'
#
fileNames = utils.getAllFiles(path_ea);
# Map each image file to its 'mouth_opening' label parsed from the
# per-face label files.
labs=utils.parseLabelFiles(path+'/labels/labels','mouth_opening',fileNames,cutoffSeq='.png',suffix='_face0.labels')
testSet = fg.dataContainer(labs)
# roi presumably is (row_start, row_end, col_start, col_end) over the
# 37x115 crop -- TODO confirm against fg.getAllImagesFlat.
roi=(0,37,0,115)
X=fg.getAllImagesFlat(path_ea,testSet.fileNames,(37,115),roi=roi)
#
# perform ICA
ica = FastICA(n_components=100,whiten=True)
ica.fit(X)
# NOTE(review): FastICA.fit centers the data internally, but transform() is
# called here on the *already mean-subtracted* X1 -- this may subtract the
# mean twice. Verify this is intentional before reusing.
meanI=np.mean(X,axis=0)
X1=X-meanI
data=ica.transform(X1)
filters=ica.components_
# Append the 100 ICA coefficients of each image to its feature vector.
for i in range(len(fileNames)):
    testSet.data[i].extend(data[i,:])
# Collapse the raw mouth labels to a binary target (Python 2: map -> list).
testSet.targetNum=map(utils.mapMouthLabels2Two,testSet.target)
###############################################################################
# define a pipeline combining a text feature extractor with a simple
# classifier
clf = RandomForestClassifier()
parameters = {'n_estimators': range(10, 40,20),
'max_depth': range(5, 40,5),
'min_samples_split':range(5,40,2),
'max_features':range(1,30,2),
'min_samples_leaf':range(1,11,2)}
#if __name__ == "__main__":
# multiprocessing requires the fork to happen in a __main__ protected
# block
# find the best parameters for both the feature extraction and the
# classifier
grid_search = GridSearchCV(clf, parameters, n_jobs=-1, verbose=1)
print "Performing grid search..."
print "parameters:"
pprint(parameters)
t0 = time()
grid_search.fit(data, np.array(testSet.targetNum))
print "done in %0.3fs" % (time() - t0)
print
print "Best score: %0.3f" % grid_search.best_score_
try:
print grid_search.best_estimator_
except E:
print 'printing did not work'
pickle.dump(grid_search.best_score_,open('/local/attale00/clf','w'))