/
keras_tools.py
208 lines (174 loc) · 7.68 KB
/
keras_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# Deep learning lab course final project.
# Kaggle whale classification.
# Helper functions for the main keras model.
import datetime
import os
import numpy as np
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
import keras.utils
import utilities as ut
def get_run_name(prefix="run", additional=""):
    """Build a timestamped name for a training run.

    The name consists of `prefix`, the current local time formatted as
    ``YYYY-MM-DD-HHMMSS`` and `additional`, joined by underscores. Empty
    components are left out, so the default call yields ``run_<timestamp>``
    instead of a name that ends in a dangling underscore.

    Args:
        prefix: Text placed in front of the timestamp.
        additional: Optional text appended after the timestamp.

    Returns:
        The assembled run name as a string.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H%M%S")
    # Skip empty pieces so no leading/trailing "_" ends up in the name.
    parts = [part for part in (prefix, timestamp, additional) if part]
    return "_".join(parts)
def save_learning_curves(history, run_name, base_path="plots/"):
    """Save the loss and accuracy curves of a training run as PNG files.

    The plots are written to ``<base_path>/<run_name>/loss.png`` and
    ``<base_path>/<run_name>/accuracy.png``; the folder is created if it
    does not exist yet.

    Args:
        history: Keras history dict mapping metric names to per-epoch value
            lists. Must contain 'loss' and 'val_loss' plus the accuracy
            metrics, spelled either 'acc'/'val_acc' or
            'accuracy'/'val_accuracy'.
        run_name: Name of the run; used as sub-folder name and plot title.
        base_path: Root folder for all plots.

    Raises:
        KeyError: If a required metric is missing from `history`.
    """
    path = os.path.join(base_path, run_name)
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(path, exist_ok=True)

    # Keras versions differ in how the accuracy metric is named; only switch
    # to the long spelling when the short one is really absent.
    if 'acc' not in history and 'accuracy' in history:
        accuracy_keys = ['accuracy', 'val_accuracy']
    else:
        accuracy_keys = ['acc', 'val_acc']

    losses = {k: history[k] for k in ['loss', 'val_loss']}
    accuracies = {k: history[k] for k in accuracy_keys}

    # One x value per recorded epoch.
    x = range(len(losses['loss']))

    fn_losses = os.path.join(path, "loss.png")
    fn_accuracies = os.path.join(path, "accuracy.png")

    ut.save_plot(x, ys=losses, xlabel="epoch", ylabel="loss",
                 title=run_name, path=fn_losses)
    ut.save_plot(x, ys=accuracies, xlabel="epoch", ylabel="accuracy",
                 title=run_name, path=fn_accuracies)
def save_learning_curves_2(history, cnn_after, run_name, base_path="plots/"):
    """Save the validation-accuracy curve of a training run as a PNG file.

    The plot is written to ``<base_path>/<run_name>/accuracy.png``; the
    folder is created if it does not exist yet.

    Args:
        history: Keras history dict mapping metric names to per-epoch value
            lists. Must contain 'val_acc' (or 'val_accuracy').
        cnn_after: Forwarded unchanged as first argument to
            `ut.save_plot_2`; its meaning is defined there.
        run_name: Name of the run; used as sub-folder name and plot title.
        base_path: Root folder for all plots.

    Raises:
        KeyError: If the validation accuracy is missing from `history`.
    """
    path = os.path.join(base_path, run_name)
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(path, exist_ok=True)

    # Keras versions differ in how the accuracy metric is named; only switch
    # to the long spelling when the short one is really absent.
    if 'val_acc' not in history and 'val_accuracy' in history:
        accuracy_key = 'val_accuracy'
    else:
        accuracy_key = 'val_acc'

    # Only the validation accuracy is plotted; the training accuracy is not
    # needed and therefore not required to be present in `history`.
    accuracies = {accuracy_key: history[accuracy_key]}

    # One x value per recorded epoch.
    x = range(len(accuracies[accuracy_key]))

    fn_accuracies = os.path.join(path, "accuracy.png")
    ut.save_plot_2(cnn_after, x, ys=accuracies, xlabel="epoch", ylabel="accuracy",
                   title=run_name, path=fn_accuracies)
def draw_num_classes_graphs():
    """Train network and save learning curves for different values for num_classes.

    For every class count in a fixed list, a model is created and trained
    for 50 epochs. The learning curves are saved via `save_learning_curves`
    and the history is written to ``plots/<run_name>/data.csv``.

    NOTE(review): `create_pretrained_model` and `train` are neither defined
    nor imported in the visible part of this module, and the warning printed
    below states that the function was not adapted to the config_dict used
    in keras_model.py. Confirm before relying on this function.
    """
    print("Will likely not work because")
    print("keras_tools.draw_num_classes_graphs() was not yet adapted")
    print("to the usage of config_dict in keras_model.py")

    values = [10, 50, 100, 250, 1000, 4000]
    for num_classes in values:
        print("Training model on {} most common classes.".format(num_classes))
        model = create_pretrained_model(num_classes=num_classes)
        histories = train(model, num_classes, epochs=50)

        run_name = get_run_name("{}classes".format(num_classes))
        save_learning_curves(histories, run_name)

        # Keep the raw numbers next to the plots of the same run.
        csv_path = os.path.join("plots/", run_name, "data.csv")
        ut.write_csv_dict(histories,
                          keys=['loss', 'acc', 'val_loss', 'val_acc'],
                          filename=csv_path)
def visualize_model(model=None,
                    filename="InceptionV3_visualization.png",
                    show_shapes=False):
    """Print a summary of a Keras model and write its graph to an image file.

    Args:
        model: Keras model to visualize. When None, an InceptionV3 with
            'imagenet' weights and without its top layers is instantiated.
        filename: Path of the image file handed to `keras.utils.plot_model`.
        show_shapes: Forwarded to `keras.utils.plot_model`.
    """
    # Fall back to the default network only when the caller passed nothing.
    if model is None:
        model = InceptionV3(weights='imagenet', include_top=False)

    keras.utils.print_summary(model)
    print("---")
    layer_count = len(model.layers)
    print("len(model.layers)", layer_count)

    print("saveing graph visualization to file")
    keras.utils.plot_model(model, show_shapes=show_shapes, to_file=filename)
    print("saved graph visualization to file")
def compute_preds(model, num_classes, train_dir = "data/model_train",
                  test_dir = "data/model_valid", test_csv = "data/model_valid.csv"):
    """Run `model` on the images below `test_dir` and rank its predictions.

    Args:
        model: Keras model. Its `name` selects the input resolution:
            299x299 for 'InceptionV3', 'Xception' and 'InceptionResNetV2',
            224x224 for 'ResNet50' and 'MobileNet'.
        num_classes: Not used by this function; kept so existing callers
            keep working.
        train_dir: Folder passed to `ut.make_label_dict` to obtain the
            mapping from class number to whale name.
        test_dir: Folder handed to `flow_from_directory` as image source.
        test_csv: CSV file read via `ut.read_csv`; each row is expected to
            start with (file name, whale name). Pass '' or None to skip
            the collection of true labels.

    Returns:
        Tuple ``(file_names, model_preds, true_labels)``:
        file_names holds the base name of every image in generator order,
        model_preds holds per image a list of up to 5 whale names ordered
        from most to least probable, and true_labels holds the whale name
        of every image taken from `test_csv` (empty list if no CSV given).

    Raises:
        ValueError: If `model.name` is not one of the supported networks.
        KeyError: If an image file has no row in `test_csv`.
    """
    batch_size = 16  # number of images per prediction batch
    max_preds = 5    # number of ranked predictions kept per image

    input_sizes = {
        'InceptionV3': (299, 299),
        'Xception': (299, 299),
        'InceptionResNetV2': (299, 299),
        'ResNet50': (224, 224),
        'MobileNet': (224, 224),
    }
    if model.name not in input_sizes:
        # Fail right away: without a known input size the generator below
        # cannot be configured.
        raise ValueError("invalid model: {}".format(model.name))
    target_size = input_sizes[model.name]
    print("training model", model.name)

    test_gen = image.ImageDataGenerator(
        rescale = 1./255,
        fill_mode = "nearest")
    test_flow = test_gen.flow_from_directory(
        test_dir,
        shuffle=False,  # predictions must line up with test_flow.filenames
        batch_size = batch_size,
        target_size = target_size,
        class_mode = None)  # NOTE(review): original author wondered about "categorical"
    preds = model.predict_generator(test_flow, verbose = 1)

    # dict mapping class_no --> whalenames
    class_whale_map = ut.make_label_dict(directory=train_dir)

    # Indices of the max_preds highest scores per image, best first.
    top_k = preds.argsort()[:, -max_preds:][:, ::-1]
    model_preds = [[class_whale_map[i] for i in line] for line in top_k]

    # Build the file name --> whale name lookup once instead of scanning the
    # whole CSV for every image.
    label_by_file = None
    if test_csv:
        label_by_file = {}
        for row in ut.read_csv(file_name = test_csv):
            if len(row) > 1:
                # setdefault keeps the first row of a file name, which is
                # the row a front-to-back search would find.
                label_by_file.setdefault(row[0], row[1])

    file_names = []
    true_labels = []
    for idx, fn in enumerate(test_flow.filenames):
        if idx < 3:
            print("fn", fn)  # show a few sample paths
        # basename works for any nesting depth and path separator.
        filename = os.path.basename(fn)
        file_names.append(filename)
        if label_by_file is not None:
            true_labels.append(label_by_file[filename])

    return file_names, model_preds, true_labels
def write_pred_to_csv(file_names, model_preds, path = "data/submission.csv"):
    """Write one submission row per image to a CSV file.

    Every row is ``[file name, label string]``. The label string starts with
    'new_whale', followed by all but the last ranked prediction of the
    image, separated by single blanks.

    Args:
        file_names: File name per image, in the same order as `model_preds`.
        model_preds: Per image an ordered sequence of predicted whale names.
        path: Target file handed to `ut.write_csv`.
    """
    rows = []
    for idx, ranked in enumerate(model_preds):
        name = file_names[idx]
        # 'new_whale' always takes the first slot, so the last ranked
        # prediction is dropped.
        labels = ['new_whale']
        labels.extend(ranked[:len(ranked) - 1])
        rows.append([name, ' '.join(labels)])

    print("write csv file")
    ut.write_csv(rows, path)
    print("done writing csv file")
# Compare ranked model predictions with the true labels; report accuracy and MAP.
def compute_map(model_preds, true_labels):
    """Print the top-1 accuracy and return the mean average precision.

    Args:
        model_preds: Per image an ordered list of predicted whale names,
            best guess first. The length of the first entry is used as the
            number of ranked predictions.
        true_labels: True whale name per image, aligned with `model_preds`.

    Returns:
        The value computed by `ut.mean_average_precision`.
    """
    max_preds = len(model_preds[0])
    print("max_preds", max_preds)

    # Accuracy by hand: a hit is a best guess that equals the true label.
    hits = []
    for idx, label in enumerate(true_labels):
        hits.append(1 if model_preds[idx][0] == label else 0)
    num_hits = np.sum(hits)
    total = len(true_labels)
    acc = num_hits / total
    print("{} true predictions out of {}: accurracy: {} ".format(num_hits, total, acc))

    MAP = ut.mean_average_precision(model_preds, true_labels, max_preds)
    print("MAP", MAP)
    return MAP
if __name__ == "__main__":
    import sys

    # Script mode offers a single action: render the default InceptionV3
    # graph when the matching flag is on the command line.
    wants_visualization = "--visualize_inceptionV3" in sys.argv
    if wants_visualization:
        visualize_model()