forked from JiangXiangBo/yolo3_fish_detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.py
193 lines (164 loc) · 7.98 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import os
import numpy as np
import keras.backend as K
from keras.layers import Input, Lambda
from keras.models import Model
from yolo3.model import preprocess_true_boxes, yolo_body, yolo_loss
from yolo3.utils import get_random_data
def getClassNameList(classFilePath):
    """Read class names from a text file, one name per line.

    Surrounding whitespace is removed from every line and lines that are
    empty after stripping are dropped.

    :param classFilePath: path of the class-name file
    :return: list of class-name strings in file order
    """
    with open(classFilePath) as file:
        # Strip each line exactly once, then keep only the non-empty results.
        stripped_lines = [raw_line.strip() for raw_line in file]
    return [name for name in stripped_lines if name]
def getAnchorList(anchorFilePath):
    """Read comma-separated anchor sizes and return them as (width, height) rows.

    Blank tokens (for example the one produced by a trailing comma or a
    trailing newline) are ignored instead of crashing ``float('')``.

    :param anchorFilePath: path of a text file containing comma-separated numbers
    :return: numpy array of shape (number_of_anchors, 2)
    :raises ValueError: if the file holds no numbers, an odd count of numbers,
        or a token that cannot be parsed as a float
    """
    with open(anchorFilePath) as file:
        tokens = [token.strip() for token in file.read().split(',')]
    anchor_values = [float(token) for token in tokens if token]
    # Validate here so the caller gets a message naming the file rather than
    # a generic reshape error later on.
    if not anchor_values or len(anchor_values) % 2 != 0:
        raise ValueError(
            'Anchor file {} must contain a non-empty, even number of values, got {}.'.format(
                anchorFilePath, len(anchor_values)))
    return np.array(anchor_values).reshape(-1, 2)
def main():
    """Load class names and anchors, build the model and run training.

    All file locations are fixed relative paths, resolved against the
    current working directory.
    """
    class_names = getClassNameList('model_data/voc_classes.txt')
    anchors = getAnchorList('model_data/yolo_anchors.txt')
    class_count = len(class_names)
    # (height, width); both should be multiples of 32
    input_shape = (416, 416)
    model = create_model(input_shape, anchors, class_count)
    train(model, 'dataset_train.txt', input_shape, anchors, class_count)
def create_model(input_shape,
                 anchor_list,
                 num_classes,
                 load_pretrained=True,
                 freeze_body=False,
                 weights_path='saved_model/trained_weights.h5'):
    """Build the YOLOv3 training model: network body plus a loss layer.

    :param input_shape: (height, width) of the training images, e.g. (416, 416);
        each value is floor-divided by 32, 16 and 8 to size the y_true inputs
    :param anchor_list: anchor boxes; ``len(anchor_list)`` is divided by 3 to
        get the number of anchors per output scale (9 anchors -> 3 per scale)
    :param num_classes: number of object classes
    :param load_pretrained: when true, load weights from ``weights_path`` if
        that file exists; a notice is printed when it does not
    :param freeze_body: when true, every layer of the body except the last 7
        is marked non-trainable; when false all layers stay trainable
    :param weights_path: path of the weights file to load
    :return: Keras ``Model`` taking ``[image, y_true_1, y_true_2, y_true_3]``
        and producing the output of the ``yolo_loss`` Lambda layer
    """
    # get a new session
    K.clear_session()
    image_input = Input(shape=(None, None, 3))
    height, width = input_shape
    num_anchors = len(anchor_list)

    # One ground-truth Input per output scale (strides 32, 16 and 8).
    # Shape per sample: (grid_h, grid_w, anchors_per_scale, num_classes + 5).
    # For a 416x416 input that is 13x13, 26x26 and 52x52 grids.
    # Per the original author's notes the last axis holds 4 box values,
    # 1 objectness value and the class entries -- TODO confirm layout against
    # preprocess_true_boxes.
    y_true = [Input(shape=(height // stride,
                           width // stride,
                           num_anchors // 3,
                           num_classes + 5)) for stride in [32, 16, 8]]

    # Network body built from the image input, anchors per scale and class count.
    model_body = yolo_body(image_input, num_anchors // 3, num_classes)
    print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))

    if load_pretrained:
        if os.path.exists(weights_path):
            # by_name + skip_mismatch: layers whose names or shapes do not
            # match the file are skipped rather than raising.
            model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
            print('Load weights {}.'.format(weights_path))
        else:
            # Make the fallback visible instead of silently ignoring the request.
            print('Pretrained weights {} not found; continuing without loading weights.'.format(
                weights_path))

    if freeze_body:
        # Freeze everything except the last 7 layers of the body.
        num = len(model_body.layers) - 7
        for i in range(num):
            model_body.layers[i].trainable = False
        print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))

    # Loss layer: a Lambda wrapping yolo_loss. It receives the body outputs
    # followed by the y_true inputs, plus anchors, class count and
    # ignore_thresh as keyword arguments. Per the original author's notes,
    # ignore_thresh filters low-IoU boxes in the object-confidence loss.
    model_loss = Lambda(yolo_loss,
                        output_shape=(1,),
                        name='yolo_loss',
                        arguments={'anchors': anchor_list,
                                   'num_classes': num_classes,
                                   'ignore_thresh': 0.5
                                   }
                        )([*model_body.output, *y_true])

    # Full training model: inputs are the image and the three y_true tensors,
    # the single output is the loss computed by the Lambda layer above.
    model = Model([model_body.input, *y_true], model_loss)
    return model
def train(model,
          annotationFilePath,
          input_shape,
          anchor_list,
          num_classes,
          logDirPath='saved_model/'):
    """Compile the model, train it with generators and save the final weights.

    The annotation file is read line by line (blank lines are ignored),
    shuffled, and split 95% / 5% into training and validation sets. When the
    split leaves no validation samples, training runs without validation.

    :param model: Keras model whose output layer is named ``yolo_loss``
    :param annotationFilePath: text file with one annotated image per line
    :param input_shape: (height, width) passed on to the data generator
    :param anchor_list: anchor boxes passed on to the data generator
    :param num_classes: number of classes; the batch size is ``2 * num_classes``
    :param logDirPath: directory that receives ``trained_weights.h5``
    :raises ValueError: if ``num_classes`` gives a non-positive batch size or
        the annotation file contains no usable lines
    """
    # The model output already is the loss value, so the Keras loss function
    # just passes y_pred through.
    model.compile(optimizer='adam',
                  loss={'yolo_loss': lambda y_true, y_pred: y_pred})
    batch_size = 2 * num_classes
    if batch_size <= 0:
        raise ValueError('num_classes must be positive, got {}.'.format(num_classes))
    val_split = 0.05
    with open(annotationFilePath) as file:
        # Skip blank lines so they are never handed to the annotation parser.
        lines = [line for line in file if line.strip()]
    if not lines:
        raise ValueError('No annotation lines found in {}.'.format(annotationFilePath))
    np.random.shuffle(lines)
    num_val = int(len(lines) * val_split)
    num_train = len(lines) - num_val
    print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))

    # Create the output directory before training so a bad path fails early
    # instead of after all epochs have run.
    os.makedirs(logDirPath, exist_ok=True)

    if num_val > 0:
        validation_data = data_generator(lines[num_train:], batch_size, input_shape, anchor_list, num_classes)
        validation_steps = max(1, num_val // batch_size)
    else:
        # Small datasets (fewer than 20 lines) yield no validation samples; an
        # empty generator cannot produce a batch, so validation is disabled.
        validation_data = None
        validation_steps = None

    # fit_generator pulls batches from the generators below.
    model.fit_generator(
        data_generator(lines[:num_train], batch_size, input_shape, anchor_list, num_classes),
        steps_per_epoch=max(1, num_train // batch_size),
        validation_data=validation_data,
        validation_steps=validation_steps,
        epochs=200,
        initial_epoch=0)

    # when model training finished, save model
    model_savedPath = os.path.join(logDirPath, 'trained_weights.h5')
    model.save_weights(model_savedPath)
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
    """Yield training batches forever, reshuffling at the start of every pass.

    Each annotation line is parsed by ``get_random_data`` into an image and
    its boxes; the boxes of a batch are converted to y_true tensors by
    ``preprocess_true_boxes``.

    :param annotation_lines: sequence of annotation lines (copied, not modified)
    :param batch_size: number of samples per yielded batch
    :param input_shape: (height, width) forwarded to the helpers
    :param anchors: anchor boxes forwarded to ``preprocess_true_boxes``
    :param num_classes: number of classes forwarded to ``preprocess_true_boxes``
    :return: generator of ``([image_data, *y_true], zeros(batch_size))`` tuples
    :raises ValueError: on the first ``next()`` if there are no annotation
        lines or ``batch_size`` is not positive
    """
    # Work on a copy so shuffling never reorders the caller's list.
    annotation_lines = list(annotation_lines)
    n = len(annotation_lines)
    if n == 0 or batch_size <= 0:
        raise ValueError(
            'data_generator needs at least one annotation line and a positive '
            'batch size (got {} lines, batch size {}).'.format(n, batch_size))
    i = 0
    while True:
        image_data = []
        box_data = []
        for _ in range(batch_size):
            if i == 0:
                # Start of a pass over the data (including the very first one):
                # reshuffle so every pass sees a different order.
                np.random.shuffle(annotation_lines)
            image, box = get_random_data(annotation_lines[i], input_shape, random=True)
            image_data.append(image)
            box_data.append(box)
            # Wrap back to 0 after the last line, which triggers the reshuffle.
            i = (i + 1) % n
        # Stack the per-sample results into batch arrays. Per the original
        # author's notes: images are (batch, 416, 416, 3) and boxes are
        # (batch, 20, 5) with at most 20 boxes per image -- TODO confirm
        # against get_random_data.
        image_data = np.array(image_data)
        box_data = np.array(box_data)
        # Convert boxes to the list of per-scale ground-truth tensors.
        y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
        # The zeros are a dummy target: the model's own output is the loss.
        yield [image_data, *y_true], np.zeros(batch_size)
if __name__ == '__main__':
    # Start training only when this file is executed as a script.
    main()