pose_detector.py
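"""OpenPose-style multi-person pose detector implemented in Chainer.

Runs a CocoPoseNet model to obtain joint heatmaps and part affinity
fields (PAFs), extracts heatmap peaks as joint candidates, scores
candidate limb connections along the PAFs, and greedily groups the
joints into per-person poses.
"""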
import cv2
import argparse
import numpy as np
# scipy.ndimage.filters is deprecated; import gaussian_filter from scipy.ndimage directly
from scipy.ndimage import gaussian_filter
import chainer
from chainer import cuda, serializers, functions as F
from entity import params, JointType
from models.CocoPoseNet import CocoPoseNet
chainer.config.enable_backprop = False  # disable backprop globally; chainer.using_config() alone is a no-op outside a `with` block
class PoseDetector(object):
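    """Detects multi-person poses in a BGR image.

    Call an instance with an image (see ``__call__``) to get an array of
    per-person joint coordinates.
    """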
def __init__(self, arch=None, weights_file=None, model=None, device=-1):
        if model is not None:
            self.model = model
        else:
            # load the model architecture and weights
            print('Loading PoseNet...')
            self.model = params['archs'][arch]()
            if weights_file:
                serializers.load_npz(weights_file, self.model)
        self.device = device
        if self.device >= 0:
            cuda.get_device_from_id(device).use()
            self.model.to_gpu()

            # create the gaussian kernel used to smooth heatmaps on GPU
            ksize = params['ksize']
            self.gaussian_kernel = cuda.to_gpu(self.create_gaussian_kernel(sigma=params['gaussian_sigma'], ksize=ksize))
def create_gaussian_kernel(self, sigma=1, ksize=5):
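        """Build a (1, 1, ksize, ksize) gaussian kernel used to smooth heatmaps on GPU."""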
        center = int(ksize / 2)
        kernel = np.zeros((1, 1, ksize, ksize), dtype=np.float32)
        for y in range(ksize):
            distance_y = abs(y - center)
            for x in range(ksize):
                distance_x = abs(x - center)
                kernel[0, 0, y, x] = 1 / (sigma**2 * 2 * np.pi) * np.exp(-(distance_x**2 + distance_y**2) / (2 * sigma**2))
        return kernel
def compute_optimal_size(self, orig_img, img_size):
"""画像のサイズが幅と高さが8の倍数になるように調節する"""
orig_img_h, orig_img_w, _ = orig_img.shape
aspect = orig_img_h / orig_img_w
if orig_img_h < orig_img_w:
img_h = img_size
img_w = np.round(img_size / aspect).astype(int)
surplus = img_w % 8
if surplus != 0:
img_w += 8 - surplus
else:
img_w = img_size
img_h = np.round(img_size * aspect).astype(int)
surplus = img_h % 8
if surplus != 0:
img_h += 8 - surplus
return (img_w, img_h)
def compute_peaks_from_heatmaps(self, heatmaps):
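        """Extract local-maximum peaks from each joint heatmap.

        Returns one list per joint category of (x, y, score, id) tuples, where
        score is the smoothed heatmap value and id is a global peak index.
        """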
peak_counter = 0
all_peaks = []
xp = cuda.get_array_module(heatmaps)
        if xp == np:  # CPU: smooth each heatmap with scipy, then find local maxima
            for i in range(heatmaps.shape[0] - 1):  # skip the background channel
                heatmap = gaussian_filter(heatmaps[i], sigma=params['gaussian_sigma'])
                map_left = xp.zeros(heatmap.shape)
                map_right = xp.zeros(heatmap.shape)
                map_top = xp.zeros(heatmap.shape)
                map_bottom = xp.zeros(heatmap.shape)
                map_left[1:, :] = heatmap[:-1, :]
                map_right[:-1, :] = heatmap[1:, :]
                map_top[:, 1:] = heatmap[:, :-1]
                map_bottom[:, :-1] = heatmap[:, 1:]
                peaks_binary = xp.logical_and.reduce((heatmap >= map_left, heatmap >= map_right, heatmap >= map_top, heatmap >= map_bottom, heatmap > params['heatmap_peak_thresh']))
                peaks = zip(xp.nonzero(peaks_binary)[1], xp.nonzero(peaks_binary)[0])  # peak coordinates as [(x, y), (x, y), ...]
                peaks_with_score = [peak_pos + (heatmap[peak_pos[1], peak_pos[0]],) for peak_pos in peaks]  # [(x, y, score), ...] where score is the heatmap value
                peaks_id = range(peak_counter, peak_counter + len(peaks_with_score))
                peaks_with_score_and_id = [peaks_with_score[i] + (peaks_id[i],) for i in range(len(peaks_id))]  # [(x, y, score, id), ...]
                peak_counter += len(peaks_with_score_and_id)
                all_peaks.append(peaks_with_score_and_id)
        else:  # GPU: smooth all heatmaps at once with a gaussian convolution
            heatmaps = F.convolution_2d(heatmaps[:, None], self.gaussian_kernel, stride=1, pad=int(params['ksize'] / 2)).data.squeeze()
            left_heatmaps = xp.zeros(heatmaps.shape)
            right_heatmaps = xp.zeros(heatmaps.shape)
            top_heatmaps = xp.zeros(heatmaps.shape)
            bottom_heatmaps = xp.zeros(heatmaps.shape)
            left_heatmaps[:, 1:, :] = heatmaps[:, :-1, :]
            right_heatmaps[:, :-1, :] = heatmaps[:, 1:, :]
            top_heatmaps[:, :, 1:] = heatmaps[:, :, :-1]
            bottom_heatmaps[:, :, :-1] = heatmaps[:, :, 1:]
            peaks_binary = xp.logical_and(heatmaps >= left_heatmaps, heatmaps >= right_heatmaps)
            peaks_binary = xp.logical_and(peaks_binary, heatmaps >= top_heatmaps)
            peaks_binary = xp.logical_and(peaks_binary, heatmaps >= bottom_heatmaps)
            peaks_binary = xp.logical_and(peaks_binary, heatmaps >= params['heatmap_peak_thresh'])
            for ch, peaks_binary_per_ch in enumerate(peaks_binary[:-1]):  # skip the background channel
                heatmap = heatmaps[ch]
                peaks = zip(xp.nonzero(peaks_binary_per_ch)[1], xp.nonzero(peaks_binary_per_ch)[0])
                peaks_with_score = [peak_pos + (heatmap[peak_pos[1], peak_pos[0]],) for peak_pos in peaks]  # [(x, y, score), ...] where score is the heatmap value
                peaks_id = range(peak_counter, peak_counter + len(peaks_with_score))
                peaks_with_score_and_id = np.array([peaks_with_score[i] + (peaks_id[i],) for i in range(len(peaks_id))], dtype=np.float32)  # [(x, y, score, id), ...]
                peak_counter += len(peaks_with_score_and_id)
                all_peaks.append(peaks_with_score_and_id)
        return all_peaks
def extract_paf_in_points(self, paf, points):
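        """Sample the 2-channel PAF at the given (x, y) points."""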
paf_in_edge = []
for point in points:
point_x = int(round(point[0]))
point_y = int(round(point[1]))
paf_in_edge.append([paf[0, point_y, point_x], paf[1, point_y, point_x]])
return paf_in_edge
def compute_candidate_connections_greedy(self, paf, cand_a, cand_b, img_len, params):
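        """Score all joint_a/joint_b pairs by integrating the PAF along each segment.

        Returns [index_a, index_b, score, score_with_joint_scores] entries sorted
        by descending score.
        """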
candidate_connections = []
        for index_a, joint_a in enumerate(cand_a):
            for index_b, joint_b in enumerate(cand_b):  # each joint is an (x, y, score, id) peak
                vec = np.subtract(joint_b[:2], joint_a[:2])
                vec_len = np.linalg.norm(vec)
                if vec_len == 0:
                    continue
                vec_unit = vec / vec_len
                integ_points = zip(
                    np.linspace(joint_a[0], joint_b[0], num=params['n_integ_points']),
                    np.linspace(joint_a[1], joint_b[1], num=params['n_integ_points'])
                )  # sample points [[x1, y1], [x2, y2], ...] on the segment from joint_a to joint_b
                paf_in_edge = self.extract_paf_in_points(paf, integ_points)
                inner_products = np.dot(paf_in_edge, vec_unit)
                integ_value = np.sum(inner_products) / len(inner_products)
                # penalize connections longer than length_penalty_ratio * img_len (the longer, the larger the penalty)
                integ_value_with_dist_prior = integ_value + min(params['length_penalty_ratio'] * img_len / vec_len - 1, 0)
                n_valid_points = len(np.nonzero(inner_products > params['inner_product_thresh'])[0])
                if n_valid_points > params['n_integ_points_thresh'] and integ_value_with_dist_prior > 0:
                    candidate_connections.append([index_a, index_b, integ_value_with_dist_prior, integ_value_with_dist_prior + joint_a[2] + joint_b[2]])
        candidate_connections = sorted(candidate_connections, key=lambda x: x[2], reverse=True)
        return candidate_connections
def compute_connections(self, pafs, all_peaks, all_peaks_flatten, img_len, params):
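        """For every limb, greedily pick the best non-conflicting joint connections.

        Returns one (n, 5) array per limb with rows
        [peak_id_a, peak_id_b, score, index_a, index_b].
        """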
all_connections = []
for i in range(len(params['limbs_point'])):
paf_index = [i * 2, i * 2 + 1]
paf = pafs[paf_index]
limb_point = params['limbs_point'][i]
cand_a = all_peaks[limb_point[0]]
cand_b = all_peaks[limb_point[1]]
if len(cand_a) > 0 and len(cand_b) > 0:
candidate_connections = self.compute_candidate_connections_greedy(paf, cand_a, cand_b, img_len, params)
connections = np.zeros((0, 5))
for c in candidate_connections:
index_a, index_b, score = c[0:3]
if index_a not in connections[:, 3] and index_b not in connections[:, 4]:
connections = np.vstack([connections, [cand_a[index_a][3], cand_b[index_b][3], score, index_a, index_b]])
if len(connections) >= min(len(cand_a), len(cand_b)):
break
all_connections.append(connections)
else:
all_connections.append(np.zeros((0, 5)))
return all_connections
def grouping_key_points(self, all_connections, candidate_peaks, params):
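        """Group connected joints into per-person subsets.

        Each subset row has 20 columns: 18 joint peak ids (-1 if missing),
        the total score at [-2], and the joint count at [-1]. Low-scoring
        subsets are dropped at the end.
        """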
subsets = -1 * np.ones((0, 20))
for connection_category_index in range(len(params['limbs_point'])):
            joint_a_indices = all_connections[connection_category_index][:, 0]
            joint_b_indices = all_connections[connection_category_index][:, 1]
            joint_category_a_index, joint_category_b_index = params['limbs_point'][connection_category_index]  # joint category indices of this limb
            for connection_index, _ in enumerate(all_connections[connection_category_index]):
                joint_found_cnt = 0
                joint_found_subset_index = [-1, -1]
                for subset_index, subset in enumerate(subsets):
                    # check whether a subset already contains a joint of this connection
                    if subset[joint_category_a_index] == joint_a_indices[connection_index] or subset[joint_category_b_index] == joint_b_indices[connection_index]:
                        joint_found_subset_index[joint_found_cnt] = subset_index
                        joint_found_cnt += 1

                if joint_found_cnt == 1:  # exactly one subset contains either joint of this connection
                    found_subset = subsets[joint_found_subset_index[0]]
                    # Except for shoulder->ear connections, only the start joint can already match.
                    # For shoulder->ear, if the end joint also matches, the face bones were already
                    # detected, so nothing needs to be done.
                    if found_subset[joint_category_b_index] != joint_b_indices[connection_index]:
                        found_subset[joint_category_b_index] = joint_b_indices[connection_index]
                        found_subset[-1] += 1  # increment joint count
                        # add joint b's score and the connection's integral value
                        found_subset[-2] += candidate_peaks[joint_b_indices[connection_index].astype(int), 2] + all_connections[connection_category_index][connection_index][2]
                elif joint_found_cnt == 2:  # joint a is in one subset and joint b in another
                    found_subset_1 = subsets[joint_found_subset_index[0]]
                    found_subset_2 = subsets[joint_found_subset_index[1]]
                    membership = ((found_subset_1 >= 0).astype(int) + (found_subset_2 >= 0).astype(int))[:-2]
                    if len(np.nonzero(membership == 2)[0]) == 0:  # merge the two subsets when they share no joint
                        found_subset_1[:-2] += found_subset_2[:-2] + 1  # default is -1
                        found_subset_1[-2:] += found_subset_2[-2:]
                        found_subset_1[-2] += all_connections[connection_category_index][connection_index][2]  # add only the connection's integral value (joint scores were already summed)
                        subsets = np.delete(subsets, joint_found_subset_index[1], 0)
                    else:
                        pass
                elif joint_found_cnt == 0 and connection_category_index < 17:  # shoulder->ear connections do not start a new subset
                    row = -1 * np.ones(20)
                    row[joint_category_a_index] = joint_a_indices[connection_index]
                    row[joint_category_b_index] = joint_b_indices[connection_index]
                    row[-1] = 2
                    row[-2] = sum(candidate_peaks[all_connections[connection_category_index][connection_index, :2].astype(int), 2]) + all_connections[connection_category_index][connection_index][2]
                    subsets = np.vstack([subsets, row])
# delete low score subsets
keep = np.logical_and(subsets[:, -1] >= params['n_subset_limbs_thresh'], subsets[:, -2]/subsets[:, -1] >= params['subset_score_thresh'])
subsets = subsets[keep]
return subsets
def subsets_to_person_pose_array(self, subsets, all_peaks_flatten):
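        """Convert subsets of peak ids into an array of (x, y, visibility) joints per person."""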
person_pose_array = []
for subset in subsets:
joints = []
for joint_index in subset[:18].astype('i'):
if joint_index >= 0:
joint = all_peaks_flatten[joint_index][:2].astype('i').tolist()
joint.append(2)
joints.append(joint)
else:
joints.append([0, 0, 0])
person_pose_array.append(np.array(joints))
person_pose_array = np.array(person_pose_array)
return person_pose_array
def __call__(self, orig_img, fast_mode=False):
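        """Detect poses in a BGR image, optionally averaging over multiple inference scales."""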
orig_img_h, orig_img_w, _ = orig_img.shape
resized_output_img_w, resized_output_img_h = self.compute_optimal_size(orig_img, params['heatmap_size'])
pafs_sum = 0
heatmaps_sum = 0
# use only the first scale on fast mode
scales = [params['inference_scales'][0]] if fast_mode else params['inference_scales']
for scale in scales:
print("Inference scale: %.1f..." % (scale))
img_size = int(params['inference_img_size'] * scale)
resized_input_img_w, resized_input_img_h = self.compute_optimal_size(orig_img, img_size)
resized_image = cv2.resize(orig_img, (resized_input_img_w, resized_input_img_h))
x_data = np.array(resized_image[np.newaxis], dtype=np.float32).transpose(0, 3, 1, 2) / 256 - 0.5
if self.device >= 0:
x_data = cuda.to_gpu(x_data)
h1s, h2s = self.model(x_data)
pafs_sum += F.resize_images(h1s[-1], (resized_output_img_h, resized_output_img_w)).data[0]
heatmaps_sum += F.resize_images(h2s[-1], (resized_output_img_h, resized_output_img_w)).data[0]
pafs = pafs_sum / len(scales)
heatmaps = heatmaps_sum / len(scales)
if self.device >= 0:
pafs = cuda.to_cpu(pafs)
all_peaks = self.compute_peaks_from_heatmaps(heatmaps)
all_peaks_flatten = np.array([peak for peaks_each_category in all_peaks for peak in peaks_each_category])
if len(all_peaks_flatten) == 0:
return np.empty((0, len(JointType), 3))
all_connections = self.compute_connections(pafs, all_peaks, all_peaks_flatten, resized_output_img_w, params)
subsets = self.grouping_key_points(all_connections, all_peaks_flatten, params)
all_peaks_flatten[:, 0] *= orig_img_w / resized_output_img_w
all_peaks_flatten[:, 1] *= orig_img_h / resized_output_img_h
person_pose_array = self.subsets_to_person_pose_array(subsets, all_peaks_flatten)
return person_pose_array
def draw_person_pose(oriImg, person_pose):
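    """Draw detected limbs and joints onto a copy of the input image."""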
if len(person_pose) == 0:
return oriImg
limb_colors = [
[0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255],
        [0, 85, 255], [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0],
        [255, 0, 85], [170, 255, 0], [85, 255, 0], [170, 0, 255], [0, 0, 255],
[0, 0, 255], [255, 0, 255], [170, 0, 255], [255, 0, 170],
]
joint_colors = [
[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
[85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
[0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
[255, 0, 255], [255, 0, 170], [255, 0, 85]]
canvas = oriImg.copy()
# limbs
for pose in person_pose:
for i, (limb, color) in enumerate(zip(params['limbs_point'], limb_colors)):
if i != 9 and i != 13: # don't show ear-shoulder connection
limb_ind = np.array(limb)
if np.all(pose[limb_ind][:, 2] != 0):
joint1, joint2 = pose[limb_ind][:, :2]
cv2.line(canvas, tuple(joint1), tuple(joint2), color, 2)
# joints
for pose in person_pose:
for i, ((x, y, v), color) in enumerate(zip(pose, joint_colors)):
if v != 0:
cv2.circle(canvas, (x, y), 6, color, -1)
return canvas
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Pose detector')
parser.add_argument('arch', choices=params['archs'].keys(), default='posenet', help='Model architecture')
parser.add_argument('weights', help='weights file path')
parser.add_argument('--img', default=None, help='image file path')
parser.add_argument('--gpu', '-g', type=int, default=-1, help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()
pose_detector = PoseDetector(args.arch, args.weights, device=args.gpu)
if args.img:
img = cv2.imread(args.img)
person_pose_array = pose_detector(img)
res_img = draw_person_pose(img, person_pose_array)
print('Saving result into result.png...')
cv2.imwrite('result.png', res_img)
else:
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
while True:
# get video frame
ret, img = cap.read()
if not ret:
print("Failed to capture image")
break
person_pose_array = pose_detector(img, fast_mode=True)
res_img = draw_person_pose(img, person_pose_array)
cv2.imshow("result", res_img)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()