-
Notifications
You must be signed in to change notification settings - Fork 0
/
dumper.py
328 lines (266 loc) · 10.6 KB
/
dumper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
#!/usr/bin/python
from __future__ import print_function
import cv2
import glob
import math
import numpy as np
import os
import sys
from scipy import ndimage
import random
from PIL import Image
from PIL import ImageOps
import struct
import argparse
'''
Dump samples for cascade training from an images folder or a movie file.
Markup (regions with objects) can be given in .gt format.
Each output object sample is specified by 4 integers
(center column, center row, width and height; all in pixels).
'''
def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``type=bool`` must not be used with argparse: ``bool('False')`` is True
    because every non-empty string is truthy. This parser accepts the usual
    spellings and rejects anything else with a proper argparse error.
    """
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    # The empty string stays False, as it was with the old ``type=bool``.
    if text in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got '%s'" % value)


# Command-line interface of the dumper; parsed once at start-up and read
# through the module-level ``args`` by the functions below.
parser = argparse.ArgumentParser(description='Object samples dumper from pictures to binary uncompressed dumps.')
parser.add_argument('media', help='Images folder or movie file')
parser.add_argument('--gt', default=None, help='Ground truth file (if not specified, write whole image)')
parser.add_argument('--plot', action='store_true', help='Show dumped images')
parser.add_argument('--object', default=None, type=int, help='Object type for dumping, default is all')
parser.add_argument('--out', default='images.dump', help='Output data file (default is images.dump)')
parser.add_argument('--nrands', default=7, type=int,
                    help='Number of variations for each sample (default is 7)')
parser.add_argument('--rotate-jitter', default=True, type=_str2bool,
                    help='Add small rotation to variations')
parser.add_argument('--rotate', default=None, type=int,
                    help='Image rotate angle in degrees (default is 0)')
parser.add_argument('--obj-part', choices=['whole', 'upper'], default='whole',
                    help='Part of object to export (whole object, upper half)')
args = parser.parse_args()

# matplotlib is only needed (and only imported) when previews are requested.
if args.plot:
    import matplotlib.pyplot
    import matplotlib.image
    import matplotlib.cm
def write_rid(im, out_file):
    """Write a grayscale image to ``out_file`` as raw intensity data (RID).

    Record layout: two native ``int`` values (height, width) packed with the
    ``struct`` format ``'ii'``, followed by ``height * width`` unsigned bytes
    in row-major order.

    Args:
        im: 2-D array of integer pixel intensities in the range 0..255.
        out_file: binary file-like object providing ``write``.

    Raises:
        ValueError: if ``im`` is not 2-D or holds values that do not fit
            into an unsigned byte.
    """
    pixels = np.asarray(im)
    if pixels.ndim != 2:
        raise ValueError('write_rid expects a 2-D grayscale image, got %d dimensions' % pixels.ndim)
    if pixels.dtype != np.uint8:
        # Refuse anything that would be silently truncated or wrapped.
        if not np.issubdtype(pixels.dtype, np.integer):
            raise ValueError('write_rid expects integer pixel values, got dtype %s' % pixels.dtype)
        if pixels.size and (pixels.min() < 0 or pixels.max() > 255):
            raise ValueError('pixel values must be in the range 0..255')
        pixels = pixels.astype(np.uint8)
    h, w = pixels.shape
    out_file.write(struct.pack('ii', h, w))
    # tobytes() emits C (row-major) order even for non-contiguous views,
    # which matches the former per-pixel ``tmp[y * w + x]`` layout.
    out_file.write(pixels.tobytes())
def export(im, r, c, w, h, out_file):
    """Crop the object neighbourhood and dump it with jittered boxes.

    The image is cropped to a window of 1.5x the object size around the
    object centre, downscaled so its larger side is at most 192 pixels, and
    written with ``write_rid``. It is followed by ``args.nrands`` randomly
    perturbed boxes, each stored as four ints (column, row, width, height).

    Args:
        im: grayscale image array.
        r, c: row and column of the object centre, in pixels.
        w, h: object width and height, in pixels.
        out_file: binary file-like object to write to.
    """
    # crop a window around the object, clipped to the image borders
    full_rows, full_cols = im.shape[0], im.shape[1]
    top = max(int(r - 0.75 * h), 0)
    bottom = min(int(r + 0.75 * h), full_rows)
    left = max(int(c - 0.75 * w), 0)
    right = min(int(c + 0.75 * w), full_cols)
    im = im[top:bottom, left:right]
    crop_rows, crop_cols = im.shape[0], im.shape[1]
    # the centre is now expressed relative to the crop origin
    r = r - top
    c = c - left

    # shrink the crop (never enlarge it) so its larger side fits the limit
    maxwsize = 192.0
    ratio = maxwsize / max(crop_rows, crop_cols)
    if ratio < 1.0:
        target_size = (int(ratio * crop_cols), int(ratio * crop_rows))
        im = np.asarray(Image.fromarray(im).resize(target_size))
        r, c, w, h = r * ratio, c * ratio, w * ratio, h * ratio

    boxes = []
    for _ in range(args.nrands):
        # size jitter first, then position jitter (same draw order as before)
        wtmp = w * random.uniform(0.9, 1.1)
        htmp = h * random.uniform(0.9, 1.1)
        rtmp = r + h * random.uniform(-0.05, 0.05)
        ctmp = c + w * random.uniform(-0.05, 0.05)
        if args.plot:
            x_lo, x_hi = ctmp - wtmp / 2, ctmp + wtmp / 2
            y_lo, y_hi = rtmp - htmp / 2, rtmp + htmp / 2
            # the four sides of the jittered box, drawn one segment at a time
            sides = (
                ([x_lo, x_hi], [y_lo, y_lo]),
                ([x_hi, x_hi], [y_lo, y_hi]),
                ([x_hi, x_lo], [y_hi, y_hi]),
                ([x_lo, x_lo], [y_hi, y_lo]),
            )
            matplotlib.pyplot.cla()
            for xs, ys in sides:
                matplotlib.pyplot.plot(xs, ys, 'b', linewidth=3)
            matplotlib.pyplot.imshow(im, cmap=matplotlib.cm.Greys_r)
            matplotlib.pyplot.show()
        boxes.append((int(ctmp), int(rtmp), int(wtmp), int(htmp)))

    write_rid(im, out_file)
    out_file.write(struct.pack('i', args.nrands))
    for box in boxes:
        out_file.write(struct.pack('iiii', box[0], box[1], box[2], box[3]))
def mirror_and_export(im, r, c, w, h, out_file):
    """Export the horizontally mirrored version of a sample.

    The image is flipped left-to-right, the object's column coordinate is
    reflected accordingly, and the result is passed to ``export``.
    """
    # exploit mirror symmetry of the face
    flipped = np.asarray(ImageOps.mirror(Image.fromarray(im)))
    # reflect the column coordinate of the object across the image width
    mirrored_c = flipped.shape[1] - c
    export(flipped, r, mirrored_c, w, h, out_file)
def rotate_coords(x, y, angle, cx=0, cy=0):
    """Rotate the point (x, y) by ``angle`` radians around (cx, cy).

    The offset (dx, dy) from the centre is mapped to
    (dx*cos + dy*sin, -dx*sin + dy*cos), and the centre is added back.

    Returns:
        Tuple ``(new_x, new_y)``.
    """
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)
    dx = x - cx
    dy = y - cy
    rotated_x = dx * cos_a + dy * sin_a
    rotated_y = -dx * sin_a + dy * cos_a
    return rotated_x + cx, rotated_y + cy
def whole_rect_from_gt(gt_vals, shape):
    """Convert a ground-truth record into a pixel rectangle for the whole object.

    ``gt_vals[1:5]`` hold centre x, centre y, width and height as percentages
    of the image width (x, width) or height (y, height); ``shape`` is the
    image shape as (rows, columns, ...).

    Returns:
        Tuple ``(cx, cy, w, h)`` in pixels.
    """
    def percent_of(index, extent):
        # a percentage field scaled to the given image extent
        return float(gt_vals[index]) / 100.0 * extent

    center_x = percent_of(1, shape[1])
    center_y = percent_of(2, shape[0])
    width = percent_of(3, shape[1])
    height = percent_of(4, shape[0])
    return center_x, center_y, width, height
def upper_rect_from_gt(gt_vals, shape):
    """Convert a ground-truth record into a pixel rectangle for the upper half.

    Uses the same percentage fields as the whole-object rectangle, but the
    height is halved and the centre is moved up by half of that new height.

    Returns:
        Tuple ``(cx, cy, w, h)`` in pixels.
    """
    rows, cols = shape[0], shape[1]
    width = float(gt_vals[3]) / 100.0 * cols
    half_height = float(gt_vals[4]) / 100.0 * rows / 2
    center_x = float(gt_vals[1]) / 100.0 * cols
    # shift the centre towards smaller row values by half of the new height
    center_y = float(gt_vals[2]) / 100.0 * rows - half_height / 2
    return center_x, center_y, width, half_height
def scan_image_folder(media, gt_file, object_type):
    """Export object samples for every ground-truth record of an image folder.

    Each non-blank line of ``gt_file`` is split on whitespace: the first
    token is the image path relative to ``media``, tokens 1..4 describe the
    object rectangle (see ``whole_rect_from_gt``) and the last token is the
    integer object type. With ``args.rotate_jitter`` every record is also
    processed on copies of the image rotated by -5 and +5 degrees.

    Args:
        media: path of the folder containing the images.
        gt_file: path of the ground-truth file.
        object_type: only export records of this type; ``None`` exports all.

    Returns:
        Tuple ``(total, total_records)``: the number of exported
        (record, angle) combinations (each written both as-is and mirrored)
        and the number of combinations examined.
    """
    # Close the file deterministically and drop blank lines: filtering has to
    # happen on the stripped text, otherwise a bare '\n' passes the test and
    # later fails on gt_vals[0].
    with open(gt_file, 'r') as gt:
        gt_lines = [line.strip() for line in gt if line.strip()]

    angles = [0, -5, 5] if args.rotate_jitter else [0]
    total = 0
    total_records = 0
    for line_no, line in enumerate(gt_lines, 1):
        # construct full image path
        gt_vals = line.split()
        path = media + '/' + gt_vals[0].strip()
        print('%s (%d/%d)\r' % (path, line_no, len(gt_lines)), end="")
        sys.stdout.flush()
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            print("can't open image '" + path + "'")
            continue
        if args.obj_part == 'whole':
            cx, cy, w, h = whole_rect_from_gt(gt_vals, image.shape)
        else:
            cx, cy, w, h = upper_rect_from_gt(gt_vals, image.shape)
        for angle in angles:
            if angle == 0:
                image_cur = image
                cx_cur = cx
                cy_cur = cy
            else:
                # ndimage.rotate is the supported entry point; the
                # ndimage.interpolation namespace is deprecated.
                image_cur = ndimage.rotate(image, angle, reshape=False)
                cx_cur, cy_cur = rotate_coords(cx, cy, math.pi * angle / 180,
                                               image.shape[1] / 2, image.shape[0] / 2)
            total_records += 1
            # skip objects that do not lie completely inside the image
            if cx_cur - w / 2 < 0 or cx_cur + w / 2 >= image.shape[1] or \
                    cy_cur - h / 2 < 0 or cy_cur + h / 2 >= image.shape[0]:
                continue
            if object_type is None or object_type == int(gt_vals[-1]):
                export(image_cur, cy_cur, cx_cur, w, h, out)
                # faces are symmetric and we exploit this here
                mirror_and_export(image_cur, cy_cur, cx_cur, w, h, out)
                total += 1
    return total, total_records
def scan_movie(media_path, gt_file, object_type):
    """Export object samples for every ground-truth record of a movie file.

    Each non-blank line of ``gt_file`` is split on whitespace: the first
    token is the frame number (the first decoded frame is number 1), tokens
    1..4 describe the object rectangle (see ``whole_rect_from_gt``) and the
    last token is the integer object type. With ``args.rotate_jitter`` every
    annotated frame is also processed rotated by -5 and +5 degrees.

    Args:
        media_path: path of the movie file.
        gt_file: path of the ground-truth file.
        object_type: only export records of this type; ``None`` exports all.

    Returns:
        Tuple ``(total, total_records)``: the number of exported
        (record, angle) combinations (each written both as-is and mirrored)
        and the number of combinations examined.

    Raises:
        RuntimeError: if the movie cannot be opened.
    """
    # Close the file deterministically and drop blank lines (the filter must
    # look at the stripped text, a bare '\n' is truthy).
    with open(gt_file, 'r') as gt:
        gt_lines = [line.strip() for line in gt if line.strip()]

    # group the records by frame number
    gt_in_frame = {}
    for line in gt_lines:
        gt_vals = line.split()
        gt_in_frame.setdefault(int(gt_vals[0]), []).append(gt_vals)

    angles = [0, -5.0, 5.0] if args.rotate_jitter else [0]

    media = cv2.VideoCapture(media_path)
    # VideoCapture() returns an object even on failure; isOpened() tells
    # whether the source is actually usable.
    if media is None or not media.isOpened():
        raise RuntimeError("can't open '" + media_path + "'")
    # OpenCV >= 3 exposes the property id at top level, 2.x under cv2.cv.
    if hasattr(cv2, 'CAP_PROP_FRAME_COUNT'):
        frame_count_prop = cv2.CAP_PROP_FRAME_COUNT
    else:
        frame_count_prop = cv2.cv.CV_CAP_PROP_FRAME_COUNT
    total_frames = media.get(frame_count_prop)

    total = 0
    total_records = 0
    cur_frame_num = 0
    try:
        while True:
            ret_val, image = media.read()
            if not ret_val:
                break
            if image is None:
                continue
            cur_frame_num += 1
            if cur_frame_num not in gt_in_frame:
                continue
            # convert only frames that are actually annotated
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            if cur_frame_num % 20 == 0:
                print('frame %d/%d\r' % (cur_frame_num, total_frames), end="")
                sys.stdout.flush()
            for angle in angles:
                if angle == 0:
                    image_cur = image
                    angle_rad = 0
                else:
                    # ndimage.rotate is the supported entry point; the
                    # ndimage.interpolation namespace is deprecated.
                    image_cur = ndimage.rotate(image, angle, reshape=False)
                    angle_rad = math.pi * angle / 180
                for gt_vals in gt_in_frame[cur_frame_num]:
                    if args.obj_part == 'whole':
                        cx, cy, w, h = whole_rect_from_gt(gt_vals, image.shape)
                    else:
                        cx, cy, w, h = upper_rect_from_gt(gt_vals, image.shape)
                    if angle == 0:
                        cx_cur = cx
                        cy_cur = cy
                    else:
                        cx_cur, cy_cur = rotate_coords(cx, cy, angle_rad,
                                                       image.shape[1] / 2, image.shape[0] / 2)
                    total_records += 1
                    # skip objects that do not lie completely inside the frame
                    if cx_cur - w / 2 < 0 or cx_cur + w / 2 >= image.shape[1] or \
                            cy_cur - h / 2 < 0 or cy_cur + h / 2 >= image.shape[0]:
                        continue
                    if object_type is None or object_type == int(gt_vals[-1]):
                        export(image_cur, cy_cur, cx_cur, w, h, out)
                        # faces are symmetric and we exploit this here
                        mirror_and_export(image_cur, cy_cur, cx_cur, w, h, out)
                        total += 1
    finally:
        # free the decoder even when an export fails half-way
        media.release()
    return total, total_records
# Script entry: write either annotated object samples (when --gt is given)
# or whole images to the output dump. ``out`` stays a module-level name
# because the scan functions write through it; the ``with`` block makes sure
# the dump is flushed and closed even if processing fails.
with open(args.out, 'wb') as out:
    if args.gt:
        if os.path.isdir(args.media):
            total, total_records = scan_image_folder(args.media, args.gt, args.object)
        else:
            total, total_records = scan_movie(args.media, args.gt, args.object)
        print('Exported %d (%d * %d * 2) objects from %d records' %
              (total * args.nrands * 2, total, args.nrands, total_records))
    else:
        total = 0
        all_images = glob.glob(args.media + '/*')
        for cur_file in all_images:
            print(cur_file + ' (' + str(total) + '/' + str(len(all_images)) + ')\r', end="")
            sys.stdout.flush()
            image = cv2.imread(cur_file, cv2.IMREAD_GRAYSCALE)
            # Check readability BEFORE rotating: rotating a missing image
            # (None) would raise instead of skipping the file.
            if image is None:
                print("can't open image '" + cur_file + "'")
                continue
            if args.rotate is None:
                pass
            elif args.rotate == 90:
                image = np.rot90(image)
            elif args.rotate == 180:
                image = np.rot90(image, 2)
            elif args.rotate == 270:
                image = np.rot90(image, 3)
            else:
                # ndimage.rotate is the supported entry point; the
                # ndimage.interpolation namespace is deprecated.
                image = ndimage.rotate(image, args.rotate)
            write_rid(image, out)
            # whole images carry no object boxes
            out.write(struct.pack('i', 0))
            if args.plot:
                cv2.imshow('pic', image)
                cv2.waitKey(50)
            total += 1
        print('Exported %d images' % total)