-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_gen.py
210 lines (168 loc) · 6.25 KB
/
data_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import numpy as np
import matplotlib.pyplot as plt
import random
from PIL import Image, ImageDraw, ImageFont, ImageOps
import os
from arguments import Arguments
from bbox_based_rot_correction import rotation_correct_and_line_order
import processing
import blob_extraction
import mser_extraction
def make_string(args):
    """Build a random multi-line string from the settings in args.

    The number of lines is drawn uniformly from 1..args.max_lines. For each
    line a length is drawn from args.min_l..args.max_l (both inclusive) and
    that many characters are picked at random from args.alphabet. Every line,
    including the last one, is terminated by a newline character.

    Returns the string, lower-cased when args.lower_case is truthy.
    """
    n_lines = random.randint(1, args.max_lines)
    last_index = len(args.alphabet) - 1
    pieces = []
    for _ in range(n_lines):
        length = random.randint(args.min_l, args.max_l)
        pieces.extend(
            args.alphabet[random.randint(0, last_index)] for _ in range(length)
        )
        pieces.append('\n')
    text = ''.join(pieces)
    return text.lower() if args.lower_case else text
def make_image(args, string, font=None):
    """Render a string (as produced by make_string) into an artificial image.

    The text is drawn onto a separate greyscale layer of size args.text_box,
    rotated by a random angle in [-args.max_angle, args.max_angle], recoloured
    so that the glyphs are black on white, and pasted onto a white canvas of
    mode args.colorspace and size args.shape.

    Args:
        args: settings object; reads font, font_size, colorspace, shape,
            text_box, max_angle and pos.
        string: the text to render.
        font: path of the font file to use; falls back to args.font when None.

    Returns:
        The canvas image with the text pasted in.
    """
    font_path = args.font if font is None else font
    typeface = ImageFont.truetype(font_path, args.font_size)

    canvas = Image.new(args.colorspace, args.shape, color='white')
    text_layer = Image.new('L', args.text_box)
    angle = np.random.uniform(-args.max_angle, args.max_angle)

    # Draw at full intensity on the (black) layer; the colorize call below
    # inverts this so that glyphs end up black and the background white.
    ImageDraw.Draw(text_layer).text((0, 0), string, font=typeface, fill=255)
    text_layer = text_layer.rotate(angle, expand=True)
    text_layer = ImageOps.colorize(
        text_layer, black=(255, 255, 255), white=(0, 0, 0)
    )

    # Without args.pos the text goes to the top-left corner; otherwise the
    # offset is random, bounded by the fraction given in args.pos per axis.
    x_pos = y_pos = 0
    if args.pos is not None:
        x_pos = random.randint(0, int(args.shape[0] - args.pos[0] * args.shape[0]))
        y_pos = random.randint(0, int(args.shape[1] - args.pos[1] * args.shape[1]))

    canvas.paste(text_layer, (x_pos, y_pos))
    return canvas
def get_font_files(root='fonts'):
    """Collect the paths of all '.ttf' font files below a root directory.

    Args:
        root: directory that is searched recursively. Defaults to 'fonts',
            resolved relative to the current working directory.

    Returns:
        A sorted list of file paths (each one prefixed with root). The list is
        empty when root does not exist or contains no '.ttf' files.
    """
    font_files = [
        os.path.join(path, name)
        for path, _subdirs, files in os.walk(root)
        for name in files
        if name.endswith('.ttf')
    ]
    # os.walk yields entries in arbitrary, filesystem-dependent order; sorting
    # makes index-based font selection reproducible across runs and machines.
    font_files.sort()
    return font_files
def make_data(args):
    """Generate a dataset of artificial text images plus their ground truth.

    Creates args.image_path if it does not exist. If it exists and
    args.safe_override is truthy, the user is asked for confirmation and the
    function returns without doing anything unless the answer is exactly 'y'.

    When args.font == 'all', the fonts returned by get_font_files() are cycled
    through image by image; otherwise args.font is used for every image.

    Two output modes, selected by args.container:
      * falsy: writes '<i>.jpg' for i in range(args.n) and a 'truth.txt' file
        into args.image_path (each string followed by one blank line), and
        returns None.
      * truthy: keeps the images in memory, converts them with convertToNumpy,
        saves 'data', 'target' and 'label' arrays into args.image_path and
        returns the tuple (data, target, labels).

    Raises:
        FileNotFoundError: if args.font == 'all' but no font files were found.
    """
    if os.path.exists(args.image_path):
        if args.safe_override:
            answer = input('Path exists. Do you want to override? Type "y" for yes: \n')
            # Compare by value; `is not` tests object identity, which is not a
            # reliable way to compare strings.
            if answer != 'y':
                return None
    else:
        os.makedirs(args.image_path)

    font_files = None
    if args.font == 'all':
        font_files = get_font_files()
        if not font_files:
            raise FileNotFoundError(
                "args.font is 'all' but no .ttf files were found under 'fonts'"
            )

    def pick_font(index):
        # None makes make_image fall back to args.font.
        if font_files is None:
            return None
        return font_files[index % len(font_files)]

    if not args.container:
        truth_path = os.path.join(args.image_path, 'truth.txt')
        # The context manager closes the file even if image generation fails.
        with open(truth_path, 'w') as f:
            for i in range(args.n):
                string = make_string(args)
                img = make_image(args, string=string, font=pick_font(i))
                img.save(os.path.join(args.image_path, '{}.jpg'.format(i)))
                # The extra newline leaves a blank line between the blocks.
                f.write(string + '\n')
        return None

    img_container = []
    truth_container = []
    for i in range(args.n):
        string = make_string(args)
        img_container.append(make_image(args, string=string, font=pick_font(i)))
        truth_container.append(string)
    data, target, labels = convertToNumpy(img_container, truth_container)
    np.save(os.path.join(args.image_path, 'data'), data)
    np.save(os.path.join(args.image_path, 'target'), target)
    np.save(os.path.join(args.image_path, 'label'), labels)
    return data, target, labels
def convertToNumpy(data, target):
    """Convert generated images and their truth values to numpy arrays.

    Args:
        data: sequence of images; each one is converted with np.array
            (presumably all images share one shape -- confirm with callers).
        target: sequence of ground-truth values, one per image.

    Returns:
        A tuple (data, target, labels):
          * data: all images combined into one array and divided by 255.
          * target: for every entry, its integer index into the sorted unique
            values of the input target.
          * labels: the input target object, returned unchanged.
    """
    labels = target
    # np.unique sorts the distinct values; the inverse array maps every
    # original entry to the position of its value in that sorted list.
    _, class_ids = np.unique(np.asarray(target), return_inverse=True)
    pixels = np.array([np.array(img) for img in data]) / 255
    return pixels, class_ids, labels
def _read_truth_blocks(load_path, char_dict):
    """Parse 'truth.txt' in load_path into one list of encoded lines per block."""
    with open(os.path.join(load_path, 'truth.txt')) as f:
        content = [line.strip() for line in f]

    truth_blocks = []
    current_block = []
    for line in content:
        if line == '':
            # A blank line terminates the block of the current image.
            truth_blocks.append(current_block)
            current_block = []
        else:
            current_block.append(
                np.array([char_dict[char] for char in line], dtype=np.int32)
            )
    # Keep the last block even when the file does not end with a blank line.
    if current_block:
        truth_blocks.append(current_block)
    return truth_blocks


def generate_char_data(load_path, args=None):
    """Extract per-character images and labels from a generated dataset.

    Reads 'truth.txt' and the images '0.jpg' .. '<args.n - 1>.jpg' from
    load_path. Every image is loaded with processing.load_img, segmented with
    the extractor selected by args.method ('threshold' or 'mser'), passed
    through rotation_correct_and_line_order, and its characters are obtained
    via components.extract(args). An image only contributes to the result if
    the number of extracted characters equals the number of ground-truth
    characters of its block; the difference is printed for every image.

    Args:
        load_path: directory containing the images and 'truth.txt'.
        args: settings object; a default Arguments() is created when None.
            Reads method, char_dict and n.

    Returns:
        A tuple (char_images, char_truths): the extracted characters of all
        accepted images concatenated along axis 0, and the matching
        ground-truth codes (int32, looked up in args.char_dict).

    Raises:
        ValueError: if args.method is unknown, or if no image was accepted.
    """
    if args is None:
        args = Arguments()

    if args.method == 'threshold':
        method = blob_extraction.find_blobs
    elif args.method == 'mser':
        method = mser_extraction.extract_mser
    else:
        # A real exception instead of `assert False`, which is removed when
        # Python runs with -O and would leave `method` undefined.
        raise ValueError(
            "unknown extraction method {!r}; expected 'threshold' or 'mser'".format(
                args.method
            )
        )

    truth_blocks = _read_truth_blocks(load_path, args.char_dict)

    char_images = []
    char_truths = []
    for i in range(args.n):
        file_path = os.path.join(load_path, str(i) + '.jpg')
        img = processing.load_img(file_path, args)
        components = method(img, args)
        components = rotation_correct_and_line_order(components)
        chars = components.extract(args)

        # Check whether the number of detected chars coincides with the
        # ground truth; mismatching images cannot be labelled reliably.
        diff = sum(len(line) for line in truth_blocks[i]) - len(chars)
        print(f'image {i}: #gt chars - #detected chars: {diff}')
        if diff == 0:
            char_images.append(chars)
            char_truths.append(np.concatenate(truth_blocks[i]))

    if not char_images:
        raise ValueError(
            'no image had a detected character count matching the ground truth'
        )
    return np.concatenate(char_images, axis=0), np.concatenate(char_truths, axis=0)
def save_char_data(args=None):
    """Extract the character data of a generated dataset and save it to disk.

    Runs generate_char_data on args.image_path and stores the results as
    'images.npy' and 'gt.npy' inside args.train_path, creating that directory
    (including missing parents) when necessary.

    Args:
        args: settings object; a default Arguments() is created when None.
            Reads image_path and train_path.
    """
    if args is None:
        args = Arguments()
    load_path = args.image_path
    save_path = args.train_path

    # makedirs with exist_ok avoids the exists-then-create race and also works
    # for nested target paths.
    os.makedirs(save_path, exist_ok=True)

    char_images, char_truth = generate_char_data(load_path, args)
    print(f'saving {len(char_truth)} images')
    np.save(os.path.join(save_path, 'images.npy'), char_images)
    np.save(os.path.join(save_path, 'gt.npy'), char_truth)
def _main():
    """Generate the image dataset, then extract and save its character data."""
    # For a quick trial run, override config.n, config.image_path and
    # config.train_path here before the two calls below.
    config = Arguments()
    make_data(config)
    save_char_data(config)


if __name__ == '__main__':
    _main()