class Renderer(object):
    """Render corpus samples as augmented grayscale text-line images.

    A sample is drawn with a random font on a generated or image-based
    background, then optionally cropped, underlined, curved, perspective
    transformed, noised, blurred, inverted, embossed and sharpened according
    to the sections of ``cfg``.
    """

    def __init__(self, corpus, fonts, bgs, cfg, width=256, height=32, space_ratio=0.0,
                 clip_max_chars=False, max_chars=None, debug=False, gpu=False, strict=False):
        """
        :param corpus: object exposing ``get_sample(img_index)`` and ``chars_file``
        :param fonts: sequence of font paths; one is picked at random per image
        :param bgs: sequence of background images (numpy arrays)
        :param cfg: effect configuration object
        :param width: output image width; 0 derives the width from the text (see crop_img)
        :param height: output image height
        :param space_ratio: probability of replacing an interior char with a space
        :param clip_max_chars: clip samples longer than ``max_chars``
        :param max_chars: max chars per sample, defaults to ``floor(width / 4) - 1``
        :param debug: print progress messages and draw debug boxes
        :param gpu: forwarded to the perspective transformer
        :param strict: retry when the picked font lacks a char of the sample
        """
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height
        self.space_ratio = space_ratio
        self.clip_max_chars = clip_max_chars
        self.max_chars = math.floor(width / 4) - 1 if max_chars is None else max_chars
        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)
        self.remaper = Remaper(cfg)

        self.create_kernals()

        if self.strict:
            self.font_unsupport_chars = font_utils.get_unsupported_chars(
                self.fonts, corpus.chars_file)

    def gen_img(self, img_index):
        """
        Generate one augmented text image.
        :param img_index: forwarded to the corpus when picking the sample
        :return:
            word_img: rendered numpy image
            word: the text actually drawn (after optional space injection)
        """
        word, font, word_size = self.pick_font(img_index)

        if self.space_ratio > 0 and len(word) > 2:
            chars = list(word)
            for i in range(1, len(chars) - 1):
                # Consecutive spaces are not allowed: both neighbours must be
                # non-space before this char may be replaced by a space.
                if chars[i - 1] == ' ' or chars[i + 1] == ' ':
                    continue
                if random.random() < self.space_ratio:
                    chars[i] = ' '
            word = ''.join(chars)
        self.dmsg("after pick font")

        # Background's height should be much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)
        word_img, text_box_pnts, word_color = self.draw_text_on_bg(word, font, bg)
        self.dmsg("After draw_text_on_bg")

        if apply(self.cfg.crop):
            text_box_pnts = self.apply_crop(text_box_pnts, self.cfg.crop)

        if apply(self.cfg.line):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts, word_color)
            self.dmsg("After draw line")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (0, 255, 155))

        if apply(self.cfg.curve):
            word_img, text_box_pnts = self.remaper.apply(word_img, text_box_pnts, word_color)
            self.dmsg("After remapping")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (155, 255, 0))

        word_img, img_pnts_transformed, text_box_pnts_transformed = \
            self.apply_perspective_transform(word_img, text_box_pnts,
                                             max_x=self.cfg.perspective_transform.max_x,
                                             max_y=self.cfg.perspective_transform.max_y,
                                             max_z=self.cfg.perspective_transform.max_z,
                                             gpu=self.gpu)
        self.dmsg("After perspective transform")

        if self.debug:
            # In debug mode the uncropped image is kept and the crop box is drawn on it
            _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
            word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
        self.dmsg("After crop_img")

        if apply(self.cfg.noise):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)
            self.dmsg("After noiser")

        blured = False
        if apply(self.cfg.blur):
            blured = True
            word_img = self.apply_blur_on_output(word_img)
            self.dmsg("After blur")

        # prydown is itself a blur, so it is only tried when no blur was applied
        if not blured:
            if apply(self.cfg.prydown):
                word_img = self.apply_prydown(word_img)
                self.dmsg("After prydown")

        word_img = np.clip(word_img, 0., 255.)

        if apply(self.cfg.reverse_color):
            word_img = self.reverse_img(word_img)
            self.dmsg("After reverse_img")

        if apply(self.cfg.emboss):
            word_img = self.apply_emboss(word_img)
            self.dmsg("After emboss")

        if apply(self.cfg.sharp):
            word_img = self.apply_sharp(word_img)
            self.dmsg("After sharp")

        return word_img, word

    def dmsg(self, msg):
        """Print ``msg`` only when debug mode is on."""
        if self.debug:
            print(msg)

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        """
        Get random left-top point for putting a small rect in a large rect.
        Normally dst_height>src_height and dst_width>src_width; an axis on which
        the destination is not larger gets offset 0.
        :return: (x_offset, y_offset)
        """
        y_max_offset = max(0, dst_height - src_height)
        x_max_offset = max(0, dst_width - src_width)

        y_offset = random.randint(0, y_max_offset) if y_max_offset != 0 else 0
        x_offset = random.randint(0, x_max_offset) if x_max_offset != 0 else 0

        return x_offset, y_offset

    def crop_img(self, img, text_box_pnts_transformed):
        """
        Crop text from large input image
        :param img: image to crop
        :param text_box_pnts_transformed: text_bbox_pnts after apply_perspective_transform
        :return:
            dst: image of height out_height and width out_width
                 (or a text-derived width when out_width == 0)
            crop_bbox: bounding box (x, y, w, h) on input image
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height
        # TODO: If rotate angle(z) of text is too big, text will become very small,
        # we should do something to prevent text too small

        # dst_height and dst_width is used to leave some padding around text bbox
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        if self.out_width == 0:
            scale = bbox_height / dst_height
        else:
            dst_width = self.out_width
            scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        if self.out_width == 0:
            # Variable-width output: scaled text width plus random horizontal padding
            padding = random.randint(s_bbox_width // 10, s_bbox_width // 8)
            dst_width = s_bbox_width + padding * 2

        s_bbox = (np.around(bbox[0] / scale),
                  np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale),
                  np.around(bbox[3] / scale))

        x_offset, y_offset = self.random_xy_offset(s_bbox_height, s_bbox_width,
                                                   self.out_height, dst_width)

        dst_bbox = (
            self.int_around((s_bbox[0] - x_offset) * scale),
            self.int_around((s_bbox[1] - y_offset) * scale),
            self.int_around(dst_width * scale),
            self.int_around(self.out_height * scale)
        )

        # It's important to crop first and then resize, for speed
        dst = img[dst_bbox[1]:dst_bbox[1] + dst_bbox[3], dst_bbox[0]:dst_bbox[0] + dst_bbox[2]]

        dst = cv2.resize(dst, (dst_width, self.out_height), interpolation=cv2.INTER_CUBIC)

        return dst, dst_bbox

    def int_around(self, val):
        """Round ``val`` with numpy and return it as a Python int."""
        return int(np.around(val))

    def get_word_color(self, bg, text_x, text_y, word_height, word_width):
        """
        Only use word roi area to get word color
        :return: random gray value in [0, 2/3 * mean of the roi]
        """
        offset = 10
        # Clamp the roi start at 0: a negative start index would wrap around to
        # the opposite edge of the background (or yield an empty roi).
        ymin = max(0, text_y - offset)
        ymax = text_y + word_height + offset
        xmin = max(0, text_x - offset)
        xmax = text_x + word_width + offset

        word_roi_bg = bg[ymin:ymax, xmin:xmax]

        bg_mean = int(np.mean(word_roi_bg) * (2 / 3))
        word_color = random.randint(0, bg_mean)
        return word_color

    def draw_text_on_bg(self, word, font, bg):
        """
        Draw word in the center of background
        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
            word_color: gray value used for the text
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_size = self.get_word_size(font, word)
        word_height = word_size[1]
        word_width = word_size[0]

        offset = font.getoffset(word)

        pil_img = Image.fromarray(np.uint8(bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        word_color = self.get_word_color(bg, text_x, text_y, word_height, word_width)

        if apply(self.cfg.random_space):
            text_x, text_y, word_width, word_height = self.draw_text_with_random_space(
                draw, font, word, word_color, bg_width, bg_height)
            np_img = np.array(pil_img).astype(np.float32)
        else:
            if apply(self.cfg.seamless_clone):
                np_img = self.draw_text_seamless(font, bg, word, word_color,
                                                 word_height, word_width, offset)
            else:
                self.draw_text_wrapper(draw, word, text_x - offset[0], text_y - offset[1],
                                       font, word_color)
                np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [
            [text_x, text_y],
            [text_x + word_width, text_y],
            [text_x + word_width, text_y + word_height],
            [text_x, text_y + word_height]
        ]

        return np_img, text_box_pnts, word_color

    def draw_text_seamless(self, font, bg, word, word_color, word_height, word_width, offset):
        """
        Draw the word on a white patch and blend the patch into the center of
        ``bg`` with cv2.seamlessClone using a randomly chosen clone mode.
        :return: blended gray image
        """
        # Extra margin around the text, for better seamlessClone
        seamless_offset = 6

        # Draw text on a white image, then draw it on background
        white_bg = np.ones((word_height + seamless_offset, word_width + seamless_offset)) * 255
        text_img = Image.fromarray(np.uint8(white_bg))
        draw = ImageDraw.Draw(text_img)

        self.draw_text_wrapper(draw, word,
                               0 + seamless_offset // 2,
                               0 - offset[1] + seamless_offset // 2,
                               font, word_color)

        # Use the whole text_img as the clone mask
        text_img = np.array(text_img).astype(np.uint8)
        text_mask = 255 * np.ones(text_img.shape, text_img.dtype)

        # The center of the text patch is placed at the center of the background
        center = (bg.shape[1] // 2, bg.shape[0] // 2)

        # opencv seamlessClone requires bgr images
        text_img_bgr = np.ones((text_img.shape[0], text_img.shape[1], 3), np.uint8)
        bg_bgr = np.ones((bg.shape[0], bg.shape[1], 3), np.uint8)
        cv2.cvtColor(text_img, cv2.COLOR_GRAY2BGR, text_img_bgr)
        cv2.cvtColor(bg, cv2.COLOR_GRAY2BGR, bg_bgr)

        flag = np.random.choice([
            cv2.NORMAL_CLONE,
            cv2.MIXED_CLONE,
            cv2.MONOCHROME_TRANSFER
        ])

        mixed_clone = cv2.seamlessClone(text_img_bgr, bg_bgr, text_mask, center, flag)

        np_img = cv2.cvtColor(mixed_clone, cv2.COLOR_BGR2GRAY)

        return np_img

    def draw_text_with_random_space(self, draw, font, word, word_color, bg_width, bg_height):
        """
        Draw the word char by char with one random extra gap between chars.
        If random_space applied, text_x, text_y, word_width, word_height may change
        :return: (text_x, text_y, width, height) of the drawn text
        """
        width = 0
        height = 0
        chars_size = []
        y_offset = 10 ** 5
        for c in word:
            size = font.getsize(c)
            chars_size.append(size)

            width += size[0]
            # set max char height as word height
            if size[1] > height:
                height = size[1]

            # Min chars y offset as word y offset
            # Assume only y offset
            c_offset = font.getoffset(c)
            if c_offset[1] < y_offset:
                y_offset = c_offset[1]

        # The gap is a random fraction of the word height, shared by all chars
        char_space_width = int(height * np.random.uniform(self.cfg.random_space.min,
                                                          self.cfg.random_space.max))

        width += (char_space_width * (len(word) - 1))

        text_x = int((bg_width - width) / 2)
        text_y = int((bg_height - height) / 2)

        c_x = text_x
        c_y = text_y

        for i, c in enumerate(word):
            draw.text((c_x, c_y - y_offset), c, fill=word_color, font=font)
            c_x += (chars_size[i][0] + char_space_width)

        return text_x, text_y, width, height

    def draw_text_wrapper(self, draw, text, x, y, font, text_color):
        """
        Draw text with or without a border, depending on cfg.text_border.
        :param x/y: expected to already have the font offset removed
        """
        if apply(self.cfg.text_border):
            self.draw_border_text(draw, text, x, y, font, text_color)
        else:
            draw.text((x, y), text, fill=text_color, font=font)

    def draw_border_text(self, draw, text, x, y, font, text_color):
        """
        Draw text surrounded by a one pixel lighter or darker border.
        :param x/y: expected to already have the font offset removed
        :raises ValueError: if neither light nor dark border is enabled with a
                            positive fraction
        """
        # thickness larger than 1 may give bad border result
        thickness = 1

        border_cfg = self.cfg.text_border
        choices = []
        weights = []
        if border_cfg.light.enable:
            choices.append(0)
            weights.append(border_cfg.light.fraction)
        if border_cfg.dark.enable:
            choices.append(1)
            weights.append(border_cfg.dark.fraction)

        total = float(sum(weights))
        if not choices or total <= 0:
            raise ValueError("text_border needs light or dark enabled with a positive fraction")

        # np.random.choice requires probabilities summing to 1; normalise so
        # that enabling only one style (or uneven fractions) still works.
        p = [w / total for w in weights]
        light_or_dark = np.random.choice(choices, p=p)

        if light_or_dark == 0:
            # Keep the exclusive upper bound >= 1 so very bright text does not
            # make randint fail; the border then equals the text color.
            border_color = text_color + np.random.randint(0, max(1, 255 - text_color - 1))
        else:
            border_color = text_color - np.random.randint(0, text_color + 1)
        border_color = int(border_color)

        # Edge neighbours first (thin border), then the diagonals (thicker border)
        shifts = [
            (-thickness, 0), (thickness, 0), (0, -thickness), (0, thickness),
            (-thickness, -thickness), (thickness, -thickness),
            (-thickness, thickness), (thickness, thickness),
        ]
        for dx, dy in shifts:
            draw.text((x + dx, y + dy), text, font=font, fill=border_color)

        # now draw the text over it
        draw.text((x, y), text, font=font, fill=text_color)

    def gen_bg(self, width, height):
        """Return an image-based or a random background, depending on cfg.img_bg."""
        if apply(self.cfg.img_bg):
            bg = self.gen_bg_from_image(int(width), int(height))
        else:
            bg = self.gen_rand_bg(int(width), int(height))
        return bg

    def gen_rand_bg(self, width, height):
        """
        Generate random background: bright random noise smoothed by a gauss blur
        """
        bg_high = random.uniform(220, 255)
        bg_low = bg_high - random.uniform(1, 60)

        bg = np.random.randint(bg_low, bg_high, (height, width)).astype(np.uint8)

        bg = self.apply_gauss_blur(bg)

        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random crop from resized background
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width, out.shape[0], out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        out = self.apply_gauss_blur(out, ks=[7, 11, 13, 15, 17])

        # TODO: find a better way to deal with background
        # (a contrast/brightness adjustment based on the crop mean used to live here)

        return out

    @retry
    def pick_font(self, img_index):
        """
        Pick a sample and a random font; raises (and is retried) in strict mode
        when the font does not support a char of the sample.
        :param img_index: when use list corpus, this param is used
        :return:
            word: sample text, clipped when clip_max_chars is set
            font: truetype
            size: word size, removed offset (width, height)
        """
        word = self.corpus.get_sample(img_index)

        if self.clip_max_chars and len(word) > self.max_chars:
            word = self.clip_chars(word)

        font_path = random.choice(self.fonts)

        if self.strict:
            unsupport_chars = self.font_unsupport_chars[font_path]
            for c in word:
                if c == ' ':
                    continue
                if c in unsupport_chars:
                    print('Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s' % (
                        word, c, font_path))
                    raise Exception

        # Font size in point
        font_size = random.randint(self.cfg.font_size.min, self.cfg.font_size.max)
        font = ImageFont.truetype(font_path, font_size)

        return word, font, self.get_word_size(font, word)

    def clip_chars(self, word):
        """
        Return a random window of max_chars chars whose first and last chars are
        not spaces; raises after 6 failed attempts.
        """
        for _ in range(6):
            start = random.randint(0, len(word) - self.max_chars)
            # make sure the chars on both edges are not spaces
            if word[start] != ' ' and word[start + self.max_chars - 1] != ' ':
                return word[start: start + self.max_chars]
        raise Exception

    def get_word_size(self, font, word):
        """
        Get word size removed offset
        :param font: truetype
        :param word:
        :return:
            size: word size, removed offset (width, height)
        """
        offset = font.getoffset(word)
        size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self, img, text_box_pnts, max_x, max_y, max_z, gpu=False):
        """
        Apply perspective transform on image
        :param img: origin numpy image
        :param text_box_pnts: four corner points of text
        :param max_x: max rotate angle around X-axis
        :param max_y: max rotate angle around Y-axis
        :param max_z: max rotate angle around Z-axis
        :param gpu: forwarded to the transformer
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective transform
            dst_text_pnts: points of text after apply perspective transform
        """
        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        transformer = math_utils.PerspectiveTransform(x, y, z, scale=1.0, fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        """Blur the output image with a gauss blur or a normalized box blur (50/50)."""
        if prob(0.5):
            return self.apply_gauss_blur(img, [3, 5])
        else:
            return self.apply_norm_blur(img)

    def apply_gauss_blur(self, img, ks=None):
        """
        Gauss blur with a kernel size picked from ``ks``.
        A random sigma is only used for kernels <= 3; otherwise sigma is 0.
        """
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        """Box blur with a kernel size picked from ``ks``."""
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_prydown(self, img):
        """
        Blur the image to simulate the effect of enlarging a small picture:
        shrink by a random scale, then resize back to the original size.
        """
        scale = random.uniform(1, self.cfg.prydown.max_scale)
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)),
                         interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)

    def reverse_img(self, word_img):
        """Invert the image around 255 plus a small random offset."""
        offset = np.random.randint(-10, 10)
        return 255 + offset - word_img

    def create_kernals(self):
        """Create the 3x3 convolution kernels used by apply_emboss and apply_sharp."""
        self.emboss_kernal = np.array([
            [-2, -1, 0],
            [-1, 1, 1],
            [0, 1, 2]
        ])

        self.sharp_kernel = np.array([
            [-1, -1, -1],
            [-1, 9, -1],
            [-1, -1, -1]
        ])

    def apply_emboss(self, word_img):
        """Convolve the image with the emboss kernel."""
        return cv2.filter2D(word_img, -1, self.emboss_kernal)

    def apply_sharp(self, word_img):
        """Convolve the image with the sharpen kernel."""
        return cv2.filter2D(word_img, -1, self.sharp_kernel)

    def apply_crop(self, text_box_pnts, crop_cfg):
        """
        Random crop text box height top or bottom, we don't need image information in this step, only change box pnts
        Note: ``text_box_pnts`` is modified in place and the same object is returned.
        :param text_box_pnts: bbox of text [left-top, right-top, right-bottom, left-bottom]
        :param crop_cfg: config with ``top`` and ``bottom`` sections, each having min/max
        :return:
            croped_text_box_pnts
        """
        height = abs(text_box_pnts[0][1] - text_box_pnts[3][1])
        # Crop amounts in the config are relative to the output height
        scale = float(height) / float(self.out_height)

        croped_text_box_pnts = text_box_pnts

        if prob(0.5):
            top_crop = int(random.randint(crop_cfg.top.min, crop_cfg.top.max) * scale)
            self.dmsg("top crop %d" % top_crop)
            croped_text_box_pnts[0][1] += top_crop
            croped_text_box_pnts[1][1] += top_crop
        else:
            bottom_crop = int(random.randint(crop_cfg.bottom.min, crop_cfg.bottom.max) * scale)
            self.dmsg("bottom crop %d " % bottom_crop)
            croped_text_box_pnts[2][1] -= bottom_crop
            croped_text_box_pnts[3][1] -= bottom_crop

        return croped_text_box_pnts
class Renderer(object):
    """Render corpus samples as augmented grayscale text-line images.

    A sample is drawn with a random font on a generated or image-based
    background, then optionally underlined, perspective transformed, cropped
    to the output size, noised, blurred and inverted according to ``cfg``.
    """

    def __init__(self, corpus, fonts, bgs, cfg, width=256, height=32,
                 clip_max_chars=False, debug=False, gpu=False, strict=False):
        """
        :param corpus: object exposing ``get_sample()`` and ``chars_file``
        :param fonts: sequence of font paths; one is picked at random per image
        :param bgs: sequence of background images (numpy arrays)
        :param cfg: effect configuration object
        :param width: output image width
        :param height: output image height
        :param clip_max_chars: truncate samples to ``floor(width / 4) - 1`` chars
        :param debug: draw debug boxes instead of cropping the image
        :param gpu: forwarded to the perspective transformer
        :param strict: retry when the picked font lacks a char of the sample
        """
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height
        self.clip_max_chars = clip_max_chars
        self.max_chars = math.floor(width / 4) - 1
        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)

        if self.strict:
            self.font_chars = get_fonts_chars(self.fonts, corpus.chars_file)

    def gen_img(self):
        """
        Generate one augmented text image.
        :return:
            word_img: rendered numpy image
            word: the text drawn on it
        """
        word, font, word_size = self.pick_font()

        # Background's height should be much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)
        word_img, text_box_pnts, word_color = self.draw_text_on_bg(word, font, bg)

        if apply(self.cfg.line):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts, word_color)

        word_img, img_pnts_transformed, text_box_pnts_transformed = \
            self.apply_perspective_transform(word_img, text_box_pnts,
                                             max_x=self.cfg.perspective_transform.max_x,
                                             max_y=self.cfg.perspective_transform.max_y,
                                             max_z=self.cfg.perspective_transform.max_z,
                                             gpu=self.gpu)

        if self.debug:
            # In debug mode the uncropped image is kept and the boxes are drawn on it
            word_img = draw_box(word_img, img_pnts_transformed, (0, 255, 0))
            word_img = draw_box(word_img, text_box_pnts_transformed, (0, 0, 255))
            _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
            word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)

        if apply(self.cfg.noise):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)

        blured = False
        if apply(self.cfg.blur):
            blured = True
            word_img = self.apply_blur_on_output(word_img)

        # prydown is itself a blur, so it is only tried when no blur was applied
        if not blured:
            if apply(self.cfg.prydown):
                word_img = self.apply_prydown(word_img)

        word_img = np.clip(word_img, 0., 255.)

        if apply(self.cfg.reverse_color):
            word_img = self.reverse_img(word_img)

        return word_img, word

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        """
        Get random left-top point for putting a small rect in a large rect.
        Normally dst_height>src_height and dst_width>src_width; an axis on which
        the destination is not larger gets offset 0.
        :return: (x_offset, y_offset)
        """
        y_max_offset = max(0, dst_height - src_height)
        x_max_offset = max(0, dst_width - src_width)

        y_offset = random.randint(0, y_max_offset) if y_max_offset != 0 else 0
        x_offset = random.randint(0, x_max_offset) if x_max_offset != 0 else 0

        return x_offset, y_offset

    def crop_img(self, img, text_box_pnts_transformed):
        """
        Crop text from large input image
        :param img: image to crop
        :param text_box_pnts_transformed: text_bbox_pnts after apply_perspective_transform
        :return:
            dst: image of size (out_width, out_height)
            crop_bbox: bounding box (x, y, w, h) on input image
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height
        # TODO: prevent text too small
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        s_bbox = (np.around(bbox[0] / scale),
                  np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale),
                  np.around(bbox[3] / scale))

        x_offset, y_offset = self.random_xy_offset(s_bbox_height, s_bbox_width,
                                                   self.out_height, self.out_width)

        def int_around(val):
            return int(np.around(val))

        dst_bbox = (int_around((s_bbox[0] - x_offset) * scale),
                    int_around((s_bbox[1] - y_offset) * scale),
                    int_around(self.out_width * scale),
                    int_around(self.out_height * scale))

        # It's important to crop first and then resize, for speed
        dst = img[dst_bbox[1]:dst_bbox[1] + dst_bbox[3], dst_bbox[0]:dst_bbox[0] + dst_bbox[2]]

        dst = cv2.resize(dst, (self.out_width, self.out_height), interpolation=cv2.INTER_CUBIC)

        return dst, dst_bbox

    def get_word_color(self, bg, text_x, text_y, word_height, word_width):
        """
        Only use word roi area to get word color
        :return: random gray value in [0, 2/3 * mean of the roi]
        """
        offset = 10
        # Clamp the roi start at 0: a negative start index would wrap around to
        # the opposite edge of the background (or yield an empty roi).
        ymin = max(0, text_y - offset)
        ymax = text_y + word_height + offset
        xmin = max(0, text_x - offset)
        xmax = text_x + word_width + offset

        word_roi_bg = bg[ymin:ymax, xmin:xmax]

        bg_mean = int(np.mean(word_roi_bg) * (2 / 3))
        word_color = random.randint(0, bg_mean)
        return word_color

    def draw_text_on_bg(self, word, font, bg):
        """
        Draw word in the center of background
        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
            word_color: gray value used for the text
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_size = self.get_word_size(font, word)
        word_height = word_size[1]
        word_width = word_size[0]

        offset = font.getoffset(word)

        pil_img = Image.fromarray(np.uint8(bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        word_color = self.get_word_color(bg, text_x, text_y, word_height, word_width)

        if apply(self.cfg.random_space):
            text_x, text_y, word_width, word_height = self.draw_text_with_random_space(
                draw, font, word, word_color, bg_width, bg_height)
        else:
            draw.text((text_x - offset[0], text_y - offset[1]), word, fill=word_color, font=font)

        np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [[text_x, text_y],
                         [text_x + word_width, text_y],
                         [text_x + word_width, text_y + word_height],
                         [text_x, text_y + word_height]]

        return np_img, text_box_pnts, word_color

    def draw_text_with_random_space(self, draw, font, word, word_color, bg_width, bg_height):
        """
        Draw the word char by char with one random extra gap between chars.
        If random_space applied, text_x, text_y, word_width, word_height may change
        :return: (text_x, text_y, width, height) of the drawn text
        """
        width = 0
        height = 0
        chars_size = []
        y_offset = 10 ** 5
        for c in word:
            size = font.getsize(c)
            chars_size.append(size)

            width += size[0]
            # set max char height as word height
            if size[1] > height:
                height = size[1]

            # Min chars y offset as word y offset
            # Assume only y offset
            c_offset = font.getoffset(c)
            if c_offset[1] < y_offset:
                y_offset = c_offset[1]

        # The gap is a random fraction of the word height, shared by all chars
        char_space_width = int(height * np.random.uniform(self.cfg.random_space.min,
                                                          self.cfg.random_space.max))

        width += (char_space_width * (len(word) - 1))

        text_x = int((bg_width - width) / 2)
        text_y = int((bg_height - height) / 2)

        c_x = text_x
        c_y = text_y

        for i, c in enumerate(word):
            draw.text((c_x, c_y - y_offset), c, fill=word_color, font=font)
            c_x += (chars_size[i][0] + char_space_width)

        return text_x, text_y, width, height

    def gen_bg(self, width, height):
        """Return an image-based or a random background, depending on cfg.img_bg."""
        if apply(self.cfg.img_bg):
            bg = self.gen_bg_from_image(int(width), int(height))
        else:
            bg = self.gen_rand_bg(int(width), int(height))
        return bg

    def gen_rand_bg(self, width, height):
        """
        Generate random background: bright random noise smoothed by a gauss blur
        """
        bg_high = random.uniform(220, 255)
        bg_low = bg_high - random.uniform(1, 60)

        bg = np.random.randint(bg_low, bg_high, (height, width)).astype(np.uint8)

        bg = self.apply_gauss_blur(bg)

        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random crop from resized background
        The crop is blurred and then brightened with a mean-based contrast gain
        and a random brightness offset.
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width, out.shape[0], out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        out = self.apply_gauss_blur(out, ks=[7, 11, 13, 15, 17])

        bg_mean = int(np.mean(out))

        if bg_mean <= 0:
            # An all-black crop has no usable mean: 255 / bg_mean would divide by
            # zero and randint(0, 0) would raise, so skip the adjustment.
            return np.uint8(np.clip(out, 0, 255))

        alpha = 255 / bg_mean  # contrast
        beta = np.random.randint(bg_mean // 2, bg_mean)  # brightness
        out = np.uint8(np.clip((alpha * out + beta), 0, 255))

        return out

    @retry
    def pick_font(self):
        """
        Pick a sample and a random font; raises (and is retried) in strict mode
        when the font does not support a char of the sample.
        :return:
            word: sample text, truncated when clip_max_chars is set
            font: truetype
            size: word size, removed offset (width, height)
        """
        word = self.corpus.get_sample()

        if self.clip_max_chars and len(word) > self.max_chars:
            word = word[:self.max_chars]

        font_path = random.choice(self.fonts)

        if self.strict:
            supported_chars = self.font_chars[font_path]
            for c in word:
                if c == ' ':
                    continue
                if c not in supported_chars:
                    print('Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s'
                          % (word, c, font_path))
                    raise Exception

        # Font size in point
        font_size = random.randint(self.cfg.font_size.min, self.cfg.font_size.max)
        font = ImageFont.truetype(font_path, font_size)

        return word, font, self.get_word_size(font, word)

    def get_word_size(self, font, word):
        """
        Get word size removed offset
        :param font: truetype
        :param word:
        :return:
            size: word size, removed offset (width, height)
        """
        offset = font.getoffset(word)
        size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self, img, text_box_pnts, max_x, max_y, max_z, gpu=False):
        """
        Apply perspective transform on image
        :param img: origin numpy image
        :param text_box_pnts: four corner points of text
        :param max_x: max rotate angle around X-axis
        :param max_y: max rotate angle around Y-axis
        :param max_z: max rotate angle around Z-axis
        :param gpu: forwarded to the transformer
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective transform
            dst_text_pnts: points of text after apply perspective transform
        """
        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        transformer = math_utils.PerspectiveTransform(x, y, z, scale=1.0, fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        """Blur the output image with a gauss blur or a normalized box blur (50/50)."""
        if prob(0.5):
            return self.apply_gauss_blur(img, [3, 5])
        else:
            return self.apply_norm_blur(img)

    def apply_gauss_blur(self, img, ks=None):
        """
        Gauss blur with a kernel size picked from ``ks``.
        A random sigma is only used for kernels <= 3; otherwise sigma is 0.
        """
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        """Box blur with a kernel size picked from ``ks``."""
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_prydown(self, img):
        """
        Blur the image to simulate the effect of enlarging a small picture:
        shrink by a random scale, then resize back to the original size.
        """
        scale = random.uniform(1, self.cfg.prydown.max_scale)
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)),
                         interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)

    def reverse_img(self, word_img):
        """Invert the image around 255 plus a small random offset."""
        offset = np.random.randint(-10, 10)
        return 255 + offset - word_img
class Renderer(object):
    """Render a word on a background and return the image with char boxes.

    The pipeline implemented by :meth:`gen_img` is: pick a word and font,
    generate a background eight times the word size, draw the word, optionally
    draw a line, apply a random perspective transform, crop/resize to
    ``(out_width, out_height)`` and finally apply noise / blur / prydown as
    enabled in ``cfg``.
    """

    def __init__(self, corpus, fonts, bgs, cfg, width=256, height=32,
                 debug=False, gpu=False, strict=False):
        """
        :param corpus: object providing ``get_sample()`` and ``chars_file``
        :param fonts: list of font file paths
        :param bgs: list of background images (numpy arrays)
        :param cfg: effect configuration (line, noise, blur, prydown,
                    perspective_transform)
        :param width: output image width
        :param height: output image height
        :param debug: draw boxes instead of cropping in gen_img
        :param gpu: forwarded to the perspective transformer
        :param strict: retry pick_font until the font supports every char
        """
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height

        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)

        if self.strict:
            self.font_chars = get_fonts_chars(self.fonts, corpus.chars_file)

    def gen_img(self):
        """
        Generate one sample.

        :return:
            word_img: rendered image
            word: the text that was drawn
            char_pnts_transformed: char box corner points; in debug mode these
                are in the un-cropped transformed image, otherwise in the
                cropped output image
        """
        word, font, word_size = self.pick_font()

        # Background's height should much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)
        word_img, char_pnts, text_box_pnts, word_color = \
            self.draw_text_on_bg_char_pts(word, font, bg)

        if self.cfg.line.enable and prob(self.cfg.line.fraction):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts,
                                                       word_color)

        word_img, img_pnts_transformed, char_pnts_transformed = \
            self.apply_perspective_transform(
                word_img, char_pnts,
                max_x=self.cfg.perspective_transform.max_x,
                max_y=self.cfg.perspective_transform.max_y,
                max_z=self.cfg.perspective_transform.max_z,
                gpu=self.gpu)

        if self.debug:
            # Debug output keeps the full transformed image and only draws
            # the image outline and the char boxes on it.
            word_img = draw_box(word_img, img_pnts_transformed, (0, 255, 0))
            word_img = draw_box(word_img, char_pnts_transformed, (0, 0, 255))
        else:
            word_img, char_pnts_transformed = self.crop_img(
                word_img, char_pnts_transformed)

        if self.cfg.noise.enable and prob(self.cfg.noise.fraction):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)

        blured = False
        if self.cfg.blur.enable and prob(self.cfg.blur.fraction):
            blured = True
            word_img = self.apply_blur_on_output(word_img)

        # Prydown is itself a blur, so it is only tried when blur was skipped
        if not blured:
            if self.cfg.prydown.enable and prob(self.cfg.prydown.fraction):
                word_img = self.apply_prydown(word_img)

        word_img = np.clip(word_img, 0., 255.)

        return word_img, word, char_pnts_transformed

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        """
        Get random left-top point for putting a small (src) rect in a large
        (dst) rect. An axis on which dst is not larger than src gets offset 0.

        :return: (x_offset, y_offset)
        """
        y_max_offset = max(0, dst_height - src_height)
        x_max_offset = max(0, dst_width - src_width)

        # Keep the draw order (y first, then x) and skip the draw entirely
        # when there is no room, so the random stream is consumed as before.
        y_offset = random.randint(0, y_max_offset) if y_max_offset else 0
        x_offset = random.randint(0, x_max_offset) if x_max_offset else 0

        return x_offset, y_offset

    @staticmethod
    def _int_around(val):
        """Round to the nearest integer and return a Python int."""
        return int(np.around(val))

    def crop_img(self, img, text_box_pnts_transformed):
        """
        Crop the text from the large transformed image and resize the crop to
        the output size.

        :param img: image to crop
        :param text_box_pnts_transformed: points after
               apply_perspective_transform
        :return:
            dst: image resized to (self.out_width, self.out_height)
            offseted_pts: the input points mapped into ``dst`` coordinates,
                          as a list of [x, y] ints
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height
        # TODO: prevent text too small
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        s_bbox = (np.around(bbox[0] / scale),
                  np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale),
                  np.around(bbox[3] / scale))

        # The scaled text box is the small rect, the output shape the large
        # one. (The arguments used to be passed the other way round, which
        # made both offsets 0 and pinned the text to the top-left corner.)
        x_offset, y_offset = self.random_xy_offset(s_bbox_height, s_bbox_width,
                                                   self.out_height,
                                                   self.out_width)

        # Crop window in input-image pixels. The origin is clamped at 0: a
        # negative start index would wrap around and yield an empty slice.
        crop_x = max(0, self._int_around((s_bbox[0] - x_offset) * scale))
        crop_y = max(0, self._int_around((s_bbox[1] - y_offset) * scale))
        crop_w = self._int_around(self.out_width * scale)
        crop_h = self._int_around(self.out_height * scale)

        # It's important do crop first and than do resize for speed consider
        dst = img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w]

        # The slice may be smaller than requested when the window crosses the
        # image border, so the resize factors come from the real crop shape.
        sc_w = self.out_width / dst.shape[1]
        sc_h = self.out_height / dst.shape[0]

        dst = cv2.resize(dst, (self.out_width, self.out_height),
                         interpolation=cv2.INTER_CUBIC)

        # Map each point: shift by the crop origin, then apply resize factors
        offseted_pts = [[self._int_around((pt[0] - crop_x) * sc_w),
                         self._int_around((pt[1] - crop_y) * sc_h)]
                        for pt in text_box_pnts_transformed]

        return dst, offseted_pts

    def draw_text_on_bg(self, word, font, bg):
        """
        Draw word in the center of background

        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image (float32)
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
            word_color: gray value used to draw the word
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_width, word_height = self.get_word_size(font, word)
        offset = font.getoffset(word)

        pil_img = Image.fromarray(np.uint8(bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        # Text gray level is at most 2/3 of the mean background value
        bg_mean = int(np.mean(bg))
        word_color = random.randint(0, int(bg_mean / 3 * 2))

        # Subtract the font offset so the glyphs start exactly at text_x/y
        draw.text((text_x - offset[0], text_y - offset[1]), word,
                  fill=word_color, font=font)

        np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [[text_x, text_y],
                         [text_x + word_width, text_y],
                         [text_x + word_width, text_y + word_height],
                         [text_x, text_y + word_height]]

        return np_img, text_box_pnts, word_color

    def draw_text_on_bg_char_pts(self, word, font, bg):
        """
        Draw word in the center of background and also return one box per char

        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image (float32)
            chars_box_pnts: flat list with four [x, y] corners per char
                (left-top, right-top, right-bottom, left-bottom); chars are
                laid out left to right from the text origin by their own
                width, and every box uses the tallest char's height
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
            word_color: gray value used to draw the word
        """
        # Drawing is identical to draw_text_on_bg, so reuse it
        np_img, text_box_pnts, word_color = self.draw_text_on_bg(word, font, bg)
        text_x, text_y = text_box_pnts[0]

        # Measure every char once: (width, height) with the offset removed
        char_sizes = [self.get_word_size(font, char) for char in word]
        maxh = max([0] + [size[1] for size in char_sizes])

        chars_box_pnts = []
        x, y = text_x, text_y
        for w, _ in char_sizes:
            chars_box_pnts.append([x, y])
            chars_box_pnts.append([x + w, y])
            chars_box_pnts.append([x + w, y + maxh])
            chars_box_pnts.append([x, y + maxh])
            x += w

        return np_img, chars_box_pnts, text_box_pnts, word_color

    def gen_bg(self, width, height):
        """Return a random-noise or an image-based background, 50/50."""
        if prob(0.5):
            bg = self.gen_rand_bg(int(width), int(height))
        else:
            bg = self.gen_bg_from_image(int(width), int(height))
        return bg

    def gen_rand_bg(self, width, height):
        """
        Generate random background: uniform noise in a bright gray range,
        smoothed with a gauss blur
        """
        bg_high = random.uniform(220, 255)
        bg_low = bg_high - random.uniform(1, 60)

        bg = np.random.randint(bg_low, bg_high, (height, width)).astype(np.uint8)

        bg = self.apply_gauss_blur(bg)

        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random
        crop from resized background
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width,
                                                   out.shape[0], out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        return out

    @retry
    def pick_font(self):
        """
        Pick a word from the corpus and a random font / font size for it.

        :return:
            word: text to draw
            font: truetype
            size: word size, removed offset (width, height)
        :raises Exception: in strict mode, when the chosen font does not
            support a char of the word
        """
        word = self.corpus.get_sample()

        font_path = random.choice(self.fonts)

        if self.strict:
            supported_chars = self.font_chars[font_path]
            for c in word:
                if c not in supported_chars:
                    msg = ('Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s'
                           % (word, c, font_path))
                    print(msg)
                    # Same exception type as before, now carrying the reason
                    raise Exception(msg)

        # Font size in point
        font_size = random.randint(20, 40)
        font = ImageFont.truetype(font_path, font_size)

        return word, font, self.get_word_size(font, word)

    def get_word_size(self, font, word):
        """
        Get word size removed offset

        :param font: truetype
        :param word:
        :return:
            size: word size, removed offset (width, height)
        """
        offset = font.getoffset(word)
        size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self, img, text_box_pnts, max_x, max_y,
                                    max_z, gpu=False):
        """
        Apply perspective transform on image

        :param img: origin numpy image
        :param text_box_pnts: points to transform together with the image
        :param max_x: max rotate angle around X-axis
        :param max_y: max rotate angle around Y-axis
        :param max_z: max rotate angle around Z-axis
        :param gpu: forwarded to transformer.transform_image
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective
                          transform
            dst_text_pnts: text_box_pnts after apply perspective transform
        """
        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        transformer = math_utils.PerspectiveTransform(x, y, z, scale=1.0,
                                                      fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        """Blur the output with a small gauss blur or a box blur, 50/50."""
        if prob(0.5):
            return self.apply_gauss_blur(img, [3, 5])
        else:
            return self.apply_norm_blur(img)

    def apply_gauss_blur(self, img, ks=None):
        """
        Gauss blur with a kernel size picked from ``ks``

        :param ks: candidate kernel sizes, defaults to [7, 9, 11, 13]
        """
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        # sigma 0 is passed for larger kernels; only kernels <= 3 get a
        # random sigma
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        """
        Box blur with a kernel size picked from ``ks`` (defaults to [2, 3])
        """
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_prydown(self, img):
        """
        Blur the image, simulating the effect of enlarging a small picture:
        shrink by a random factor in [1, 1.5], then resize back.
        """
        scale = random.uniform(1, 1.5)
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)),
                         interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)
class Renderer(object):
    """Render a word on a background and return the image with its text.

    The pipeline implemented by :meth:`gen_img` is: pick a word and fonts,
    draw the word on a white canvas, optionally crop the box / draw a line /
    curve the text, blend the canvas into a background, optionally add extra
    words, apply a random perspective transform, crop/resize to the output
    size and finally apply noise / blur / colour reverse / emboss / sharpen as
    enabled in ``cfg``.

    With ``fonts_by_image=True`` a "font" is a dict mapping a char to a glyph
    image (see :meth:`loads_fonts_by_image`) instead of a truetype font.
    """

    def __init__(self, corpus, fonts, bgs, cfg, width=256, height=32,
                 clip_max_chars=False, debug=False, gpu=False, strict=False,
                 fonts_by_image=False):
        """
        :param corpus: object providing ``get_sample(img_index)`` and
                       ``chars_file``
        :param fonts: list of font paths (directories of glyph images when
                      fonts_by_image is set)
        :param bgs: list of background images (numpy arrays)
        :param cfg: effect configuration
        :param width: output image width; 0 lets crop_img derive the width
                      from the text
        :param height: output image height
        :param clip_max_chars: truncate words to ``max_chars`` in pick_font
        :param debug: print stage messages and draw boxes
        :param gpu: forwarded to the perspective transformer
        :param strict: retry pick_font until the font supports every char
        :param fonts_by_image: use glyph images instead of truetype fonts
        """
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height
        self.clip_max_chars = clip_max_chars
        self.max_chars = math.floor(width / 4) - 1
        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg
        self.fonts_by_image = fonts_by_image

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)
        self.texture = Texture(cfg)
        self.remaper = Remaper(cfg)
        self.start = time.time()

        self.create_kernals()

        if self.strict:
            self.font_unsupport_chars = font_utils.get_unsupported_chars(
                self.fonts, corpus.chars_file, self.fonts_by_image)

    def gen_img(self, img_index):
        """
        Generate one sample.

        :param img_index: forwarded to pick_font / corpus.get_sample
        :return:
            word_img: rendered image
            word: the text that was drawn
        """
        word, font, word_size, font2 = self.pick_font(img_index)
        self.dmsg("after pick font")

        # Background's height should much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)
        word_img, text_box_pnts, word_color = self.draw_text_on_bg(
            word, font, bg, font2)
        self.dmsg("After draw_text_on_bg")

        if apply(self.cfg.crop):
            text_box_pnts = self.apply_crop(text_box_pnts, self.cfg.crop)

        if apply(self.cfg.line):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts)
            self.dmsg("After draw line")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (0, 255, 155))

        if apply(self.cfg.curve):
            word_img, text_box_pnts = self.remaper.apply(
                word_img, text_box_pnts, word_color)
            self.dmsg("After remapping")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (155, 255, 0))

        # The text was drawn on a white canvas; blend it into the background
        word_img = self.mix_seamless_bg(word_img, bg)

        if apply(self.cfg.extra_words):
            word_img = self.draw_extra_random_word(word_img, text_box_pnts,
                                                   img_index)
            self.dmsg("After add extra words")

        word_img, img_pnts_transformed, text_box_pnts_transformed = \
            self.apply_perspective_transform(
                word_img, text_box_pnts,
                max_x=self.cfg.perspective_transform.max_x,
                max_y=self.cfg.perspective_transform.max_y,
                max_z=self.cfg.perspective_transform.max_z,
                gpu=self.gpu)

        self.dmsg("After perspective transform")

        if self.debug:
            # Keep the un-cropped image and only draw where the crop would be
            _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
            word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, crop_bbox = self.crop_img(word_img,
                                                text_box_pnts_transformed)

        self.dmsg("After crop_img")

        if apply(self.cfg.noise):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)
            self.dmsg("After noiser")

        if apply(self.cfg.blur):
            word_img = self.apply_blur_on_output(word_img)
            self.dmsg("After blur")

        word_img = np.clip(word_img, 0., 255.)

        if apply(self.cfg.reverse_color):
            word_img = self.reverse_img(word_img)
            self.dmsg("After reverse_img")

        if apply(self.cfg.emboss):
            word_img = self.apply_emboss(word_img)
            self.dmsg("After emboss")

        if apply(self.cfg.sharp):
            word_img = self.apply_sharp(word_img)
            self.dmsg("After sharp")

        return word_img, word

    def dmsg(self, msg):
        """Print ``msg`` when debug mode is on."""
        if self.debug:
            print(msg)

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        """
        Get random left-top point for putting a small rect in a large rect.
        Normally dst_height>src_height and dst_width>src_width; an axis on
        which dst is not larger than src gets offset 0.

        :return: (x_offset, y_offset)
        """
        y_max_offset = max(0, dst_height - src_height)
        x_max_offset = max(0, dst_width - src_width)

        # Keep the draw order (y first, then x) and skip the draw entirely
        # when there is no room, so the random stream is consumed as before.
        y_offset = random.randint(0, y_max_offset) if y_max_offset else 0
        x_offset = random.randint(0, x_max_offset) if x_max_offset else 0

        return x_offset, y_offset

    def crop_img(self, img, text_box_pnts_transformed):
        """
        Crop text from large input image

        :param img: image to crop
        :param text_box_pnts_transformed: text_bbox_pnts after
               apply_perspective_transform
        :return:
            dst: image resized to (dst_width, self.out_height), where
                 dst_width is self.out_width, or text width plus random
                 padding when self.out_width is 0
            crop_bbox: (x, y, w, h) of the crop window on the input image
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height

        # TODO: If rotate angle(z) of text is too big, text will become very
        # small, we should do something to prevent text too small

        # dst_height and dst_width is used to leave some padding around text
        # bbox
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        if self.out_width == 0:
            scale = bbox_height / dst_height
        else:
            dst_width = self.out_width
            scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        if self.out_width == 0:
            # Variable-width output: text width plus 10%..12.5% padding per
            # side
            padding = random.randint(s_bbox_width // 10, s_bbox_width // 8)
            dst_width = s_bbox_width + padding * 2

        s_bbox = (np.around(bbox[0] / scale),
                  np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale),
                  np.around(bbox[3] / scale))

        x_offset, y_offset = self.random_xy_offset(s_bbox_height, s_bbox_width,
                                                   self.out_height, dst_width)

        # Crop window in input-image pixels. The origin is clamped at 0: a
        # negative start index would wrap around and yield an empty slice,
        # which cv2.resize cannot handle.
        dst_bbox = (max(0, self.int_around((s_bbox[0] - x_offset) * scale)),
                    max(0, self.int_around((s_bbox[1] - y_offset) * scale)),
                    self.int_around(dst_width * scale),
                    self.int_around(self.out_height * scale))

        # It's important do crop first and than do resize for speed consider
        dst = img[dst_bbox[1]:dst_bbox[1] + dst_bbox[3],
                  dst_bbox[0]:dst_bbox[0] + dst_bbox[2]]

        dst = cv2.resize(dst, (dst_width, self.out_height),
                         interpolation=cv2.INTER_CUBIC)

        return dst, dst_bbox

    def int_around(self, val):
        """Round to the nearest integer and return a Python int."""
        return int(np.around(val))

    def get_word_color(self):
        """
        Pick a random text colour.

        A colour entry of ``cfg.font_color`` is chosen according to the
        entries' ``fraction``, then each channel is drawn between the entry's
        ``l_boundary`` and ``h_boundary`` (comma separated "r,g,b" strings).
        'black' and 'gray' use one value for all three channels.

        :return: (b, g, r)
        """
        colors = list(self.cfg.font_color)
        p = [self.cfg.font_color[name].fraction for name in colors]

        # pick color by fraction
        color_name = np.random.choice(colors, p=p)
        color_cfg = self.cfg.font_color[color_name]
        l_boundary = [int(i) for i in color_cfg.l_boundary.split(',')]
        h_boundary = [int(i) for i in color_cfg.h_boundary.split(',')]

        # random color by low and high RGB boundary
        if color_name == 'black' or color_name == 'gray':
            r = g = b = np.random.randint(l_boundary[0], h_boundary[0])
        else:
            r = np.random.randint(l_boundary[0], h_boundary[0])
            g = np.random.randint(l_boundary[1], h_boundary[1])
            b = np.random.randint(l_boundary[2], h_boundary[2])

        return (b, g, r)

    def draw_text_on_bg(self, word, font, bg, font2):
        """
        Draw word in the center of a white canvas of the background's size

        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image (only its height/width are used
                   here)
        :param font2: second font, forwarded to draw_text_wrapper
        :return:
            np_img: word image (float32)
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
            word_color: (b, g, r) used to draw the word
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_width, word_height = self.get_word_size(font, word)

        # Glyph images carry no font offset
        offset = (0, 0) if self.fonts_by_image else font.getoffset(word)

        pure_bg = np.ones((bg_height, bg_width, 3)) * 255
        pil_img = Image.fromarray(np.uint8(pure_bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        word_color = self.get_word_color()

        draw_x = text_x - offset[0]
        draw_y = text_y - offset[1]

        if apply(self.cfg.random_space):
            text_x, text_y, word_width, word_height = \
                self.draw_text_with_random_space(draw, font, word, word_color,
                                                 bg_width, bg_height, pure_bg)
        elif apply(self.cfg.texture) and not self.fonts_by_image:
            # Draw the text on a transparent layer and let the texture helper
            # combine it with an opaque white layer
            pure_bg = Image.new('RGBA', (bg_width, bg_height),
                                (255, 255, 255, 255))
            pil_img = Image.new('RGBA', (bg_width, bg_height),
                                (255, 255, 255, 0))
            draw = ImageDraw.Draw(pil_img)
            _ = self.draw_text_wrapper(draw, word, draw_x, draw_y, font,
                                       word_color, font2, pil_img)
            pil_img = self.texture.apply_cloud_texture(pure_bg, pil_img)
        elif self.fonts_by_image:
            # Glyph images are blended into the canvas array; the returned
            # array is the drawn image
            pil_img = self.draw_text_wrapper(draw, word, draw_x, draw_y, font,
                                             word_color, font2, pure_bg)
        else:
            # Truetype text is drawn through ``draw`` straight onto pil_img
            _ = self.draw_text_wrapper(draw, word, draw_x, draw_y, font,
                                       word_color, font2, pure_bg)

        np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [[text_x, text_y],
                         [text_x + word_width, text_y],
                         [text_x + word_width, text_y + word_height],
                         [text_x, text_y + word_height]]

        return np_img, text_box_pnts, word_color

    def mix_seamless_bg(self, text_img, bg, anchor=None):
        """
        Blend ``text_img`` into ``bg`` with cv2.seamlessClone (MIXED_CLONE).

        :param text_img: image to blend in, converted to uint8
        :param bg: destination image
        :param anchor: (x, y) left-top position for text_img; only used in
                       fonts_by_image mode, otherwise text_img is centered
        :return: blended image
        """
        text_img = np.array(text_img).astype(np.uint8)
        # Full mask: clone the whole of text_img
        text_mask = 255 * np.ones(text_img.shape, text_img.dtype)

        if self.fonts_by_image and anchor:
            h, w = text_img.shape[:2]
            # seamlessClone positions the source by its center
            center = (int(anchor[0] + w / 2), int(anchor[1] + h / 2))
            bg = bg.astype(np.uint8)
        else:
            center = (bg.shape[1] // 2, bg.shape[0] // 2)

        mixed_clone = cv2.seamlessClone(text_img, bg, text_mask, center,
                                        cv2.MIXED_CLONE)

        return mixed_clone

    def draw_extra_random_word(self, text_img, text_box_pnts, img_index):
        """
        Draw a prefix of another random word above or below the text box.

        :param text_img: image that already contains the main text
        :param text_box_pnts: box of the main text
        :param img_index: forwarded to pick_font
        :return: image (float32) with the extra word drawn
        """
        pil_img = Image.fromarray(np.uint8(text_img))
        draw = ImageDraw.Draw(pil_img)

        word_color = self.get_word_color()
        word, font, word_size, font2 = self.pick_font(img_index)

        # np.random.randint(1, 1) raises, so one-char (or empty) words are
        # used whole
        if len(word) > 1:
            word_len = np.random.randint(1, len(word))
        else:
            word_len = len(word)

        word_height = word_size[1]

        # calculate text x,y
        text_x = np.random.randint(text_box_pnts[0][0], text_box_pnts[1][0])
        # Bottom candidate: the bottom edge of the main text box
        text_y_b = text_box_pnts[2][1]
        # Top candidate: one box height above the box top, minus half the
        # extra word's height
        text_y_t = 2 * text_box_pnts[0][1] - \
            text_box_pnts[2][1] - word_height * 0.5
        text_y = np.random.choice([text_y_t, text_y_b],
                                  p=[self.cfg.extra_words.top.fraction,
                                     self.cfg.extra_words.bottom.fraction])

        _ = self.draw_text_wrapper(draw, word[:word_len], text_x, text_y,
                                   font, word_color, font2,
                                   np.uint8(text_img))

        np_img = np.array(pil_img).astype(np.float32)
        return np_img

    def draw_text_with_random_space(self, draw, font, word, word_color,
                                    bg_width, bg_height, pil_img):
        """
        Draw ``word`` char by char with a random gap between chars.

        If random_space applied, text_x, text_y, word_width, word_height may
        change, so the new values are returned.

        :return: (text_x, text_y, width, height)
        """
        width = 0
        height = 0
        chars_size = []
        y_offset = 10 ** 5

        # Size reserved for a char without a glyph image. loads_fonts_by_image
        # resizes every glyph to the same width, so any glyph's width serves.
        # (This branch used to read an undefined ``font_size`` name.)
        fallback = 0
        if self.fonts_by_image:
            fallback = next((g.shape[1] for g in font.values()), 0)

        for c in word:
            if self.fonts_by_image:
                if c in font:
                    # shape is (height, width); sizes here are (width, height)
                    glyph_h, glyph_w = font[c].shape[:2]
                    size = (glyph_w, glyph_h)
                else:
                    size = (fallback, fallback)
            else:
                size = font.getsize(c)
            chars_size.append(size)

            width += size[0]
            # set max char height as word height
            if size[1] > height:
                height = size[1]

            # Min chars y offset as word y offset
            # Assume only y offset
            c_offset = (0, 0) if self.fonts_by_image else font.getoffset(c)
            if c_offset[1] < y_offset:
                y_offset = c_offset[1]

        char_space_width = int(height * np.random.uniform(
            self.cfg.random_space.min, self.cfg.random_space.max))

        width += (char_space_width * (len(word) - 1))

        text_x = int((bg_width - width) / 2)
        text_y = int((bg_height - height) / 2)

        c_x = text_x
        c_y = text_y

        for i, c in enumerate(word):
            if self.fonts_by_image:
                # NOTE(review): the blended image is only kept in the local
                # ``pil_img`` and is not returned, so the caller does not
                # appear to receive glyphs drawn here - confirm intent.
                if c in font:
                    pil_img = self.mix_seamless_bg(font[c], pil_img,
                                                   (c_x, c_y - y_offset))
            else:
                draw.text((c_x, c_y - y_offset), c, fill=word_color, font=font)

            c_x += (chars_size[i][0] + char_space_width)

        return text_x, text_y, width, height

    def draw_text_wrapper(self, draw, text, x, y, font, text_color, font2,
                          pil_img):
        """
        Draw ``text`` with one of: a border, a mix of two fonts, or plainly.

        :param x/y: should already have the font offset removed
        :param font2: second font used by the second_font effect
        :param pil_img: image that glyph images are blended into in
                        fonts_by_image mode
        :return: ``pil_img``, replaced by the blended image when glyph images
                 were drawn
        """
        # TODO: merge draw_text_wrapper and draw_text_with_random_space
        if apply(self.cfg.text_border):
            self.draw_border_text(draw, text, x, y, font, text_color)
        elif apply(self.cfg.second_font):
            if self.cfg.second_font.font_color_change:
                text_color2 = self.get_word_color()
            else:
                text_color2 = text_color

            for c in text:
                # random.random() is evaluated first on purpose so the random
                # stream does not depend on fonts_by_image
                if random.random() < self.cfg.second_font.change_rate \
                        and not self.fonts_by_image:
                    y_offset = np.random.uniform(0, font2.getoffset(c)[1])
                    draw.text((x, y + y_offset), c, fill=text_color2,
                              font=font2)
                    x += font2.getsize(c)[0]
                elif self.fonts_by_image:
                    pil_img = self.mix_seamless_bg(font[c], pil_img, (x, y))
                    x += font[c].shape[1]
                else:
                    draw.text((x, y), c, fill=text_color, font=font)
                    x += font.getsize(c)[0]
        elif self.fonts_by_image:
            for c in text:
                pil_img = self.mix_seamless_bg(font[c], pil_img, (x, y))
                x += font[c].shape[1]
        else:
            draw.text((x, y), text, fill=text_color, font=font)

        return pil_img

    @staticmethod
    def _shift_channel(value, lighten):
        """Return ``value`` moved by a random amount towards white or black."""
        value = int(value)
        if lighten:
            span = 255 - value - 1
            # np.random.randint(0, span) raises for span <= 0 (value >= 254)
            return value + (int(np.random.randint(0, span)) if span > 0 else 0)
        return value - int(np.random.randint(0, value + 1))

    def draw_border_text(self, draw, text, x, y, font, text_color):
        """
        Draw ``text`` with a one pixel lighter or darker border.

        :param x/y: should already have the font offset removed
        :param text_color: a single gray value or a tuple of channel values;
               for a tuple every channel is shifted independently
        """
        # thickness larger than 1 may give bad border result
        thickness = 1

        choices = []
        p = []
        if self.cfg.text_border.light.enable:
            choices.append(0)
            p.append(self.cfg.text_border.light.fraction)
        if self.cfg.text_border.dark.enable:
            choices.append(1)
            p.append(self.cfg.text_border.dark.fraction)

        if not choices:
            # Neither border kind is enabled: nothing to pick from, so draw
            # the text without a border
            draw.text((x, y), text, font=font, fill=text_color)
            return

        light_or_dark = np.random.choice(choices, p=p)
        lighten = light_or_dark == 0

        # get_word_color returns a (b, g, r) tuple, which cannot be used in
        # arithmetic directly, so shift channel by channel
        if isinstance(text_color, (tuple, list, np.ndarray)):
            border_color = tuple(self._shift_channel(c, lighten)
                                 for c in text_color)
        else:
            border_color = self._shift_channel(text_color, lighten)

        # thin border
        draw.text((x - thickness, y), text, font=font, fill=border_color)
        draw.text((x + thickness, y), text, font=font, fill=border_color)
        draw.text((x, y - thickness), text, font=font, fill=border_color)
        draw.text((x, y + thickness), text, font=font, fill=border_color)

        # thicker border
        draw.text((x - thickness, y - thickness), text, font=font,
                  fill=border_color)
        draw.text((x + thickness, y - thickness), text, font=font,
                  fill=border_color)
        draw.text((x - thickness, y + thickness), text, font=font,
                  fill=border_color)
        draw.text((x + thickness, y + thickness), text, font=font,
                  fill=border_color)

        # now draw the text over it
        draw.text((x, y), text, font=font, fill=text_color)

    def gen_bg(self, width, height):
        """Return a background crop of the given (integer-truncated) size."""
        bg = self.gen_bg_from_image(int(width), int(height))
        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random
        crop from resized background
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])
        rand_scale = scale * random.random()
        # NOTE(review): only the width is checked here. When ``scale`` is the
        # height ratio, rand_scale can be chosen and the resized image is then
        # shorter than ``height``, so the crop below returns fewer rows than
        # requested - confirm whether that is intended.
        scale = scale if width > rand_scale * bg.shape[1] else rand_scale

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width,
                                                   out.shape[0], out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        # The background is deliberately no longer blurred.
        # TODO: find a better way to deal with background
        # (contrast / brightness adjustment)

        return out

    @retry
    def pick_font(self, img_index):
        """
        Pick a word from the corpus and random fonts / font sizes for it.

        :param img_index when use list corpus, this param is used
        :return:
            word: text to draw
            font: truetype, or a glyph dict in fonts_by_image mode
            size: word size, removed offset (width, height)
            font2: second font used by the second_font effect
        :raises Exception: in strict mode, when the chosen font does not
            support a char of the word
        """
        word = self.corpus.get_sample(img_index)

        if self.clip_max_chars and len(word) > self.max_chars:
            word = word[:self.max_chars]

        font_path = random.choice(self.fonts)
        font_path_2 = random.choice(self.fonts)

        if self.strict:
            # NOTE(review): only the first font is checked; chars unsupported
            # by the second font are not detected here.
            unsupport_chars = self.font_unsupport_chars[font_path]
            for c in word:
                if c == ' ':
                    continue
                if c in unsupport_chars:
                    msg = ('Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s'
                           % (word, c, font_path))
                    print(msg)
                    # Same exception type as before, now carrying the reason
                    raise Exception(msg)

        # Font size in point
        font_size = random.randint(self.cfg.font_size.min,
                                   self.cfg.font_size.max)
        if self.fonts_by_image:
            font = self.loads_fonts_by_image(font_path, font_size)
        else:
            font = ImageFont.truetype(font_path, font_size)

        if self.cfg.second_font.font_size_change:
            # Never let the second font shrink to a non-positive size
            font_size_2 = max(1, font_size - random.randint(1, 10))
        else:
            font_size_2 = font_size

        second_path = font_path_2 if self.cfg.second_font.font_change \
            else font_path
        if self.fonts_by_image:
            font2 = self.loads_fonts_by_image(second_path, font_size_2)
        else:
            font2 = ImageFont.truetype(second_path, font_size_2)

        return word, font, self.get_word_size(font, word, font_size), font2

    def loads_fonts_by_image(self, font_path, font_size):
        """
        Load every glyph image of a directory, resized to width ``font_size``
        with the aspect ratio kept.

        :param font_path: directory containing one image per char
        :param font_size: target glyph width in pixels
        :return: dict mapping the file name without ".png" to the glyph image
        """
        fonts = {}
        for x in os.listdir(font_path):
            # np.fromfile + imdecode instead of imread: reads the bytes
            # ourselves, presumably to cope with non-ASCII file names
            img = cv2.imdecode(
                np.fromfile(os.path.join(font_path, x), dtype=np.uint8),
                cv2.IMREAD_COLOR)
            if img is None:
                # Not a decodable image (stray file or sub-directory)
                continue
            h, w = img.shape[:2]
            fonts[x.replace(".png", "")] = cv2.resize(
                img, (font_size, h * font_size // w))
        return fonts

    def get_word_size(self, font, word, default=None):
        """
        Get word size removed offset

        :param font: truetype, or a glyph dict in fonts_by_image mode
        :param word:
        :param default: width and height assumed for chars missing from a
                        glyph dict
        :return:
            size: word size, removed offset (width, height)
        """
        if self.fonts_by_image:
            offset = (0, 0)
            w_list, h_list = [], []
            for c in word:
                if c in font:
                    h, w = font[c].shape[:2]
                else:
                    h, w = default, default
                w_list.append(w)
                h_list.append(h)
            # An empty word has no glyphs; report zero height instead of
            # failing on max([])
            size = (sum(w_list), max(h_list) if h_list else 0)
        else:
            offset = font.getoffset(word)
            size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self, img, text_box_pnts, max_x, max_y,
                                    max_z, gpu=False):
        """
        Apply perspective transform on image

        :param img: origin numpy image
        :param text_box_pnts: four corner points of text
        :param max_x: max rotate angle around X-axis
        :param max_y: max rotate angle around Y-axis
        :param max_z: max rotate angle around Z-axis
        :param gpu: forwarded to transformer.transform_image
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective
                          transform
            dst_text_pnts: points of text after apply perspective transform
        """
        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        transformer = math_utils.PerspectiveTransform(x, y, z, scale=1.0,
                                                      fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        """
        Apply one of the blur kinds enabled in ``cfg.blur`` (gauss, norm,
        motion, prydown), picked according to their ``fraction``. The image is
        returned unchanged when none is enabled.
        """
        p = []
        funcs = []
        if self.cfg.blur.gauss.enable:
            p.append(self.cfg.blur.gauss.fraction)
            funcs.append(self.apply_gauss_blur)

        if self.cfg.blur.norm.enable:
            p.append(self.cfg.blur.norm.fraction)
            funcs.append(self.apply_norm_blur)

        if self.cfg.blur.motion.enable:
            p.append(self.cfg.blur.motion.fraction)
            funcs.append(self.apply_motion_blur)

        if self.cfg.blur.prydown.enable:
            p.append(self.cfg.blur.prydown.fraction)
            funcs.append(self.apply_prydown)

        if len(p) == 0:
            return img

        blur_func = np.random.choice(funcs, p=p)

        return blur_func(img)

    def apply_gauss_blur(self, img, ks=None):
        """
        Gauss blur with a kernel size picked from ``ks``

        :param ks: candidate kernel sizes, defaults to [7, 9, 11, 13]
        """
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        # sigma 0 is passed for larger kernels; only kernels <= 3 get a
        # random sigma
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        """
        Box blur with a kernel size picked from ``ks`` (defaults to [2, 3])
        """
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_motion_blur(self, img, ks=None, angle=None):
        """
        Motion blur: filter with a normalized line kernel rotated by ``angle``

        :param ks: candidate kernel sizes, defaults to [7, 9, 11, 13]
        :param angle: rotation in degrees, random in [0, 360] when None
        """
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        if angle is None:
            angle = random.randint(0, 360)

        # Horizontal line through the kernel center ...
        kernel = np.zeros((ksize, ksize), dtype=np.float32)
        kernel[(ksize - 1) // 2, :] = np.ones(ksize, dtype=np.float32)
        # ... rotated around the center ...
        kernel = cv2.warpAffine(
            kernel,
            cv2.getRotationMatrix2D((ksize / 2 - 0.5, ksize / 2 - 0.5),
                                    angle, 1.0),
            (ksize, ksize))
        # ... and normalized so the weights sum to 1
        kernel = kernel * (1.0 / np.sum(kernel))
        img = cv2.filter2D(img, -1, kernel)
        return img

    def apply_prydown(self, img):
        """
        Blur the image, simulating the effect of enlarging a small picture:
        shrink by a random factor in [1, cfg.blur.prydown.max_scale], then
        resize back.
        """
        # TODO: use different h/w scale
        scale = random.uniform(1, self.cfg.blur.prydown.max_scale)
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)),
                         interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)

    def reverse_img(self, word_img):
        """Invert the image around 255 plus a random offset in [-10, 10)."""
        offset = np.random.randint(-10, 10)
        return 255 + offset - word_img

    def create_kernals(self):
        """Create the 3x3 convolution kernels for emboss and sharpen."""
        self.emboss_kernal = np.array([
            [-2, -1, 0],
            [-1, 1, 1],
            [0, 1, 2]
        ])

        self.sharp_kernel = np.array([
            [-1, -1, -1],
            [-1, 9, -1],
            [-1, -1, -1]
        ])

    def apply_emboss(self, word_img):
        """Filter the image with the emboss kernel."""
        return cv2.filter2D(word_img, -1, self.emboss_kernal)

    def apply_sharp(self, word_img):
        """Filter the image with the sharpen kernel."""
        return cv2.filter2D(word_img, -1, self.sharp_kernel)

    def apply_crop(self, text_box_pnts, crop_cfg):
        """
        Random crop text box height top or bottom, we don't need image
        information in this step, only change box pnts

        :param text_box_pnts: bbox of text [left-top, right-top,
               right-bottom, left-bottom]
        :param crop_cfg: provides top.min/top.max and bottom.min/bottom.max,
               scaled by box height / out_height before being applied
        :return:
            croped_text_box_pnts: new list; the input is left untouched
        """
        height = abs(text_box_pnts[0][1] - text_box_pnts[3][1])
        scale = float(height) / float(self.out_height)

        # Copy the points so the caller's list is not modified in place
        croped_text_box_pnts = [list(pnt) for pnt in text_box_pnts]

        if prob(0.5):
            top_crop = int(
                random.randint(crop_cfg.top.min, crop_cfg.top.max) * scale)
            self.dmsg("top crop %d" % top_crop)
            croped_text_box_pnts[0][1] += top_crop
            croped_text_box_pnts[1][1] += top_crop
        else:
            bottom_crop = int(
                random.randint(crop_cfg.bottom.min, crop_cfg.bottom.max)
                * scale)
            self.dmsg("bottom crop %d " % bottom_crop)
            croped_text_box_pnts[2][1] -= bottom_crop
            croped_text_box_pnts[3][1] -= bottom_crop

        return croped_text_box_pnts