def apply_left(self, img: PILImage, text_bbox: BBox) -> Tuple[PILImage, BBox]:
    """Pad the image on the left and draw a vertical line inside the padding.

    The canvas grows by ``out_offset + thickness + in_offset`` pixels on the
    left and the original image is pasted flush right. ``text_bbox`` is
    mutated in place and returned with its left edge widened to cover the
    drawn line and both offsets.
    """
    # Padding layout, left to right: out_offset | line (thickness) | in_offset | text.
    in_offset, thickness, out_offset = self._get_lr_param()
    new_w = img.width + thickness + in_offset + out_offset
    new_h = img.height
    new_img = transparent_img((new_w, new_h))
    # Paste the original image flush against the right edge of the new canvas.
    new_img.paste(img, (thickness + in_offset + out_offset, 0))
    draw = ImageDraw.Draw(new_img)
    # Re-anchor the bbox so its right-top corner sits on the canvas's right
    # edge (the trailing "_" suggests offset_ mutates in place — matches usage
    # elsewhere in this file).
    text_bbox.offset_(text_bbox.right_top, (new_w, 0))
    # Step left across the inner gap, then draw the vertical line at that x.
    text_bbox.left -= in_offset
    draw.line(
        list(text_bbox.left_top) + list(text_bbox.left_bottom),
        fill=self._get_line_color(img, text_bbox),
        width=thickness,
    )
    # Grow the bbox further left so it also covers the line and outer padding.
    text_bbox.left -= thickness
    text_bbox.left -= out_offset
    return new_img, text_bbox
def gen_single_corpus(self) -> Tuple[PILImage, str]:
    """Render a single corpus sample onto a background.

    Returns:
        Tuple of (rendered image, ground-truth text).
    """
    font_text = self.corpus.sample()
    bg = self.bg_manager.get_bg()
    text_color = self.corpus.cfg.text_color_cfg.get_color(bg)
    text_mask = draw_text_on_bg(
        font_text, text_color, char_spacing=self.corpus.cfg.char_spacing
    )

    if self.cfg.corpus_effects is not None:
        text_mask, _ = self.cfg.corpus_effects.apply_effects(
            text_mask, BBox.from_size(text_mask.size)
        )

    if self.cfg.perspective_transform is not None:
        transformer = PerspectiveTransform(self.cfg.perspective_transform)
        # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
        _ = transformer.get_transformed_size(text_mask.size)
        try:
            (
                transformed_text_mask,
                transformed_text_pnts,
            ) = transformer.do_warp_perspective(text_mask)
        except Exception as e:
            # Log which font/text failed before re-raising, consistent with
            # the sibling gen_single_corpus implementations in this file.
            logger.exception(e)
            logger.error(f"font: {font_text.font_path}, text: {font_text.text}")
            raise
    else:
        transformed_text_mask = text_mask

    img = self.paste_text_mask_on_bg(bg, transformed_text_mask)
    return img, font_text.text
def apply(self, img: PILImage, text_bbox: BBox) -> Tuple[PILImage, BBox]:
    """Place the image at a random position inside a randomly enlarged canvas.

    The canvas grows by independent random width/height ratios; the text bbox
    is shifted to follow the image's new position.
    """
    # Sample independent horizontal/vertical padding ratios.
    ratio_w = np.random.uniform(*self.w_ratio)
    ratio_h = np.random.uniform(*self.h_ratio)
    canvas_w = int(img.width + img.width * ratio_w)
    canvas_h = int(img.height + img.height * ratio_h)

    canvas = transparent_img((canvas_w, canvas_h))
    offset = random_xy_offset(img.size, (canvas_w, canvas_h))
    canvas.paste(img, offset)

    moved_bbox = text_bbox.move_origin(offset)
    return canvas, moved_bbox
def apply(self, img: PILImage, text_bbox: BBox) -> Tuple[PILImage, BBox]:
    """Apply a vertical wave distortion to the image.

    Every column ``x`` is shifted vertically by ``self._remap_y(x, max_val)``
    via ``cv2.remap``; the text bbox is grown to cover the displaced
    top/bottom rows and re-anchored at the origin.

    Returns:
        Tuple of (distorted image, adjusted bbox).
    """
    max_val = np.random.uniform(*self.amplitude)
    word_img = np.array(img)
    h, w = word_img.shape[:2]

    xmin = text_bbox.left
    xmax = text_bbox.right
    ymin = text_bbox.top
    ymax = text_bbox.bottom

    # The vertical displacement depends only on the column, so compute it
    # once per column instead of once per pixel (the original looped over
    # all h*w pixels calling _remap_y each time).
    # NOTE(review): assumes _remap_y is deterministic for fixed (x, max_val)
    # — confirm it never draws fresh randomness per call.
    col_offsets = np.array(
        [self._remap_y(x, max_val) for x in range(w)], dtype=np.float64
    )

    # Build the remap grids: each pixel keeps its x; its y is shifted by the
    # column offset. Sum in float64 then cast, matching the precision of the
    # original per-pixel store into a float32 array.
    img_y = (
        np.arange(h, dtype=np.float64)[:, None] + col_offsets[None, :]
    ).astype(np.float32)
    img_x = np.tile(np.arange(w, dtype=np.float32), (h, 1))

    # Track how far the bbox's top/bottom rows were displaced. The original
    # only inspected rows that exist in the image, so keep the bounds checks.
    remap_y_min = ymin
    remap_y_max = ymax
    if 0 <= ymin < h:
        remap_y_min = min(remap_y_min, ymin + float(col_offsets.min()))
    if 0 <= ymax < h:
        remap_y_max = max(remap_y_max, ymax + float(col_offsets.max()))

    dst = cv2.remap(word_img, img_x, img_y, cv2.INTER_CUBIC)

    bbox = BBox(left=xmin, top=remap_y_min, right=xmax, bottom=remap_y_max)
    # Re-anchor the bbox so its top-left corner sits at (0, 0).
    bbox = bbox.offset((bbox.left, bbox.top), (0, 0))
    return Image.fromarray(dst), bbox
def apply(self, text_bboxes: List[BBox], img_bboxes: List[BBox],) -> List[BBox]:
    """Lay the image bboxes out in a horizontal row with random spacing.

    Mutates and returns ``img_bboxes``. ``text_bboxes`` is not used by this
    layout.
    """
    avg_height = sum(box.height for box in img_bboxes) / len(img_bboxes)

    # Widen every bbox except the last by a random gap proportional to the
    # average height; the extra width becomes the spacing to the next bbox.
    for box in img_bboxes[:-1]:
        gap_scale = np.random.uniform(*self.h_spacing)
        box.right += int(avg_height * gap_scale)

    # Anchor the first bbox at the merged row's left-center, then chain each
    # subsequent bbox to the right edge of its (already placed) predecessor.
    merged = BBox.from_bboxes(img_bboxes)
    img_bboxes[0].offset_(img_bboxes[0].left_cnt, merged.left_cnt)
    for prev, cur in zip(img_bboxes, img_bboxes[1:]):
        cur.offset_(cur.left_cnt, prev.right_cnt)

    return img_bboxes
def __call__(self, *args, **kwargs) -> Tuple[np.ndarray, str]:
    """Generate one sample: a normalized BGR numpy image plus its text.

    Any exception is logged with its traceback and then re-raised.
    """
    try:
        if self._should_apply_layout():
            img, text = self.gen_multi_corpus()
        else:
            img, text = self.gen_single_corpus()

        if self.cfg.render_effects is not None:
            img, _ = self.cfg.render_effects.apply_effects(
                img, BBox.from_size(img.size)
            )

        img = img.convert("RGB")
        np_img = np.array(img)
        np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
        np_img = self.norm(np_img)
        return np_img, text
    except Exception as e:
        logger.exception(e)
        # Bare raise (instead of "raise e") re-raises the active exception
        # with its original traceback intact.
        raise
def __call__(self, *args, **kwargs) -> Tuple[np.ndarray, str]:
    """Generate one sample as a normalized BGR numpy image plus its text.

    When ``cfg.return_bg_and_mask`` is set, the output image is a 3-wide
    horizontal strip: [rendered image | cropped background | binarized text
    mask]. Any exception is logged with its traceback and then re-raised.
    """
    try:
        if self._should_apply_layout():
            img, text, cropped_bg, transformed_text_mask = self.gen_multi_corpus(
            )
        else:
            img, text, cropped_bg, transformed_text_mask = self.gen_single_corpus(
            )

        if self.cfg.render_effects is not None:
            img, _ = self.cfg.render_effects.apply_effects(
                img, BBox.from_size(img.size)
            )

        if self.cfg.return_bg_and_mask:
            # Binarize the mask with Otsu thresholding, then invert it.
            gray_text_mask = np.array(transformed_text_mask.convert("L"))
            _, gray_text_mask = cv2.threshold(
                gray_text_mask, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
            )
            transformed_text_mask = Image.fromarray(255 - gray_text_mask)

            # Paste the three panels side by side; the mask doubles as its
            # own paste mask so only its opaque pixels land on the strip.
            merge_target = Image.new("RGBA", (img.width * 3, img.height))
            merge_target.paste(img, (0, 0))
            merge_target.paste(cropped_bg, (img.width, 0))
            merge_target.paste(
                transformed_text_mask,
                (img.width * 2, 0),
                mask=transformed_text_mask,
            )
            np_img = np.array(merge_target)
            np_img = cv2.cvtColor(np_img, cv2.COLOR_RGBA2BGR)
            np_img = self.norm(np_img)
        else:
            img = img.convert("RGB")
            np_img = np.array(img)
            np_img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
            np_img = self.norm(np_img)
        return np_img, text
    except Exception as e:
        logger.exception(e)
        # Bare raise (instead of "raise e") preserves the original traceback.
        raise
def gen_single_corpus(self) -> Tuple[PILImage, str, PILImage, PILImage]:
    """Render a single corpus sample.

    Returns:
        Tuple of (rendered image, text, cropped background, transformed
        text mask).
    """
    font_text = self.corpus.sample()
    bg = self.bg_manager.get_bg()

    # NOTE(review): if both text_color_cfgs are None, text_color is never
    # assigned and draw_text_on_bg raises UnboundLocalError — confirm at
    # least one is always configured.
    if self.cfg.text_color_cfg is not None:
        text_color = self.cfg.text_color_cfg.get_color(bg)
    # corpus text_color has higher priority than RenderCfg.text_color_cfg
    if self.corpus.cfg.text_color_cfg is not None:
        text_color = self.corpus.cfg.text_color_cfg.get_color(bg)

    text_mask = draw_text_on_bg(
        font_text, text_color, char_spacing=self.corpus.cfg.char_spacing
    )

    if self.cfg.corpus_effects is not None:
        text_mask, _ = self.cfg.corpus_effects.apply_effects(
            text_mask, BBox.from_size(text_mask.size)
        )

    if self.cfg.perspective_transform is not None:
        transformer = PerspectiveTransform(self.cfg.perspective_transform)
        # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
        _ = transformer.get_transformed_size(text_mask.size)
        try:
            (
                transformed_text_mask,
                transformed_text_pnts,
            ) = transformer.do_warp_perspective(text_mask)
        except Exception as e:
            logger.exception(e)
            # Fixed: the old call logger.error(font_path, "text", text) used
            # the font path as the log *format* and passed the text as stray
            # args, so the failing text never appeared in the log.
            logger.error(f"font: {font_text.font_path}, text: {font_text.text}")
            raise
    else:
        transformed_text_mask = text_mask

    img, cropped_bg = self.paste_text_mask_on_bg(bg, transformed_text_mask)
    return img, font_text.text, cropped_bg, transformed_text_mask
def gen_single_corpus(self) -> Tuple[PILImage, str]:
    """Render a single corpus sample and overlay per-character bboxes.

    Returns:
        Tuple of (rendered image with character bboxes drawn, text).
    """
    font_text = self.corpus.sample()
    bg = self.bg_manager.get_bg()
    text_color = self.corpus.cfg.text_color_cfg.get_color(bg)
    text_mask = draw_text_on_bg(
        font_text, text_color, char_spacing=self.corpus.cfg.char_spacing
    )

    if self.cfg.corpus_effects is not None:
        text_mask, _ = self.cfg.corpus_effects.apply_effects(
            text_mask, BBox.from_size(text_mask.size)
        )

    if self.cfg.perspective_transform is not None:
        transformer = PerspectiveTransform(self.cfg.perspective_transform)
        # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
        _ = transformer.get_transformed_size(text_mask.size)
        try:
            (
                transformed_text_mask,
                transformed_text_pnts,
            ) = transformer.do_warp_perspective(text_mask)
        except Exception as e:
            logger.exception(e)
            # Fixed: the old call logger.error(font_path, "text", text) used
            # the font path as the log *format* and passed the text as stray
            # args, so the failing text never appeared in the log.
            logger.error(f"font: {font_text.font_path}, text: {font_text.text}")
            raise
    else:
        transformed_text_mask = text_mask

    img = self.paste_text_mask_on_bg(bg, transformed_text_mask)
    # After pasting the text mask on the background we draw bbox for each
    # character on the transformed image.
    img = self.lay_bbox_over_image(
        image=img, font_text=font_text, text_color=text_color
    )
    return img, font_text.text
def test_offset():
    """offset_ moves a bbox so its left-center lands on the target point."""
    outer = BBox(left=0, top=0, right=100, bottom=32)
    inner = BBox(left=0, top=0, right=50, bottom=16)
    # Snap inner's left-center onto outer's right-center.
    inner.offset_(inner.left_cnt, outer.right_cnt)
    assert inner == BBox(left=100, top=8, right=150, bottom=24)
def gen_multi_corpus(self) -> Tuple[PILImage, str]:
    """Render several corpora into one image via the configured layout.

    Each corpus is drawn to its own mask, per-corpus effects are applied,
    the layout positions the masks, they are merged onto one transparent
    canvas, and the merged mask goes through the perspective transform and
    layout effects before being pasted onto the background.

    Returns:
        Tuple of (rendered image, merged ground-truth text).
    """
    font_texts: List[FontText] = [it.sample() for it in self.corpus]
    bg = self.bg_manager.get_bg()

    # A render-level text color (if configured) overrides per-corpus colors.
    text_color = None
    if self.cfg.text_color_cfg is not None:
        text_color = self.cfg.text_color_cfg.get_color(bg)

    text_masks, text_bboxes = [], []
    for i in range(len(font_texts)):
        font_text = font_texts[i]
        if text_color is None:
            # Fall back to the i-th corpus's own color config.
            _text_color = self.corpus[i].cfg.text_color_cfg.get_color(bg)
        else:
            _text_color = text_color
        text_mask = draw_text_on_bg(
            font_text, _text_color, char_spacing=self.corpus[i].cfg.char_spacing
        )
        text_bbox = BBox.from_size(text_mask.size)
        if self.cfg.corpus_effects is not None:
            # corpus_effects is indexed per corpus; an entry may be None.
            effects = self.cfg.corpus_effects[i]
            if effects is not None:
                text_mask, text_bbox = effects.apply_effects(
                    text_mask, text_bbox
                )
        text_masks.append(text_mask)
        text_bboxes.append(text_bbox)

    # The layout receives copies/fresh bboxes so the originals stay intact.
    text_mask_bboxes, merged_text = self.layout(
        font_texts,
        [it.copy() for it in text_bboxes],
        [BBox.from_size(it.size) for it in text_masks],
    )
    if len(text_mask_bboxes) != len(text_bboxes):
        raise PanicError(
            "points and text_bboxes should have same length after layout output"
        )

    # Merge all masks onto one transparent canvas at their laid-out spots.
    merged_bbox = BBox.from_bboxes(text_mask_bboxes)
    merged_text_mask = transparent_img(merged_bbox.size)
    for text_mask, bbox in zip(text_masks, text_mask_bboxes):
        merged_text_mask.paste(text_mask, bbox.left_top)

    if self.cfg.perspective_transform is not None:
        transformer = PerspectiveTransform(self.cfg.perspective_transform)
        # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
        _ = transformer.get_transformed_size(merged_text_mask.size)
        (
            transformed_text_mask,
            transformed_text_pnts,
        ) = transformer.do_warp_perspective(merged_text_mask)
    else:
        transformed_text_mask = merged_text_mask

    if self.cfg.layout_effects is not None:
        transformed_text_mask, _ = self.cfg.layout_effects.apply_effects(
            transformed_text_mask, BBox.from_size(transformed_text_mask.size)
        )

    img = self.paste_text_mask_on_bg(bg, transformed_text_mask)
    return img, merged_text