Python Noiser.apply Examples

Programming Language: Python

Namespace/Package Name: textrenderer.noiser

Class/Type: Noiser

Method/Function: apply

Examples at hotexamples.com: 4

Python Noiser.apply - 4 examples found. These are the top rated real world Python examples of textrenderer.noiser.Noiser.apply extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Noiser(6)

apply(4)

Frequently Used Methods

Noiser (6)

apply (4)

Example #1

Show file

class Renderer(object):
    def __init__(self, corpus, fonts, bgs, cfg, width=256, height=32, space_ratio=0.0,
                 clip_max_chars=False, max_chars=None, debug=False, gpu=False, strict=False):
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height
        self.space_ratio = space_ratio
        self.clip_max_chars = clip_max_chars
        self.max_chars = math.floor(width / 4) - 1 if max_chars is None else max_chars
        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)
        self.remaper = Remaper(cfg)

        self.create_kernals()

        if self.strict:
            self.font_unsupport_chars = font_utils.get_unsupported_chars(self.fonts, corpus.chars_file)

    def gen_img(self, img_index):
        word, font, word_size = self.pick_font(img_index)
        if self.space_ratio > 0 and len(word) > 2:
            word = list(word)
            for i in range(1, len(word) - 1):
                if word[i-1] != ' ' and random.random() < self.space_ratio:
                    # 不允许出现连续的空格
                    word[i] = ' '
            word = ''.join(word)

        self.dmsg("after pick font")

        # Background's height should much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)
        word_img, text_box_pnts, word_color = self.draw_text_on_bg(word, font, bg)
        self.dmsg("After draw_text_on_bg")

        if apply(self.cfg.crop):
            text_box_pnts = self.apply_crop(text_box_pnts, self.cfg.crop)

        if apply(self.cfg.line):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts, word_color)
            self.dmsg("After draw line")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (0, 255, 155))

        if apply(self.cfg.curve):
            word_img, text_box_pnts = self.remaper.apply(word_img, text_box_pnts, word_color)
            self.dmsg("After remapping")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (155, 255, 0))

        word_img, img_pnts_transformed, text_box_pnts_transformed = \
            self.apply_perspective_transform(word_img, text_box_pnts,
                                             max_x=self.cfg.perspective_transform.max_x,
                                             max_y=self.cfg.perspective_transform.max_y,
                                             max_z=self.cfg.perspective_transform.max_z,
                                             gpu=self.gpu)

        self.dmsg("After perspective transform")

        if self.debug:
            _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
            word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)

        self.dmsg("After crop_img")

        if apply(self.cfg.noise):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)
            self.dmsg("After noiser")

        blured = False
        if apply(self.cfg.blur):
            blured = True
            word_img = self.apply_blur_on_output(word_img)
            self.dmsg("After blur")

        if not blured:
            if apply(self.cfg.prydown):
                word_img = self.apply_prydown(word_img)
                self.dmsg("After prydown")

        word_img = np.clip(word_img, 0., 255.)

        if apply(self.cfg.reverse_color):
            word_img = self.reverse_img(word_img)
            self.dmsg("After reverse_img")

        if apply(self.cfg.emboss):
            word_img = self.apply_emboss(word_img)
            self.dmsg("After emboss")

        if apply(self.cfg.sharp):
            word_img = self.apply_sharp(word_img)
            self.dmsg("After sharp")

        # word_img = cv2.resize(word_img, None, fx=0.5, fy=0.5)
        return word_img, word

    def dmsg(self, msg):
        if self.debug:
            print(msg)

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        """
        Get random left-top point for putting a small rect in a large rect.
        Normally dst_height>src_height and dst_width>src_width
        """
        y_max_offset = 0
        if dst_height > src_height:
            y_max_offset = dst_height - src_height

        x_max_offset = 0
        if dst_width > src_width:
            x_max_offset = dst_width - src_width

        y_offset = 0
        if y_max_offset != 0:
            y_offset = random.randint(0, y_max_offset)

        x_offset = 0
        if x_max_offset != 0:
            x_offset = random.randint(0, x_max_offset)

        return x_offset, y_offset

    def crop_img(self, img, text_box_pnts_transformed):
        """
        Crop text from large input image
        :param img: image to crop
        :param text_box_pnts_transformed: text_bbox_pnts after apply_perspective_transform
        :return:
            dst: image with desired output size, height=32, width=flags.img_width
            crop_bbox: bounding box on input image
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height

        # TODO: If rotate angle(z) of text is too big, text will become very small,
        # we should do something to prevent text too small

        # dst_height and dst_width is used to leave some padding around text bbox
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        if self.out_width == 0:
            scale = bbox_height / dst_height
        else:
            dst_width = self.out_width
            scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        if self.out_width == 0:
            padding = random.randint(s_bbox_width // 10, s_bbox_width // 8)
            dst_width = s_bbox_width + padding * 2

        s_bbox = (np.around(bbox[0] / scale),
                  np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale),
                  np.around(bbox[3] / scale))

        x_offset, y_offset = self.random_xy_offset(s_bbox_height, s_bbox_width, self.out_height, dst_width)

        dst_bbox = (
            self.int_around((s_bbox[0] - x_offset) * scale),
            self.int_around((s_bbox[1] - y_offset) * scale),
            self.int_around(dst_width * scale),
            self.int_around(self.out_height * scale)
        )

        # It's important do crop first and than do resize for speed consider
        dst = img[dst_bbox[1]:dst_bbox[1] + dst_bbox[3], dst_bbox[0]:dst_bbox[0] + dst_bbox[2]]

        dst = cv2.resize(dst, (dst_width, self.out_height), interpolation=cv2.INTER_CUBIC)

        return dst, dst_bbox

    def int_around(self, val):
        return int(np.around(val))

    def get_word_color(self, bg, text_x, text_y, word_height, word_width):
        """
        Only use word roi area to get word color
        """
        offset = 10
        ymin = text_y - offset
        ymax = text_y + word_height + offset
        xmin = text_x - offset
        xmax = text_x + word_width + offset

        word_roi_bg = bg[ymin: ymax, xmin: xmax]

        bg_mean = int(np.mean(word_roi_bg) * (2 / 3))
        word_color = random.randint(0, bg_mean)
        return word_color

    def draw_text_on_bg(self, word, font, bg):
        """
        Draw word in the center of background
        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_size = self.get_word_size(font, word)
        word_height = word_size[1]
        word_width = word_size[0]

        offset = font.getoffset(word)

        pil_img = Image.fromarray(np.uint8(bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        word_color = self.get_word_color(bg, text_x, text_y, word_height, word_width)

        if apply(self.cfg.random_space):
            text_x, text_y, word_width, word_height = self.draw_text_with_random_space(draw, font, word, word_color,
                                                                                       bg_width, bg_height)
            np_img = np.array(pil_img).astype(np.float32)
        else:
            if apply(self.cfg.seamless_clone):
                np_img = self.draw_text_seamless(font, bg, word, word_color, word_height, word_width, offset)
            else:
                self.draw_text_wrapper(draw, word, text_x - offset[0], text_y - offset[1], font, word_color)
                # draw.text((text_x - offset[0], text_y - offset[1]), word, fill=word_color, font=font)

                np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [
            [text_x, text_y],
            [text_x + word_width, text_y],
            [text_x + word_width, text_y + word_height],
            [text_x, text_y + word_height]
        ]

        return np_img, text_box_pnts, word_color

    def draw_text_seamless(self, font, bg, word, word_color, word_height, word_width, offset):
        # For better seamlessClone
        seamless_offset = 6

        # Draw text on a white image, than draw it on background
        white_bg = np.ones((word_height + seamless_offset, word_width + seamless_offset)) * 255
        text_img = Image.fromarray(np.uint8(white_bg))
        draw = ImageDraw.Draw(text_img)

        # draw.text((0 + seamless_offset // 2, 0 - offset[1] + seamless_offset // 2), word,
        #           fill=word_color, font=font)

        self.draw_text_wrapper(draw, word,
                               0 + seamless_offset // 2,
                               0 - offset[1] + seamless_offset // 2,
                               font, word_color)

        # assume whole text_img as mask
        text_img = np.array(text_img).astype(np.uint8)
        text_mask = 255 * np.ones(text_img.shape, text_img.dtype)

        # This is where the CENTER of the airplane will be placed
        center = (bg.shape[1] // 2, bg.shape[0] // 2)

        # opencv seamlessClone require bgr image
        text_img_bgr = np.ones((text_img.shape[0], text_img.shape[1], 3), np.uint8)
        bg_bgr = np.ones((bg.shape[0], bg.shape[1], 3), np.uint8)
        cv2.cvtColor(text_img, cv2.COLOR_GRAY2BGR, text_img_bgr)
        cv2.cvtColor(bg, cv2.COLOR_GRAY2BGR, bg_bgr)

        flag = np.random.choice([
            cv2.NORMAL_CLONE,
            cv2.MIXED_CLONE,
            cv2.MONOCHROME_TRANSFER
        ])

        mixed_clone = cv2.seamlessClone(text_img_bgr, bg_bgr, text_mask, center, flag)

        np_img = cv2.cvtColor(mixed_clone, cv2.COLOR_BGR2GRAY)

        return np_img

    def draw_text_with_random_space(self, draw, font, word, word_color, bg_width, bg_height):
        """ If random_space applied, text_x, text_y, word_width, word_height may change"""
        width = 0
        height = 0
        chars_size = []
        y_offset = 10 ** 5
        for c in word:
            size = font.getsize(c)
            chars_size.append(size)

            width += size[0]
            # set max char height as word height
            if size[1] > height:
                height = size[1]

            # Min chars y offset as word y offset
            # Assume only y offset
            c_offset = font.getoffset(c)
            if c_offset[1] < y_offset:
                y_offset = c_offset[1]

        char_space_width = int(height * np.random.uniform(self.cfg.random_space.min, self.cfg.random_space.max))

        width += (char_space_width * (len(word) - 1))

        text_x = int((bg_width - width) / 2)
        text_y = int((bg_height - height) / 2)

        c_x = text_x
        c_y = text_y

        for i, c in enumerate(word):
            # self.draw_text_wrapper(draw, c, c_x, c_y - y_offset, font, word_color, force_text_border)
            draw.text((c_x, c_y - y_offset), c, fill=word_color, font=font)

            c_x += (chars_size[i][0] + char_space_width)

        return text_x, text_y, width, height

    def draw_text_wrapper(self, draw, text, x, y, font, text_color):
        """
        :param x/y: 应该是移除了 offset 的
        """
        if apply(self.cfg.text_border):
            self.draw_border_text(draw, text, x, y, font, text_color)
        else:
            draw.text((x, y), text, fill=text_color, font=font)

    def draw_border_text(self, draw, text, x, y, font, text_color):
        """
        :param x/y: 应该是移除了 offset 的
        """
        # thickness larger than 1 may give bad border result
        thickness = 1

        choices = []
        p = []
        if self.cfg.text_border.light.enable:
            choices.append(0)
            p.append(self.cfg.text_border.light.fraction)
        if self.cfg.text_border.dark.enable:
            choices.append(1)
            p.append(self.cfg.text_border.dark.fraction)

        light_or_dark = np.random.choice(choices, p=p)

        if light_or_dark == 0:
            border_color = text_color + np.random.randint(0, 255 - text_color - 1)
        elif light_or_dark == 1:
            border_color = text_color - np.random.randint(0, text_color + 1)

        # thin border
        draw.text((x - thickness, y), text, font=font, fill=border_color)
        draw.text((x + thickness, y), text, font=font, fill=border_color)
        draw.text((x, y - thickness), text, font=font, fill=border_color)
        draw.text((x, y + thickness), text, font=font, fill=border_color)

        # thicker border
        draw.text((x - thickness, y - thickness), text, font=font, fill=border_color)
        draw.text((x + thickness, y - thickness), text, font=font, fill=border_color)
        draw.text((x - thickness, y + thickness), text, font=font, fill=border_color)
        draw.text((x + thickness, y + thickness), text, font=font, fill=border_color)

        # now draw the text over it
        draw.text((x, y), text, font=font, fill=text_color)

    def gen_bg(self, width, height):
        if apply(self.cfg.img_bg):
            bg = self.gen_bg_from_image(int(width), int(height))
        else:
            bg = self.gen_rand_bg(int(width), int(height))
        return bg

    def gen_rand_bg(self, width, height):
        """
        Generate random background
        """
        bg_high = random.uniform(220, 255)
        bg_low = bg_high - random.uniform(1, 60)

        bg = np.random.randint(bg_low, bg_high, (height, width)).astype(np.uint8)

        bg = self.apply_gauss_blur(bg)

        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random crop from resized background
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width, out.shape[0], out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        out = self.apply_gauss_blur(out, ks=[7, 11, 13, 15, 17])

        # bg_mean = int(np.mean(out))

        # TODO: find a better way to deal with background
        # alpha = 255 / bg_mean  # 对比度
        # beta = np.random.randint(bg_mean // 4, bg_mean // 2)  # 亮度
        # out = np.uint8(np.clip((alpha * out + beta), 0, 255))

        return out

    @retry
    def pick_font(self, img_index):
        """
        :param img_index when use list corpus, this param is used
        :return:
            font: truetype
            size: word size, removed offset (width, height)
        """
        word = self.corpus.get_sample(img_index)

        if self.clip_max_chars and len(word) > self.max_chars:
            word = self.clip_chars(word)

        font_path = random.choice(self.fonts)

        if self.strict:
            unsupport_chars = self.font_unsupport_chars[font_path]
            for c in word:
                if c == ' ':
                    continue
                if c in unsupport_chars:
                    print('Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s' % (
                        word, c, font_path))
                    raise Exception

        # Font size in point
        font_size = random.randint(self.cfg.font_size.min, self.cfg.font_size.max)
        font = ImageFont.truetype(font_path, font_size)

        return word, font, self.get_word_size(font, word)

    def clip_chars(self, word):
        for _ in range(6):
            start = random.randint(0, len(word) - self.max_chars)
            if word[start] != ' ' and word[start + self.max_chars - 1] != ' ':
                # 保证边上的不是空格字符
                return word[start: start + self.max_chars]

        raise Exception

    def get_word_size(self, font, word):
        """
        Get word size removed offset
        :param font: truetype
        :param word:
        :return:
            size: word size, removed offset (width, height)
        """
        offset = font.getoffset(word)
        size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self, img, text_box_pnts, max_x, max_y, max_z, gpu=False):
        """
        Apply perspective transform on image
        :param img: origin numpy image
        :param text_box_pnts: four corner points of text
        :param x: max rotate angle around X-axis
        :param y: max rotate angle around Y-axis
        :param z: max rotate angle around Z-axis
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective transform
            dst_text_pnts: points of text after apply perspective transform
        """

        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        # print("x: %f, y: %f, z: %f" % (x, y, z))

        transformer = math_utils.PerspectiveTransform(x, y, z, scale=1.0, fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        if prob(0.5):
            return self.apply_gauss_blur(img, [3, 5])
        else:
            return self.apply_norm_blur(img)

    def apply_gauss_blur(self, img, ks=None):
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_prydown(self, img):
        """
        模糊图像，模拟小图片放大的效果
        """
        scale = random.uniform(1, self.cfg.prydown.max_scale)
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)), interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)

    def reverse_img(self, word_img):
        offset = np.random.randint(-10, 10)
        return 255 + offset - word_img

    def create_kernals(self):
        self.emboss_kernal = np.array([
            [-2, -1, 0],
            [-1, 1, 1],
            [0, 1, 2]
        ])

        self.sharp_kernel = np.array([
            [-1, -1, -1],
            [-1, 9, -1],
            [-1, -1, -1]
        ])

    def apply_emboss(self, word_img):
        return cv2.filter2D(word_img, -1, self.emboss_kernal)

    def apply_sharp(self, word_img):
        return cv2.filter2D(word_img, -1, self.sharp_kernel)

    def apply_crop(self, text_box_pnts, crop_cfg):
        """
        Random crop text box height top or bottom, we don't need image information in this step, only change box pnts
        :param text_box_pnts: bbox of text [left-top, right-top, right-bottom, left-bottom]
        :param crop_cfg:
        :return:
            croped_text_box_pnts
        """
        height = abs(text_box_pnts[0][1] - text_box_pnts[3][1])
        scale = float(height) / float(self.out_height)

        croped_text_box_pnts = text_box_pnts

        if prob(0.5):
            top_crop = int(random.randint(crop_cfg.top.min, crop_cfg.top.max) * scale)
            self.dmsg("top crop %d" % top_crop)
            croped_text_box_pnts[0][1] += top_crop
            croped_text_box_pnts[1][1] += top_crop
        else:
            bottom_crop = int(random.randint(crop_cfg.bottom.min, crop_cfg.bottom.max) * scale)
            self.dmsg("bottom crop %d " % bottom_crop)
            croped_text_box_pnts[2][1] -= bottom_crop
            croped_text_box_pnts[3][1] -= bottom_crop

        return croped_text_box_pnts

Example #2

Show file

File: renderer.py Project: liangshuang1993/text_renderer

class Renderer(object):
    def __init__(self,
                 corpus,
                 fonts,
                 bgs,
                 cfg,
                 width=256,
                 height=32,
                 clip_max_chars=False,
                 debug=False,
                 gpu=False,
                 strict=False):
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height
        self.clip_max_chars = clip_max_chars
        self.max_chars = math.floor(width / 4) - 1
        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)

        if self.strict:
            self.font_chars = get_fonts_chars(self.fonts, corpus.chars_file)

    def gen_img(self):
        word, font, word_size = self.pick_font()

        # Background's height should much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)

        word_img, text_box_pnts, word_color = self.draw_text_on_bg(
            word, font, bg)

        if apply(self.cfg.line):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts,
                                                       word_color)

        word_img, img_pnts_transformed, text_box_pnts_transformed = \
            self.apply_perspective_transform(word_img, text_box_pnts,
                                             max_x=self.cfg.perspective_transform.max_x,
                                             max_y=self.cfg.perspective_transform.max_y,
                                             max_z=self.cfg.perspective_transform.max_z,
                                             gpu=self.gpu)

        if self.debug:
            word_img = draw_box(word_img, img_pnts_transformed, (0, 255, 0))
            word_img = draw_box(word_img, text_box_pnts_transformed,
                                (0, 0, 255))
            _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
            word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, crop_bbox = self.crop_img(word_img,
                                                text_box_pnts_transformed)

        if apply(self.cfg.noise):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)

        blured = False
        if apply(self.cfg.blur):
            blured = True
            word_img = self.apply_blur_on_output(word_img)

        if not blured:
            if apply(self.cfg.prydown):
                word_img = self.apply_prydown(word_img)

        word_img = np.clip(word_img, 0., 255.)

        if apply(self.cfg.reverse_color):
            word_img = self.reverse_img(word_img)

        return word_img, word

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        """
        Get random left-top point for putting a small rect in a large rect.
        Normally dst_height>src_height and dst_width>src_width
        """
        y_max_offset = 0
        if dst_height > src_height:
            y_max_offset = dst_height - src_height

        x_max_offset = 0
        if dst_width > src_width:
            x_max_offset = dst_width - src_width

        y_offset = 0
        if y_max_offset != 0:
            y_offset = random.randint(0, y_max_offset)

        x_offset = 0
        if x_max_offset != 0:
            x_offset = random.randint(0, x_max_offset)

        return x_offset, y_offset

    def crop_img(self, img, text_box_pnts_transformed):
        """
        :param img: image to crop
        :param text_box_pnts_transformed: text_bbox_pnts after apply_perspective_transform
        :return:
            dst: image with desired output size, height=32, width=flags.img_width
            crop_bbox: bounding box on input image
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height

        # TODO: prevent text too small
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        s_bbox = (np.around(bbox[0] / scale), np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale), np.around(bbox[3] / scale))

        x_offset, y_offset = self.random_xy_offset(s_bbox_height, s_bbox_width,
                                                   self.out_height,
                                                   self.out_width)

        def int_around(val):
            return int(np.around(val))

        dst_bbox = (int_around((s_bbox[0] - x_offset) * scale),
                    int_around((s_bbox[1] - y_offset) * scale),
                    int_around(self.out_width * scale),
                    int_around(self.out_height * scale))

        # It's important do crop first and than do resize for speed consider
        dst = img[dst_bbox[1]:dst_bbox[1] + dst_bbox[3],
                  dst_bbox[0]:dst_bbox[0] + dst_bbox[2]]

        dst = cv2.resize(dst, (self.out_width, self.out_height),
                         interpolation=cv2.INTER_CUBIC)

        return dst, dst_bbox

    def get_word_color(self, bg, text_x, text_y, word_height, word_width):
        """
        Only use word roi area to get word color
        """
        offset = 10
        ymin = text_y - offset
        ymax = text_y + word_height + offset
        xmin = text_x - offset
        xmax = text_x + word_width + offset

        word_roi_bg = bg[ymin:ymax, xmin:xmax]

        bg_mean = int(np.mean(word_roi_bg) * (2 / 3))
        word_color = random.randint(0, bg_mean)
        return word_color

    def draw_text_on_bg(self, word, font, bg):
        """
        Draw word in the center of background
        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_size = self.get_word_size(font, word)
        word_height = word_size[1]
        word_width = word_size[0]

        offset = font.getoffset(word)

        pil_img = Image.fromarray(np.uint8(bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        word_color = self.get_word_color(bg, text_x, text_y, word_height,
                                         word_width)

        if apply(self.cfg.random_space):
            text_x, text_y, word_width, word_height = self.draw_text_with_random_space(
                draw, font, word, word_color, bg_width, bg_height)
        else:
            draw.text((text_x - offset[0], text_y - offset[1]),
                      word,
                      fill=word_color,
                      font=font)

        np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [[text_x, text_y], [text_x + word_width, text_y],
                         [text_x + word_width, text_y + word_height],
                         [text_x, text_y + word_height]]

        return np_img, text_box_pnts, word_color

    def draw_text_with_random_space(self, draw, font, word, word_color,
                                    bg_width, bg_height):
        """ If random_space applied, text_x, text_y, word_width, word_height may change"""
        width = 0
        height = 0
        chars_size = []
        y_offset = 10**5
        for c in word:
            size = font.getsize(c)
            chars_size.append(size)

            width += size[0]
            # set max char height as word height
            if size[1] > height:
                height = size[1]

            # Min chars y offset as word y offset
            # Assume only y offset
            c_offset = font.getoffset(c)
            if c_offset[1] < y_offset:
                y_offset = c_offset[1]

        char_space_width = int(height * np.random.uniform(
            self.cfg.random_space.min, self.cfg.random_space.max))

        width += (char_space_width * (len(word) - 1))

        text_x = int((bg_width - width) / 2)
        text_y = int((bg_height - height) / 2)

        c_x = text_x
        c_y = text_y
        for i, c in enumerate(word):
            draw.text((c_x, c_y - y_offset), c, fill=word_color, font=font)

            c_x += (chars_size[i][0] + char_space_width)

        return text_x, text_y, width, height

    def gen_bg(self, width, height):
        if apply(self.cfg.img_bg):
            bg = self.gen_bg_from_image(int(width), int(height))
        else:
            bg = self.gen_rand_bg(int(width), int(height))
        return bg

    def gen_rand_bg(self, width, height):
        """
        Generate random background
        """
        bg_high = random.uniform(220, 255)
        bg_low = bg_high - random.uniform(1, 60)

        bg = np.random.randint(bg_low, bg_high,
                               (height, width)).astype(np.uint8)

        bg = self.apply_gauss_blur(bg)

        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random crop from resized background
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width, out.shape[0],
                                                   out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        out = self.apply_gauss_blur(out, ks=[7, 11, 13, 15, 17])

        bg_mean = int(np.mean(out))

        alpha = 255 / bg_mean  # 对比度
        beta = np.random.randint(bg_mean // 2, bg_mean)  # 亮度
        out = np.uint8(np.clip((alpha * out + beta), 0, 255))

        return out

    @retry
    def pick_font(self):
        """
        :return:
            font: truetype
            size: word size, removed offset (width, height)
        """
        word = self.corpus.get_sample()

        if self.clip_max_chars and len(word) > self.max_chars:
            word = word[:self.max_chars]

        font_path = random.choice(self.fonts)

        if self.strict:
            supported_chars = self.font_chars[font_path]
            for c in word:
                if c == ' ':
                    continue
                if c not in supported_chars:
                    print(
                        'Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s'
                        % (word, c, font_path))
                    raise Exception

        # Font size in point
        font_size = random.randint(self.cfg.font_size.min,
                                   self.cfg.font_size.max)
        font = ImageFont.truetype(font_path, font_size)

        return word, font, self.get_word_size(font, word)

    def get_word_size(self, font, word):
        """
        Get word size removed offset
        :param font: truetype
        :param word:
        :return:
            size: word size, removed offset (width, height)
        """
        offset = font.getoffset(word)
        size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self,
                                    img,
                                    text_box_pnts,
                                    max_x,
                                    max_y,
                                    max_z,
                                    gpu=False):
        """
        Apply perspective transform on image
        :param img: origin numpy image
        :param text_box_pnts: four corner points of text
        :param x: max rotate angle around X-axis
        :param y: max rotate angle around Y-axis
        :param z: max rotate angle around Z-axis
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective transform
            dst_text_pnts: points of text after apply perspective transform
        """

        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        # print("x: %f, y: %f, z: %f" % (x, y, z))

        transformer = math_utils.PerspectiveTransform(x,
                                                      y,
                                                      z,
                                                      scale=1.0,
                                                      fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        if prob(0.5):
            return self.apply_gauss_blur(img, [3, 5])
        else:
            return self.apply_norm_blur(img)

    def apply_gauss_blur(self, img, ks=None):
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_prydown(self, img):
        """
        模糊图像，模拟小图片放大的效果
        """
        scale = random.uniform(1, self.cfg.prydown.max_scale)
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)),
                         interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)

    def reverse_img(self, word_img):
        offset = np.random.randint(-10, 10)
        return 255 + offset - word_img

Example #3

Show file

class Renderer(object):
    def __init__(self,
                 corpus,
                 fonts,
                 bgs,
                 cfg,
                 width=256,
                 height=32,
                 debug=False,
                 gpu=False,
                 strict=False):
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height
        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)

        if self.strict:
            self.font_chars = get_fonts_chars(self.fonts, corpus.chars_file)

    def gen_img(self):
        word, font, word_size = self.pick_font()

        # Background's height should much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)

        word_img, char_pnts, text_box_pnts, word_color = self.draw_text_on_bg_char_pts(
            word, font, bg)

        if self.cfg.line.enable and prob(self.cfg.line.fraction):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts,
                                                       word_color)

        word_img, img_pnts_transformed, char_pnts_transformed = \
            self.apply_perspective_transform(word_img, char_pnts,
                                             max_x=self.cfg.perspective_transform.max_x,
                                             max_y=self.cfg.perspective_transform.max_y,
                                             max_z=self.cfg.perspective_transform.max_z,
                                             gpu=self.gpu)
        if self.debug:
            word_img = draw_box(word_img, img_pnts_transformed, (0, 255, 0))
            word_img = draw_box(word_img, char_pnts_transformed, (0, 0, 255))
            # _, crop_bbox = self.crop_img(word_img, char_pnts_transformed)
            # word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, char_pnts_transformed = self.crop_img(
                word_img, char_pnts_transformed)

        if self.cfg.noise.enable and prob(self.cfg.noise.fraction):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)

        blured = False
        if self.cfg.blur.enable and prob(self.cfg.blur.fraction):
            blured = True
            word_img = self.apply_blur_on_output(word_img)

        if not blured:
            if self.cfg.prydown.enable and prob(self.cfg.prydown.fraction):
                word_img = self.apply_prydown(word_img)

        word_img = np.clip(word_img, 0., 255.)

        return word_img, word, char_pnts_transformed

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        y_max_offset = 0
        if dst_height > src_height:
            y_max_offset = dst_height - src_height

        x_max_offset = 0
        if dst_width > src_width:
            x_max_offset = dst_width - src_width

        y_offset = 0
        if y_max_offset != 0:
            y_offset = random.randint(0, y_max_offset)

        x_offset = 0
        if x_max_offset != 0:
            x_offset = random.randint(0, x_max_offset)

        return x_offset, y_offset

    def crop_img(self, img, text_box_pnts_transformed):
        """
        :param img: image to crop
        :param text_box_pnts_transformed: text_bbox_pnts after apply_perspective_transform
        :return:
            dst: image with desired output size, height=32, width=flags.img_width
            crop_bbox: bounding box on input image
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height

        # TODO: prevent text too small
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        s_bbox = (np.around(bbox[0] / scale), np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale), np.around(bbox[3] / scale))

        x_offset, y_offset = self.random_xy_offset(self.out_height,
                                                   self.out_width,
                                                   s_bbox_height, s_bbox_width)

        def int_around(val):
            return int(np.around(val))

        dst_bbox = (int_around((s_bbox[0] - x_offset) * scale),
                    int_around((s_bbox[1] - y_offset) * scale),
                    int_around(self.out_width * scale),
                    int_around(self.out_height * scale))

        # It's important do crop first and than do resize for speed consider
        dst = img[dst_bbox[1]:dst_bbox[1] + dst_bbox[3],
                  dst_bbox[0]:dst_bbox[0] + dst_bbox[2]]
        sc_w, sc_h = self.out_width / dst.shape[
            1], self.out_height / dst.shape[0]
        dst = cv2.resize(dst, (self.out_width, self.out_height),
                         interpolation=cv2.INTER_CUBIC)

        offseted_pts = []
        for pt in text_box_pnts_transformed:
            offseted_pts.append([
                int_around(
                    ((pt[0] / scale - x_offset) * scale - dst_bbox[0]) * sc_w),
                int_around(
                    ((pt[1] / scale - y_offset) * scale - dst_bbox[1]) * sc_h)
            ])

        return dst, offseted_pts

    def draw_text_on_bg(self, word, font, bg):
        """
        Draw word in the center of background
        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_size = self.get_word_size(font, word)
        word_height = word_size[1]
        word_width = word_size[0]

        offset = font.getoffset(word)

        pil_img = Image.fromarray(np.uint8(bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        bg_mean = int(np.mean(bg))
        word_color = random.randint(0, int(bg_mean / 3 * 2))

        draw.text((text_x - offset[0], text_y - offset[1]),
                  word,
                  fill=word_color,
                  font=font)

        np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [[text_x, text_y], [text_x + word_width, text_y],
                         [text_x + word_width, text_y + word_height],
                         [text_x, text_y + word_height]]

        return np_img, text_box_pnts, word_color

    def draw_text_on_bg_char_pts(self, word, font, bg):
        """
        Draw word in the center of background
        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_size = self.get_word_size(font, word)
        word_height = word_size[1]
        word_width = word_size[0]

        offset = font.getoffset(word)

        pil_img = Image.fromarray(np.uint8(bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        bg_mean = int(np.mean(bg))
        word_color = random.randint(0, int(bg_mean / 3 * 2))

        draw.text((text_x - offset[0], text_y - offset[1]),
                  word,
                  fill=word_color,
                  font=font)

        np_img = np.array(pil_img).astype(np.float32)

        chars_box_pnts = []
        x, y = text_x, text_y

        maxh = 0
        for char in word:
            char_offset = font.getoffset(char)
            char_size = font.getsize(char)
            char_size = (char_size[0] - char_offset[0],
                         char_size[1] - char_offset[1])
            maxh = max(char_size[1], maxh)

        for char in word:
            char_offset = font.getoffset(char)
            char_size = font.getsize(char)
            char_size = (char_size[0] - char_offset[0],
                         char_size[1] - char_offset[1])

            h, w = char_size[1], char_size[0]

            chars_box_pnts.append([x, y])
            chars_box_pnts.append([x + w, y])
            chars_box_pnts.append([x + w, y + maxh])
            chars_box_pnts.append([x, y + maxh])
            x += w

        text_box_pnts = [[text_x, text_y], [text_x + word_width, text_y],
                         [text_x + word_width, text_y + word_height],
                         [text_x, text_y + word_height]]
        return np_img, chars_box_pnts, text_box_pnts, word_color

    def gen_bg(self, width, height):
        if prob(0.5):
            bg = self.gen_rand_bg(int(width), int(height))
        else:
            bg = self.gen_bg_from_image(int(width), int(height))
        return bg

    def gen_rand_bg(self, width, height):
        """
        Generate random background
        """
        bg_high = random.uniform(220, 255)
        bg_low = bg_high - random.uniform(1, 60)

        bg = np.random.randint(bg_low, bg_high,
                               (height, width)).astype(np.uint8)

        bg = self.apply_gauss_blur(bg)

        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random crop from resized background
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width, out.shape[0],
                                                   out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        return out

    @retry
    def pick_font(self):
        """
        :return:
            font: truetype
            size: word size, removed offset (width, height)
        """
        word = self.corpus.get_sample()
        font_path = random.choice(self.fonts)

        if self.strict:
            supported_chars = self.font_chars[font_path]
            for c in word:
                if c not in supported_chars:
                    print(
                        'Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s'
                        % (word, c, font_path))
                    raise Exception

        # Font size in point
        font_size = random.randint(20, 40)
        font = ImageFont.truetype(font_path, font_size)

        return word, font, self.get_word_size(font, word)

    def get_word_size(self, font, word):
        """
        Get word size removed offset
        :param font: truetype
        :param word:
        :return:
            size: word size, removed offset (width, height)
        """
        offset = font.getoffset(word)
        size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self,
                                    img,
                                    text_box_pnts,
                                    max_x,
                                    max_y,
                                    max_z,
                                    gpu=False):
        """
        Apply perspective transform on image
        :param img: origin numpy image
        :param text_box_pnts: four corner points of text
        :param x: max rotate angle around X-axis
        :param y: max rotate angle around Y-axis
        :param z: max rotate angle around Z-axis
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective transform
            dst_text_pnts: points of text after apply perspective transform
        """

        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        transformer = math_utils.PerspectiveTransform(x,
                                                      y,
                                                      z,
                                                      scale=1.0,
                                                      fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        if prob(0.5):
            return self.apply_gauss_blur(img, [3, 5])
        else:
            return self.apply_norm_blur(img)

    def apply_gauss_blur(self, img, ks=None):
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_prydown(self, img):
        """
        模糊图像，模拟小图片放大的效果
        """
        scale = random.uniform(1, 1.5)
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)),
                         interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)

Example #4

Show file

class Renderer(object):
    def __init__(self,
                 corpus,
                 fonts,
                 bgs,
                 cfg,
                 width=256,
                 height=32,
                 clip_max_chars=False,
                 debug=False,
                 gpu=False,
                 strict=False,
                 fonts_by_image=False):
        self.corpus = corpus
        self.fonts = fonts
        self.bgs = bgs
        self.out_width = width
        self.out_height = height
        self.clip_max_chars = clip_max_chars
        self.max_chars = math.floor(width / 4) - 1
        self.debug = debug
        self.gpu = gpu
        self.strict = strict
        self.cfg = cfg
        self.fonts_by_image = fonts_by_image

        self.timer = Timer()
        self.liner = Liner(cfg)
        self.noiser = Noiser(cfg)
        self.texture = Texture(cfg)
        self.remaper = Remaper(cfg)
        self.start = time.time()

        self.create_kernals()

        if self.strict:
            self.font_unsupport_chars = font_utils.get_unsupported_chars(
                self.fonts, corpus.chars_file, self.fonts_by_image)

    def gen_img(self, img_index):
        word, font, word_size, font2 = self.pick_font(img_index)
        self.dmsg("after pick font")

        # Background's height should much larger than raw word image's height,
        # to make sure we can crop full word image after apply perspective
        bg = self.gen_bg(width=word_size[0] * 8, height=word_size[1] * 8)
        word_img, text_box_pnts, word_color = self.draw_text_on_bg(
            word, font, bg, font2)
        self.dmsg("After draw_text_on_bg")

        if apply(self.cfg.crop):
            text_box_pnts = self.apply_crop(text_box_pnts, self.cfg.crop)

        if apply(self.cfg.line):
            word_img, text_box_pnts = self.liner.apply(word_img, text_box_pnts)
            self.dmsg("After draw line")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (0, 255, 155))

        if apply(self.cfg.curve):
            word_img, text_box_pnts = self.remaper.apply(
                word_img, text_box_pnts, word_color)
            self.dmsg("After remapping")

        if self.debug:
            word_img = draw_box(word_img, text_box_pnts, (155, 255, 0))

        word_img = self.mix_seamless_bg(word_img, bg)
        if apply(self.cfg.extra_words):
            word_img = self.draw_extra_random_word(word_img, text_box_pnts,
                                                   img_index)
            self.dmsg("After add extra words")

        word_img, img_pnts_transformed, text_box_pnts_transformed = \
            self.apply_perspective_transform(word_img, text_box_pnts,
                                             max_x=self.cfg.perspective_transform.max_x,
                                             max_y=self.cfg.perspective_transform.max_y,
                                             max_z=self.cfg.perspective_transform.max_z,
                                             gpu=self.gpu)

        self.dmsg("After perspective transform")

        if self.debug:
            _, crop_bbox = self.crop_img(word_img, text_box_pnts_transformed)
            word_img = draw_bbox(word_img, crop_bbox, (255, 0, 0))
        else:
            word_img, crop_bbox = self.crop_img(word_img,
                                                text_box_pnts_transformed)

        self.dmsg("After crop_img")

        if apply(self.cfg.noise):
            word_img = np.clip(word_img, 0., 255.)
            word_img = self.noiser.apply(word_img)
            self.dmsg("After noiser")

        if apply(self.cfg.blur):
            word_img = self.apply_blur_on_output(word_img)
            self.dmsg("After blur")

        word_img = np.clip(word_img, 0., 255.)

        if apply(self.cfg.reverse_color):
            word_img = self.reverse_img(word_img)
            self.dmsg("After reverse_img")

        if apply(self.cfg.emboss):
            word_img = self.apply_emboss(word_img)
            self.dmsg("After emboss")

        if apply(self.cfg.sharp):
            word_img = self.apply_sharp(word_img)
            self.dmsg("After sharp")

        return word_img, word

    def dmsg(self, msg):
        if self.debug:
            print(msg)

    def random_xy_offset(self, src_height, src_width, dst_height, dst_width):
        """
        Get random left-top point for putting a small rect in a large rect.
        Normally dst_height>src_height and dst_width>src_width
        """
        y_max_offset = 0
        if dst_height > src_height:
            y_max_offset = dst_height - src_height

        x_max_offset = 0
        if dst_width > src_width:
            x_max_offset = dst_width - src_width

        y_offset = 0
        if y_max_offset != 0:
            y_offset = random.randint(0, y_max_offset)

        x_offset = 0
        if x_max_offset != 0:
            x_offset = random.randint(0, x_max_offset)

        return x_offset, y_offset

    def crop_img(self, img, text_box_pnts_transformed):
        """
        Crop text from large input image
        :param img: image to crop
        :param text_box_pnts_transformed: text_bbox_pnts after apply_perspective_transform
        :return:
            dst: image with desired output size, height=32, width=flags.img_width
            crop_bbox: bounding box on input image
        """
        bbox = cv2.boundingRect(text_box_pnts_transformed)
        bbox_width = bbox[2]
        bbox_height = bbox[3]

        # Output shape is (self.out_width, self.out_height)
        # We randomly put bounding box of transformed text in the output shape
        # so the max value of dst_height is out_height

        # TODO: If rotate angle(z) of text is too big, text will become very small,
        # we should do something to prevent text too small

        # dst_height and dst_width is used to leave some padding around text bbox
        dst_height = random.randint(self.out_height // 4 * 3, self.out_height)

        if self.out_width == 0:
            scale = bbox_height / dst_height
        else:
            dst_width = self.out_width
            scale = max(bbox_height / dst_height, bbox_width / self.out_width)

        s_bbox_width = math.ceil(bbox_width / scale)
        s_bbox_height = math.ceil(bbox_height / scale)

        if self.out_width == 0:
            padding = random.randint(s_bbox_width // 10, s_bbox_width // 8)
            dst_width = s_bbox_width + padding * 2

        s_bbox = (np.around(bbox[0] / scale), np.around(bbox[1] / scale),
                  np.around(bbox[2] / scale), np.around(bbox[3] / scale))

        x_offset, y_offset = self.random_xy_offset(s_bbox_height, s_bbox_width,
                                                   self.out_height, dst_width)

        dst_bbox = (self.int_around((s_bbox[0] - x_offset) * scale),
                    self.int_around((s_bbox[1] - y_offset) * scale),
                    self.int_around(dst_width * scale),
                    self.int_around(self.out_height * scale))

        # It's important do crop first and than do resize for speed consider
        dst = img[dst_bbox[1]:dst_bbox[1] + dst_bbox[3],
                  dst_bbox[0]:dst_bbox[0] + dst_bbox[2]]

        dst = cv2.resize(dst, (dst_width, self.out_height),
                         interpolation=cv2.INTER_CUBIC)

        return dst, dst_bbox

    def int_around(self, val):
        return int(np.around(val))

    def get_word_color(self):
        colors = [i for i in self.cfg.font_color]
        p = [self.cfg.font_color[i].fraction for i in self.cfg.font_color]
        # pick color by fraction
        color_name = np.random.choice(colors, p=p)
        l_boundary = [
            int(i)
            for i in self.cfg.font_color[color_name].l_boundary.split(',')
        ]
        h_boundary = [
            int(i)
            for i in self.cfg.font_color[color_name].h_boundary.split(',')
        ]
        # random color by low and high RGB boundary
        if color_name == 'black' or color_name == 'gray':
            r = g = b = np.random.randint(l_boundary[0], h_boundary[0])
        else:
            r = np.random.randint(l_boundary[0], h_boundary[0])
            g = np.random.randint(l_boundary[1], h_boundary[1])
            b = np.random.randint(l_boundary[2], h_boundary[2])
        return (b, g, r)

    def draw_text_on_bg(self, word, font, bg, font2):
        """
        Draw word in the center of background
        :param word: word to draw
        :param font: font to draw word
        :param bg: background numpy image
        :return:
            np_img: word image
            text_box_pnts: left-top, right-top, right-bottom, left-bottom
        """
        bg_height = bg.shape[0]
        bg_width = bg.shape[1]

        word_size = self.get_word_size(font, word)
        word_height = word_size[1]
        word_width = word_size[0]

        offset = (0, 0) if self.fonts_by_image else font.getoffset(word)
        pure_bg = np.ones((bg_height, bg_width, 3)) * 255

        pil_img = Image.fromarray(np.uint8(pure_bg))
        draw = ImageDraw.Draw(pil_img)

        # Draw text in the center of bg
        text_x = int((bg_width - word_width) / 2)
        text_y = int((bg_height - word_height) / 2)

        word_color = self.get_word_color()

        if apply(self.cfg.random_space):
            text_x, text_y, word_width, word_height = self.draw_text_with_random_space(
                draw, font, word, word_color, bg_width, bg_height, pure_bg)
        else:
            if apply(self.cfg.texture) and not self.fonts_by_image:
                pure_bg = Image.new('RGBA', (bg_width, bg_height),
                                    (255, 255, 255, 255))
                pil_img = Image.new('RGBA', (bg_width, bg_height),
                                    (255, 255, 255, 0))
                draw = ImageDraw.Draw(pil_img)
                _ = self.draw_text_wrapper(draw, word, text_x - offset[0],
                                           text_y - offset[1], font,
                                           word_color, font2, pil_img)
                pil_img = self.texture.apply_cloud_texture(pure_bg, pil_img)
            else:
                if self.fonts_by_image:
                    pil_img = self.draw_text_wrapper(draw, word,
                                                     text_x - offset[0],
                                                     text_y - offset[1], font,
                                                     word_color, font2,
                                                     pure_bg)
                else:
                    _ = self.draw_text_wrapper(draw, word, text_x - offset[0],
                                               text_y - offset[1], font,
                                               word_color, font2, pure_bg)
            # draw.text((text_x - offset[0], text_y - offset[1]), word, fill=word_color, font=font)

        np_img = np.array(pil_img).astype(np.float32)

        text_box_pnts = [[text_x, text_y], [text_x + word_width, text_y],
                         [text_x + word_width, text_y + word_height],
                         [text_x, text_y + word_height]]

        return np_img, text_box_pnts, word_color

    def mix_seamless_bg(self, text_img, bg, anchor=None):
        text_img = np.array(text_img).astype(np.uint8)
        text_mask = 255 * np.ones(text_img.shape, text_img.dtype)
        if self.fonts_by_image and anchor:
            h, w = text_img.shape[:2]
            center = (int(anchor[0] + w / 2), int(anchor[1] + h / 2))
            bg = bg.astype(np.uint8)
        else:
            center = (bg.shape[1] // 2, bg.shape[0] // 2)
        mixed_clone = cv2.seamlessClone(text_img, bg, text_mask, center,
                                        cv2.MIXED_CLONE)
        return mixed_clone

    def draw_extra_random_word(self, text_img, text_box_pnts, img_index):
        pil_img = Image.fromarray(np.uint8(text_img))
        draw = ImageDraw.Draw(pil_img)
        word_color = self.get_word_color()
        word, font, word_size, font2 = self.pick_font(img_index)
        word_len = np.random.randint(1, len(word))
        word_height = word_size[1]
        word_width = word_size[0]
        # calculate text x,y
        text_x = np.random.randint(text_box_pnts[0][0], text_box_pnts[1][0])
        text_y_b = text_box_pnts[2][1]
        text_y_t = 2 * text_box_pnts[0][1] - \
            text_box_pnts[2][1] - word_height * 0.5
        text_y = np.random.choice([text_y_t, text_y_b],
                                  p=[
                                      self.cfg.extra_words.top.fraction,
                                      self.cfg.extra_words.bottom.fraction
                                  ])
        _ = self.draw_text_wrapper(draw, word[:word_len], text_x, text_y, font,
                                   word_color, font2, np.uint8(text_img))
        np_img = np.array(pil_img).astype(np.float32)
        return np_img

    def draw_text_with_random_space(self, draw, font, word, word_color,
                                    bg_width, bg_height, pil_img):
        """ If random_space applied, text_x, text_y, word_width, word_height may change"""
        width = 0
        height = 0
        chars_size = []
        y_offset = 10**5
        for c in word:
            if self.fonts_by_image:
                size = font[c].shape[:2] if c in font else (font_size,
                                                            font_size)
            else:
                size = font.getsize(c)
            chars_size.append(size)

            width += size[0]
            # set max char height as word height
            if size[1] > height:
                height = size[1]

            # Min chars y offset as word y offset
            # Assume only y offset
            c_offset = (0, 0) if self.fonts_by_image else font.getoffset(c)
            if c_offset[1] < y_offset:
                y_offset = c_offset[1]

        char_space_width = int(height * np.random.uniform(
            self.cfg.random_space.min, self.cfg.random_space.max))

        width += (char_space_width * (len(word) - 1))

        text_x = int((bg_width - width) / 2)
        text_y = int((bg_height - height) / 2)

        c_x = text_x
        c_y = text_y

        for i, c in enumerate(word):
            # self.draw_text_wrapper(draw, c, c_x, c_y - y_offset, font, word_color, force_text_border)
            if self.fonts_by_image:
                pil_img = self.mix_seamless_bg(font[c], pil_img,
                                               (c_x, c_y - y_offset))
            else:
                draw.text((c_x, c_y - y_offset), c, fill=word_color, font=font)

            c_x += (chars_size[i][0] + char_space_width)

        return text_x, text_y, width, height

    def draw_text_wrapper(self, draw, text, x, y, font, text_color, font2,
                          pil_img):
        """
        :param x/y: 应该是移除了 offset 的
        """
        if apply(self.cfg.text_border):
            self.draw_border_text(draw, text, x, y, font, text_color)

        #todo: 整合draw_text_wrapper 与 draw_text_with_random_space
        elif apply(self.cfg.second_font):
            if self.cfg.second_font.font_color_change:
                text_color2 = self.get_word_color()
            else:
                text_color2 = text_color
            for i, c in enumerate(text):
                if random.random(
                ) < self.cfg.second_font.change_rate and not self.fonts_by_image:
                    y_offset = np.random.uniform(0, font2.getoffset(c)[1])
                    draw.text((x, y + y_offset),
                              c,
                              fill=text_color2,
                              font=font2)
                    x += font2.getsize(c)[0]
                else:
                    if self.fonts_by_image:
                        pil_img = self.mix_seamless_bg(font[c], pil_img,
                                                       (x, y))
                        x += font[c].shape[1]
                    else:
                        draw.text((x, y), c, fill=text_color, font=font)
                        x += font.getsize(c)[0]
        else:
            if self.fonts_by_image:
                for c in text:
                    pil_img = self.mix_seamless_bg(font[c], pil_img, (x, y))
                    x += font[c].shape[1]
            else:
                draw.text((x, y), text, fill=text_color, font=font)
        return pil_img

    def draw_border_text(self, draw, text, x, y, font, text_color):
        """
        :param x/y: 应该是移除了 offset 的
        """
        # thickness larger than 1 may give bad border result
        thickness = 1

        choices = []
        p = []
        if self.cfg.text_border.light.enable:
            choices.append(0)
            p.append(self.cfg.text_border.light.fraction)
        if self.cfg.text_border.dark.enable:
            choices.append(1)
            p.append(self.cfg.text_border.dark.fraction)

        light_or_dark = np.random.choice(choices, p=p)

        if light_or_dark == 0:
            border_color = text_color + \
                np.random.randint(0, 255 - text_color - 1)
        elif light_or_dark == 1:
            border_color = text_color - np.random.randint(0, text_color + 1)

        # thin border
        draw.text((x - thickness, y), text, font=font, fill=border_color)
        draw.text((x + thickness, y), text, font=font, fill=border_color)
        draw.text((x, y - thickness), text, font=font, fill=border_color)
        draw.text((x, y + thickness), text, font=font, fill=border_color)

        # thicker border
        draw.text((x - thickness, y - thickness),
                  text,
                  font=font,
                  fill=border_color)
        draw.text((x + thickness, y - thickness),
                  text,
                  font=font,
                  fill=border_color)
        draw.text((x - thickness, y + thickness),
                  text,
                  font=font,
                  fill=border_color)
        draw.text((x + thickness, y + thickness),
                  text,
                  font=font,
                  fill=border_color)

        # now draw the text over it
        draw.text((x, y), text, font=font, fill=text_color)

    def gen_bg(self, width, height):
        bg = self.gen_bg_from_image(int(width), int(height))
        return bg

    def gen_bg_from_image(self, width, height):
        """
        Resize background, let bg_width>=width, bg_height >=height, and random crop from resized background
        """
        assert width > height

        bg = random.choice(self.bgs)

        scale = max(width / bg.shape[1], height / bg.shape[0])

        rand_scale = scale * random.random()

        scale = scale if width > rand_scale * bg.shape[1] else rand_scale

        out = cv2.resize(bg, None, fx=scale, fy=scale)

        x_offset, y_offset = self.random_xy_offset(height, width, out.shape[0],
                                                   out.shape[1])

        out = out[y_offset:y_offset + height, x_offset:x_offset + width]

        # out = self.apply_gauss_blur(out, ks=[7, 11, 13, 15, 17]) 尝试不再模糊背景

        # TODO: find a better way to deal with background
        # alpha = 255 / bg_mean  # 对比度
        # beta = np.random.randint(bg_mean // 4, bg_mean // 2)  # 亮度
        # out = np.uint8(np.clip((alpha * out + beta), 0, 255))

        return out

    @retry
    def pick_font(self, img_index):
        """
        :param img_index when use list corpus, this param is used
        :return:
            font: truetype
            size: word size, removed offset (width, height)
        """
        word = self.corpus.get_sample(img_index)

        if self.clip_max_chars and len(word) > self.max_chars:
            word = word[:self.max_chars]

        font_path = random.choice(self.fonts)
        font_path_2 = random.choice(self.fonts)

        if self.strict:
            unsupport_chars = self.font_unsupport_chars[font_path]
            # unsupport_chars.extend(self.font_unsupport_chars[font_path_2])
            for c in word:
                if c == ' ':
                    continue
                if c in unsupport_chars:
                    print(
                        'Retry pick_font(), \'%s\' contains chars \'%s\' not supported by font %s'
                        % (word, c, font_path))
                    raise Exception

        # Font size in point
        font_size = random.randint(self.cfg.font_size.min,
                                   self.cfg.font_size.max)
        if self.fonts_by_image:
            font = self.loads_fonts_by_image(font_path, font_size)
        else:
            font = ImageFont.truetype(font_path, font_size)
        if self.cfg.second_font.font_size_change:
            font_size_2 = font_size - random.randint(1, 10)
        else:
            font_size_2 = font_size
        if self.cfg.second_font.font_change:
            if self.fonts_by_image:
                font2 = self.loads_fonts_by_image(font_path_2, font_size_2)
            else:
                font2 = ImageFont.truetype(font_path_2, font_size_2)
        else:
            if self.fonts_by_image:
                font2 = self.loads_fonts_by_image(font_path, font_size_2)
            else:
                font2 = ImageFont.truetype(font_path, font_size_2)

        return word, font, self.get_word_size(font, word, font_size), font2

    def loads_fonts_by_image(self, font_path, font_size):
        fonts = {}
        for x in os.listdir(font_path):
            img = cv2.imdecode(
                np.fromfile(font_path + "/" + x, dtype=np.uint8),
                cv2.IMREAD_COLOR)
            h, w = img.shape[:2]
            fonts[x.replace(".png",
                            "")] = cv2.resize(img,
                                              (font_size, h * font_size // w))
        return fonts

    def get_word_size(self, font, word, default=None):
        """
        Get word size removed offset
        :param font: truetype
        :param word:
        :return:
            size: word size, removed offset (width, height)
        """
        if self.fonts_by_image:
            offset = (0, 0)
            w_list, h_list = [], []
            for c in word:
                if c in font:
                    h, w = font[c].shape[:2]
                else:
                    h, w = default, default
                w_list.append(w)
                h_list.append(h)
            size = (sum(w_list), max(h_list))
        else:
            offset = font.getoffset(word)
            size = font.getsize(word)
        size = (size[0] - offset[0], size[1] - offset[1])
        return size

    def apply_perspective_transform(self,
                                    img,
                                    text_box_pnts,
                                    max_x,
                                    max_y,
                                    max_z,
                                    gpu=False):
        """
        Apply perspective transform on image
        :param img: origin numpy image
        :param text_box_pnts: four corner points of text
        :param x: max rotate angle around X-axis
        :param y: max rotate angle around Y-axis
        :param z: max rotate angle around Z-axis
        :return:
            dst_img:
            dst_img_pnts: points of whole word image after apply perspective transform
            dst_text_pnts: points of text after apply perspective transform
        """

        x = math_utils.cliped_rand_norm(0, max_x)
        y = math_utils.cliped_rand_norm(0, max_y)
        z = math_utils.cliped_rand_norm(0, max_z)

        # print("x: %f, y: %f, z: %f" % (x, y, z))

        transformer = math_utils.PerspectiveTransform(x,
                                                      y,
                                                      z,
                                                      scale=1.0,
                                                      fovy=50)

        dst_img, M33, dst_img_pnts = transformer.transform_image(img, gpu)
        dst_text_pnts = transformer.transform_pnts(text_box_pnts, M33)

        return dst_img, dst_img_pnts, dst_text_pnts

    def apply_blur_on_output(self, img):
        p = []
        funcs = []
        if self.cfg.blur.gauss.enable:
            p.append(self.cfg.blur.gauss.fraction)
            funcs.append(self.apply_gauss_blur)

        if self.cfg.blur.norm.enable:
            p.append(self.cfg.blur.norm.fraction)
            funcs.append(self.apply_norm_blur)

        if self.cfg.blur.motion.enable:
            p.append(self.cfg.blur.motion.fraction)
            funcs.append(self.apply_motion_blur)

        if self.cfg.blur.prydown.enable:
            p.append(self.cfg.blur.prydown.fraction)
            funcs.append(self.apply_prydown)

        if len(p) == 0:
            return img

        blur_func = np.random.choice(funcs, p=p)

        return blur_func(img)

    def apply_gauss_blur(self, img, ks=None):
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)

        sigmas = [0, 1, 2, 3, 4, 5, 6, 7]
        sigma = 0
        if ksize <= 3:
            sigma = random.choice(sigmas)
        img = cv2.GaussianBlur(img, (ksize, ksize), sigma)
        return img

    def apply_norm_blur(self, img, ks=None):
        # kernel == 1, the output image will be the same
        if ks is None:
            ks = [2, 3]
        kernel = random.choice(ks)
        img = cv2.blur(img, (kernel, kernel))
        return img

    def apply_motion_blur(self, img, ks=None, angle=None):
        if ks is None:
            ks = [7, 9, 11, 13]
        ksize = random.choice(ks)
        if angle is None:
            angle = random.randint(0, 360)
        kernel = np.zeros((ksize, ksize), dtype=np.float32)
        kernel[(ksize - 1) // 2, :] = np.ones(ksize, dtype=np.float32)
        kernel = cv2.warpAffine(
            kernel,
            cv2.getRotationMatrix2D((ksize / 2 - 0.5, ksize / 2 - 0.5), angle,
                                    1.0), (ksize, ksize))
        kernel = kernel * (1.0 / np.sum(kernel))
        img = cv2.filter2D(img, -1, kernel)
        return img

    def apply_prydown(self, img):
        """
        模糊图像，模拟小图片放大的效果
        """
        scale = random.uniform(
            1,
            self.cfg.blur.prydown.max_scale)  # todo: use different h/w scale
        height = img.shape[0]
        width = img.shape[1]

        out = cv2.resize(img, (int(width / scale), int(height / scale)),
                         interpolation=cv2.INTER_AREA)
        return cv2.resize(out, (width, height), interpolation=cv2.INTER_AREA)

    def reverse_img(self, word_img):
        offset = np.random.randint(-10, 10)
        return 255 + offset - word_img

    def create_kernals(self):
        self.emboss_kernal = np.array([[-2, -1, 0], [-1, 1, 1], [0, 1, 2]])

        self.sharp_kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])

    def apply_emboss(self, word_img):
        return cv2.filter2D(word_img, -1, self.emboss_kernal)

    def apply_sharp(self, word_img):
        return cv2.filter2D(word_img, -1, self.sharp_kernel)

    def apply_crop(self, text_box_pnts, crop_cfg):
        """
        Random crop text box height top or bottom, we don't need image information in this step, only change box pnts
        :param text_box_pnts: bbox of text [left-top, right-top, right-bottom, left-bottom]
        :param crop_cfg:
        :return:
            croped_text_box_pnts
        """
        height = abs(text_box_pnts[0][1] - text_box_pnts[3][1])
        scale = float(height) / float(self.out_height)

        croped_text_box_pnts = text_box_pnts

        if prob(0.5):
            top_crop = int(
                random.randint(crop_cfg.top.min, crop_cfg.top.max) * scale)
            self.dmsg("top crop %d" % top_crop)
            croped_text_box_pnts[0][1] += top_crop
            croped_text_box_pnts[1][1] += top_crop
        else:
            bottom_crop = int(
                random.randint(crop_cfg.bottom.min, crop_cfg.bottom.max) *
                scale)
            self.dmsg("bottom crop %d " % bottom_crop)
            croped_text_box_pnts[2][1] -= bottom_crop
            croped_text_box_pnts[3][1] -= bottom_crop

        return croped_text_box_pnts