Exemple #1
0
    def __init__(self, font_dir: Path, font_list_file: Path,
                 font_size: Tuple[int, int]):
        """
        Collect the font files named in font_list_file and load their supported chars.

        Args:
            font_dir (Path): directory the names in font_list_file are relative to
            font_list_file (Path): utf-8 text file, one font file name per line;
                blank lines are ignored
            font_size (Tuple[int, int]): (min, max) font size, min must be
                smaller than max

        Raises:
            PanicError: if font_list_file names no font, or a named font file
                does not exist
        """
        assert font_size[0] < font_size[1]
        self.font_size_min = font_size[0]
        self.font_size_max = font_size[1]
        self.font_paths: List[str] = []
        self.font_support_chars_cache: Dict[str, Set] = {}
        # Created in self.update_font_support_chars(), used to filter font_path
        self.font_support_chars_intersection_with_chars: Dict[str, Set] = {}

        with open(str(font_list_file), "r", encoding="utf-8") as f:
            stripped_lines = [line.strip() for line in f]

        # Drop blank lines: `font_dir / ""` is font_dir itself, which exists,
        # so a blank line would register the directory as a font file.
        font_names = [name for name in stripped_lines if name]

        if not font_names:
            raise PanicError(f"font list file is empty: {font_list_file}")

        for name in font_names:
            font_path = font_dir / name
            if not font_path.exists():
                raise PanicError(f"font file not exist: {font_path}")
            self.font_paths.append(str(font_path))

        self._load_font_support_chars()
    def __init__(
        self,
        cfg: "CorpusCfg",
    ):
        """
        Load every file in cfg.text_paths into a single string stored in self.text.

        When cfg.filter_by_chars is set, the text is filtered by cfg.chars_file
        and the font manager is updated with the same chars file (and, when
        cfg.filter_font is set, asked to filter its font paths).

        Args:
            cfg (CorpusCfg): corpus config, used here as a CharCorpusCfg

        Raises:
            PanicError: if cfg.text_paths is empty, a text path does not exist,
                or the loaded text has fewer chars than cfg.length[1]
        """
        super().__init__(cfg)

        self.cfg: CharCorpusCfg
        self.text = ""

        if len(self.cfg.text_paths) == 0:
            raise PanicError("text_paths must not be empty")

        # Collect file contents and join once instead of growing a string with +=
        contents = []
        for p in self.cfg.text_paths:
            if not os.path.exists(p):
                raise PanicError(f"text_path not exists: {p}")

            logger.info(f"load: {p}")
            with open(p, "r", encoding="utf-8") as f:
                contents.append(f.read())
        self.text = "".join(contents)

        if self.cfg.filter_by_chars:
            self.text = Corpus.filter_by_chars(self.text, self.cfg.chars_file)
            self.font_manager.update_font_support_chars(self.cfg.chars_file)
            if self.cfg.filter_font:
                self.font_manager.filter_font_path(
                    self.cfg.filter_font_min_support_chars)

        if len(self.text) < self.cfg.length[1]:
            raise PanicError("too few texts")
Exemple #3
0
    def __init__(self, cfg: "CorpusCfg"):
        """
        Build self.texts either from the lines of cfg.text_paths or from cfg.items.

        Exactly one of cfg.text_paths and cfg.items must be non-empty. Lines
        read from files are stripped of surrounding whitespace. When
        cfg.filter_by_chars is set, the texts are filtered by cfg.chars_file
        and the font manager is updated with the same chars file.

        Args:
            cfg (CorpusCfg): corpus config, used here as an EnumCorpusCfg

        Raises:
            PanicError: if both text_paths and items are empty, or both are set
        """
        super().__init__(cfg)

        self.cfg: EnumCorpusCfg
        has_paths = len(self.cfg.text_paths) != 0
        has_items = len(self.cfg.items) != 0

        if not has_paths and not has_items:
            raise PanicError("text_paths or items must not be empty")
        if has_paths and has_items:
            raise PanicError("only one of text_paths or items can be set")

        self.texts: List[str] = []

        if has_paths:
            for text_path in self.cfg.text_paths:
                with open(str(text_path), "r", encoding="utf-8") as f:
                    self.texts.extend(line.strip() for line in f.readlines())
        elif has_items:
            self.texts = self.cfg.items

        if not self.cfg.filter_by_chars:
            return

        chars_file = self.cfg.chars_file
        self.texts = Corpus.filter_by_chars(self.texts, chars_file)
        self.font_manager.update_font_support_chars(chars_file)
        if self.cfg.filter_font:
            min_support_chars = self.cfg.filter_font_min_support_chars
            self.font_manager.filter_font_path(min_support_chars)
    def __init__(self, cfg: "CorpusCfg"):
        """
        Load the words of every file in cfg.text_paths into self.words.

        Each file is read whole and stripped of surrounding whitespace. When
        cfg.filter_by_chars is set, the texts are filtered by cfg.chars_file
        and the font manager is updated with the same chars file. The texts
        are then split into words on cfg.separator.

        Args:
            cfg (CorpusCfg): corpus config, used here as a WordCorpusCfg

        Raises:
            PanicError: if cfg.text_paths is empty, or fewer words than
                cfg.num_word[1] were loaded
        """
        super().__init__(cfg)

        self.cfg: WordCorpusCfg
        if len(self.cfg.text_paths) == 0:
            raise PanicError("text_paths must not be empty")

        self.words: List[str] = []

        texts = []
        for text_path in self.cfg.text_paths:
            with open(text_path, "r", encoding="utf-8") as f:
                content = f.read()
            texts.append(content.strip())

        if self.cfg.filter_by_chars:
            chars_file = self.cfg.chars_file
            texts = Corpus.filter_by_chars(texts, chars_file)
            self.font_manager.update_font_support_chars(chars_file)
            if self.cfg.filter_font:
                min_support_chars = self.cfg.filter_font_min_support_chars
                self.font_manager.filter_font_path(min_support_chars)

        self.words.extend(
            word for text in texts for word in text.split(self.cfg.separator)
        )

        word_count = len(self.words)
        logger.info(f"Load {word_count} words")

        if word_count < self.cfg.num_word[1]:
            raise PanicError("too few words")
Exemple #5
0
    def __init__(self, cfg: "CorpusCfg"):
        """
        Load the chars of cfg.chars_file into the list self.chars.

        Args:
            cfg (CorpusCfg): corpus config, used here as a RandCorpusCfg

        Raises:
            PanicError: if cfg.chars_file is None or does not exist
        """
        super().__init__(cfg)

        self.cfg: RandCorpusCfg
        chars_file = self.cfg.chars_file
        chars_file_missing = chars_file is None or not chars_file.exists()
        if chars_file_missing:
            raise PanicError(f"chars_file not exists: {chars_file}")

        loaded_chars = Corpus.load_chars_file(chars_file)
        self.chars = list(loaded_chars)
Exemple #6
0
    def __init__(self, cfg: "CorpusCfg"):
        """
        Load the chars of cfg.chars_file into self.chars and update the font manager.

        The font manager is always updated with cfg.chars_file; its font paths
        are additionally filtered when cfg.filter_font is set.

        Args:
            cfg (CorpusCfg): corpus config, used here as a RandCorpusCfg

        Raises:
            PanicError: if cfg.chars_file is None or does not exist
        """
        super().__init__(cfg)

        self.cfg: RandCorpusCfg
        chars_file = self.cfg.chars_file
        chars_file_missing = chars_file is None or not chars_file.exists()
        if chars_file_missing:
            raise PanicError(f"chars_file not exists: {chars_file}")

        loaded_chars = load_chars_file(chars_file)
        self.chars = list(loaded_chars)

        self.font_manager.update_font_support_chars(chars_file)
        if self.cfg.filter_font:
            min_support_chars = self.cfg.filter_font_min_support_chars
            self.font_manager.filter_font_path(min_support_chars)
Exemple #7
0
def load_chars_file(chars_file, log=False):
    """
    Read a chars file and return the set of chars it contains.

    Every line is stripped of surrounding whitespace and must then hold at
    most one char. A line that is empty after stripping but contains
    SPACE_CHAR stands for the space char itself; at most one such line is
    allowed.

    Args:
        chars_file (Path): one char per line
        log (bool): Whether to print log

    Returns:
        Set: chars in file

    Raises:
        PanicError: if a stripped line holds more than one char, or a second
            space line is found

    """
    with open(str(chars_file), "r", encoding="utf-8") as f:
        raw_lines = f.readlines()

    chars = set()
    space_seen = False
    for line_no, raw_line in enumerate(raw_lines):
        stripped = raw_line.strip()
        if len(stripped) > 1:
            raise PanicError(
                f"Line {line_no} in {chars_file} is invalid, make sure one char one line"
            )

        # Ordinary line: a single char, or a blank line that adds nothing
        if stripped or SPACE_CHAR not in raw_line:
            chars.update(stripped)
            continue

        # Whitespace-only line containing SPACE_CHAR: it declares the space char
        if space_seen:
            raise PanicError(f"Find two space in {chars_file}")

        if log:
            logger.info(
                f"Find space in line {line_no} when load {chars_file}")
        space_seen = True
        chars.update(SPACE_CHAR)

    if log:
        logger.info(f"load {len(chars)} chars from: {chars_file}")
    return chars
    def __init__(self, cfg: RenderCfg):
        """
        Store the render config, validate corpus/corpus_effects and create the bg manager.

        A cfg.corpus list holding exactly one corpus is unwrapped to that
        single corpus. corpus and cfg.corpus_effects must then either both be
        lists of the same length, or corpus_effects must not be a list.

        Args:
            cfg (RenderCfg): render config

        Raises:
            PanicError: if corpus and corpus_effects are lists of different
                length, or only one of the two is a list
        """
        self.cfg = cfg
        self.layout = cfg.layout
        if isinstance(cfg.corpus, list) and len(cfg.corpus) == 1:
            self.corpus = cfg.corpus[0]
        else:
            self.corpus = cfg.corpus

        if is_list(self.corpus) and is_list(self.cfg.corpus_effects):
            if len(self.corpus) != len(self.cfg.corpus_effects):
                # Report the lengths, not the objects themselves
                raise PanicError(
                    f"corpus length({len(self.corpus)}) is not equal to "
                    f"corpus_effects length({len(self.cfg.corpus_effects)})"
                )

        if is_list(self.corpus) and (self.cfg.corpus_effects
                                     and not is_list(self.cfg.corpus_effects)):
            raise PanicError("corpus is list, corpus_effects is not list")

        if not is_list(self.corpus) and is_list(self.cfg.corpus_effects):
            raise PanicError("corpus_effects is list, corpus is not list")

        self.bg_manager = BgManager(cfg.bg_dir, cfg.pre_load_bg_img)
    def filter_by_chars(text, chars_file):
        """
        Filter chars not exist in chars file

        Args:
            text (Union[str, List[str]]): text to filter
            chars_file (Path): one char per line

        Returns:
            Union[str, List[str]]: string(s) removed chars not exist in chars file

        Raises:
            PanicError: if chars_file is None or does not exist

        """
        if chars_file is None or not chars_file.exists():
            raise PanicError(f"chars_file not exists: {chars_file}")

        chars = load_chars_file(chars_file, log=True)

        logger.info("filtering text by chars...")

        # Handle a single string as a one-element batch so both input kinds
        # share one filtering loop
        is_text_list = isinstance(text, list)
        sources = text if is_text_list else [text]

        total_count = 0
        filtered_count = 0
        filtered_chars = set()
        filtered_texts = []
        for source in sources:
            kept = []
            for c in source:
                if c in chars:
                    kept.append(c)
                else:
                    filtered_count += 1
                    filtered_chars.add(c)
            total_count += len(source)
            # Join once per text instead of growing a string char by char
            filtered_texts.append("".join(kept))

        # Empty input has nothing to filter; avoid dividing by zero
        if total_count:
            filtered_percent = (filtered_count / total_count) * 100
        else:
            filtered_percent = 0.0

        logger.info(
            f"Filter {filtered_percent:.2f}%({filtered_count}) chars in input text。"
            f"Unique chars({len(filtered_chars)}): {filtered_chars}"
        )
        return filtered_texts if is_text_list else filtered_texts[0]
    def __init__(self, cfg: "CorpusCfg"):
        """
        Load the words of every file in cfg.text_paths into self.words.

        Each file is read whole and stripped of surrounding whitespace, so
        that the file's trailing newline does not end up inside the last word.
        When cfg.filter_by_chars is set, the texts are filtered by
        cfg.chars_file. The texts are then split into words on cfg.separator.

        Args:
            cfg (CorpusCfg): corpus config, used here as a WordCorpusCfg

        Raises:
            PanicError: if cfg.text_paths is empty, or fewer words than
                cfg.num_word[1] were loaded
        """
        super().__init__(cfg)

        self.cfg: WordCorpusCfg
        if len(self.cfg.text_paths) == 0:
            raise PanicError("text_paths must not be empty")

        self.words: List[str] = []

        texts = []
        for text_path in self.cfg.text_paths:
            with open(text_path, "r", encoding="utf-8") as f:
                text = f.read()
                # Strip so "foo bar\n" splits into "foo", "bar" and not "bar\n"
                texts.append(text.strip())

        if self.cfg.filter_by_chars:
            texts = Corpus.filter_by_chars(texts, self.cfg.chars_file)

        for text in texts:
            self.words.extend(text.split(self.cfg.separator))

        if len(self.words) < self.cfg.num_word[1]:
            raise PanicError("too few words")
Exemple #11
0
    def __init__(
        self, font_dir: Path, font_list_file: Path, font_size: Tuple[int, int]
    ):
        """
        Collect the font files named in font_list_file into self.font_paths.

        Args:
            font_dir (Path): directory the names in font_list_file are relative to
            font_list_file (Path): utf-8 text file, one font file name per line;
                blank lines are ignored
            font_size (Tuple[int, int]): (min, max) font size, min must be
                smaller than max

        Raises:
            PanicError: if a named font file does not exist
        """
        assert font_size[0] < font_size[1]
        self.font_size_min = font_size[0]
        self.font_size_max = font_size[1]
        self.font_paths: List[str] = []
        self.font_support_chars_cache: Dict[str, Set] = {}

        with open(str(font_list_file), "r", encoding="utf-8") as f:
            stripped_lines = [line.strip() for line in f]

        # Drop blank lines: `font_dir / ""` is font_dir itself, which exists,
        # so a blank line would register the directory as a font file.
        font_names = [name for name in stripped_lines if name]

        for name in font_names:
            font_path = font_dir / name
            if not font_path.exists():
                raise PanicError(f"font file not exist: {font_path}")
            self.font_paths.append(str(font_path))
    def gen_multi_corpus(self) -> Tuple[PILImage, str]:
        """
        Render one sample from every corpus and compose them into a single image.

        Each sample is drawn as its own text mask (with its corpus effects, if
        any), the masks are positioned by self.layout and pasted onto one
        transparent image, which is then optionally perspective-transformed,
        passed through the layout effects and pasted on a background.

        Returns:
            Tuple[PILImage, str]: the final image and the merged text returned
                by the layout

        Raises:
            PanicError: if the layout returns a different number of bboxes
                than the number of text bboxes it was given
        """
        font_texts: List[FontText] = [corpus.sample() for corpus in self.corpus]

        bg = self.bg_manager.get_bg()

        # Color from the render-level config; when unavailable (None), each
        # corpus falls back to its own color config below
        shared_color = None
        if self.cfg.text_color_cfg is not None:
            shared_color = self.cfg.text_color_cfg.get_color(bg)

        text_masks, text_bboxes = [], []
        for idx, font_text in enumerate(font_texts):
            corpus_cfg = self.corpus[idx].cfg

            color = shared_color
            if color is None:
                color = corpus_cfg.text_color_cfg.get_color(bg)
            mask = draw_text_on_bg(
                font_text, color, char_spacing=corpus_cfg.char_spacing
            )

            bbox = BBox.from_size(mask.size)
            effects = None
            if self.cfg.corpus_effects is not None:
                effects = self.cfg.corpus_effects[idx]
            if effects is not None:
                mask, bbox = effects.apply_effects(mask, bbox)

            text_masks.append(mask)
            text_bboxes.append(bbox)

        bbox_copies = [bbox.copy() for bbox in text_bboxes]
        mask_size_bboxes = [BBox.from_size(mask.size) for mask in text_masks]
        text_mask_bboxes, merged_text = self.layout(
            font_texts, bbox_copies, mask_size_bboxes
        )
        if len(text_mask_bboxes) != len(text_bboxes):
            raise PanicError(
                "points and text_bboxes should have same length after layout output"
            )

        # Paste every mask at the position the layout assigned to it
        merged_bbox = BBox.from_bboxes(text_mask_bboxes)
        merged_text_mask = transparent_img(merged_bbox.size)
        for mask, placed_bbox in zip(text_masks, text_mask_bboxes):
            merged_text_mask.paste(mask, placed_bbox.left_top)

        final_mask = merged_text_mask
        if self.cfg.perspective_transform is not None:
            transformer = PerspectiveTransform(self.cfg.perspective_transform)
            # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
            transformer.get_transformed_size(merged_text_mask.size)
            final_mask, _ = transformer.do_warp_perspective(merged_text_mask)

        if self.cfg.layout_effects is not None:
            final_mask, _ = self.cfg.layout_effects.apply_effects(
                final_mask, BBox.from_size(final_mask.size)
            )

        return self.paste_text_mask_on_bg(bg, final_mask), merged_text