def __init__(self, font_dir: Path, font_list_file: Path, font_size: Tuple[int, int]):
    """
    Read the font list, resolve every entry against font_dir and load the
    chars supported by each font.

    Args:
        font_dir (Path): directory the entries of the font list are resolved against
        font_list_file (Path): UTF-8 text file with one font file name per line,
            blank lines are ignored
        font_size (Tuple[int, int]): (min, max) font size, min must be smaller than max

    Raises:
        PanicError: if the font list contains no entry, or a listed font file
            does not exist
    """
    assert font_size[0] < font_size[1]
    self.font_size_min = font_size[0]
    self.font_size_max = font_size[1]
    self.font_paths: List[str] = []
    self.font_support_chars_cache: Dict[str, Set] = {}
    # Created in self.update_font_support_chars(), used to filter font_path
    self.font_support_chars_intersection_with_chars: Dict[str, Set] = {}

    with open(str(font_list_file), "r", encoding="utf-8") as f:
        # A blank line would resolve to font_dir itself, which exists and
        # would therefore be registered as a font, so drop blank lines here.
        font_names = [name for name in (line.strip() for line in f) if name]

    if not font_names:
        raise PanicError(f"font list file is empty: {font_list_file}")

    for name in font_names:
        font_path = font_dir / name
        if not font_path.exists():
            raise PanicError(f"font file not exist: {font_path}")
        self.font_paths.append(str(font_path))

    self._load_font_support_chars()
def __init__(
    self,
    cfg: "CorpusCfg",
):
    """
    Load every file in cfg.text_paths into one string and optionally filter it
    by the chars file.

    Args:
        cfg (CorpusCfg): corpus configuration, read here as a CharCorpusCfg

    Raises:
        PanicError: if text_paths is empty, a text path does not exist, or the
            loaded text is shorter than cfg.length[1]
    """
    super().__init__(cfg)

    self.cfg: CharCorpusCfg
    self.text = ""

    if len(self.cfg.text_paths) == 0:
        raise PanicError("text_paths must not be empty")

    # Collect the file contents first and join once, instead of growing the
    # string file by file.
    chunks = []
    for p in self.cfg.text_paths:
        if not os.path.exists(p):
            raise PanicError(f"text_path not exists: {p}")

        logger.info(f"load: {p}")
        with open(p, "r", encoding="utf-8") as f:
            chunks.append(f.read())
    self.text = "".join(chunks)

    # NOTE(review): font filtering is assumed to be nested under
    # filter_by_chars (both need chars_file) - confirm against the original layout.
    if self.cfg.filter_by_chars:
        self.text = Corpus.filter_by_chars(self.text, self.cfg.chars_file)
        self.font_manager.update_font_support_chars(self.cfg.chars_file)
        if self.cfg.filter_font:
            self.font_manager.filter_font_path(
                self.cfg.filter_font_min_support_chars)

    if len(self.text) < self.cfg.length[1]:
        raise PanicError("too few texts")
def __init__(self, cfg: "CorpusCfg"):
    """
    Build the list of candidate texts either from the lines of cfg.text_paths
    or from cfg.items, then optionally filter by the chars file.

    Args:
        cfg (CorpusCfg): corpus configuration, read here as an EnumCorpusCfg

    Raises:
        PanicError: if neither or both of text_paths and items are set
    """
    super().__init__(cfg)

    self.cfg: EnumCorpusCfg
    has_paths = len(self.cfg.text_paths) != 0
    has_items = len(self.cfg.items) != 0

    if not has_paths and not has_items:
        raise PanicError("text_paths or items must not be empty")
    if has_paths and has_items:
        raise PanicError("only one of text_paths or items can be set")

    self.texts: List[str] = []

    if has_paths:
        for text_path in self.cfg.text_paths:
            with open(str(text_path), "r", encoding="utf-8") as f:
                self.texts.extend(line.strip() for line in f.readlines())
    else:
        # Exactly one source is set at this point, so items is non-empty.
        self.texts = self.cfg.items

    # NOTE(review): font filtering is assumed to be nested under
    # filter_by_chars (both need chars_file) - confirm against the original layout.
    if self.cfg.filter_by_chars:
        self.texts = Corpus.filter_by_chars(self.texts, self.cfg.chars_file)
        self.font_manager.update_font_support_chars(self.cfg.chars_file)
        if self.cfg.filter_font:
            min_support = self.cfg.filter_font_min_support_chars
            self.font_manager.filter_font_path(min_support)
def __init__(self, cfg: "CorpusCfg"):
    """
    Read every file in cfg.text_paths, optionally filter the texts by the
    chars file, and split them into words with cfg.separator.

    Args:
        cfg (CorpusCfg): corpus configuration, read here as a WordCorpusCfg

    Raises:
        PanicError: if text_paths is empty or fewer than cfg.num_word[1]
            words were loaded
    """
    super().__init__(cfg)

    self.cfg: WordCorpusCfg
    if not len(self.cfg.text_paths):
        raise PanicError("text_paths must not be empty")

    self.words: List[str] = []

    texts = []
    for text_path in self.cfg.text_paths:
        with open(text_path, "r", encoding="utf-8") as f:
            texts.append(f.read().strip())

    # NOTE(review): font filtering is assumed to be nested under
    # filter_by_chars (both need chars_file) - confirm against the original layout.
    if self.cfg.filter_by_chars:
        texts = Corpus.filter_by_chars(texts, self.cfg.chars_file)
        self.font_manager.update_font_support_chars(self.cfg.chars_file)
        if self.cfg.filter_font:
            min_support = self.cfg.filter_font_min_support_chars
            self.font_manager.filter_font_path(min_support)

    for text in texts:
        self.words += text.split(self.cfg.separator)

    logger.info(f"Load {len(self.words)} words")

    max_words_needed = self.cfg.num_word[1]
    if len(self.words) < max_words_needed:
        raise PanicError("too few words")
def __init__(self, cfg: "CorpusCfg"):
    """
    Load the chars of cfg.chars_file into self.chars.

    Args:
        cfg (CorpusCfg): corpus configuration, read here as a RandCorpusCfg

    Raises:
        PanicError: if chars_file is None or does not exist
    """
    super().__init__(cfg)

    self.cfg: RandCorpusCfg
    chars_file = self.cfg.chars_file

    chars_file_usable = chars_file is not None and chars_file.exists()
    if not chars_file_usable:
        raise PanicError(f"chars_file not exists: {chars_file}")

    loaded_chars = Corpus.load_chars_file(chars_file)
    self.chars = list(loaded_chars)
def __init__(self, cfg: "CorpusCfg"):
    """
    Load the chars of cfg.chars_file into self.chars and update the font
    manager with the same chars file.

    Args:
        cfg (CorpusCfg): corpus configuration, read here as a RandCorpusCfg

    Raises:
        PanicError: if chars_file is None or does not exist
    """
    super().__init__(cfg)

    self.cfg: RandCorpusCfg
    chars_file = self.cfg.chars_file

    chars_file_usable = chars_file is not None and chars_file.exists()
    if not chars_file_usable:
        raise PanicError(f"chars_file not exists: {chars_file}")

    loaded_chars = load_chars_file(chars_file)
    self.chars = list(loaded_chars)

    self.font_manager.update_font_support_chars(self.cfg.chars_file)
    if self.cfg.filter_font:
        min_support = self.cfg.filter_font_min_support_chars
        self.font_manager.filter_font_path(min_support)
def load_chars_file(chars_file, log=False):
    """
    Read a chars file and return the set of chars it contains.

    A line that is empty after stripping but contains SPACE_CHAR is taken as
    the space char itself; it may appear only once in the file.

    Args:
        chars_file (Path): one char per line
        log (bool): Whether to print log

    Returns:
        Set: chars in file

    Raises:
        PanicError: if a line holds more than one char after stripping, or a
            second space line is found
    """
    with open(str(chars_file), "r", encoding="utf-8") as f:
        raw_lines = f.readlines()

    collected = []
    space_seen = False
    for i, line in enumerate(raw_lines):
        stripped = line.strip()
        if len(stripped) > 1:
            raise PanicError(
                f"Line {i} in {chars_file} is invalid, make sure one char one line"
            )

        # Ordinary case: a single char, or an empty line without SPACE_CHAR.
        if stripped or SPACE_CHAR not in line:
            collected.append(stripped)
            continue

        # The line stripped to nothing but carries SPACE_CHAR.
        if space_seen:
            raise PanicError(f"Find two space in {chars_file}")
        if log:
            logger.info(
                f"Find space in line {i} when load {chars_file}")
        space_seen = True
        collected.append(SPACE_CHAR)

    chars = set("".join(collected))
    if log:
        logger.info(f"load {len(chars)} chars from: {chars_file}")
    return chars
def __init__(self, cfg: RenderCfg):
    """
    Store the render configuration, normalise the corpus and validate that
    corpus and corpus_effects agree in shape.

    A corpus list with exactly one element is unwrapped to that element.

    Args:
        cfg (RenderCfg): render configuration

    Raises:
        PanicError: if corpus and corpus_effects are both lists of different
            length, or only one of them is a list (an unset corpus_effects is
            accepted together with a corpus list)
    """
    self.cfg = cfg
    self.layout = cfg.layout

    if isinstance(cfg.corpus, list) and len(cfg.corpus) == 1:
        self.corpus = cfg.corpus[0]
    else:
        self.corpus = cfg.corpus

    if is_list(self.corpus) and is_list(self.cfg.corpus_effects):
        if len(self.corpus) != len(self.cfg.corpus_effects):
            # Report the lengths themselves rather than the objects.
            raise PanicError(
                f"corpus length({len(self.corpus)}) is not equal to "
                f"corpus_effects length({len(self.cfg.corpus_effects)})"
            )

    if is_list(self.corpus) and (
        self.cfg.corpus_effects and not is_list(self.cfg.corpus_effects)
    ):
        raise PanicError("corpus is list, corpus_effects is not list")

    if not is_list(self.corpus) and is_list(self.cfg.corpus_effects):
        raise PanicError("corpus_effects is list, corpus is not list")

    self.bg_manager = BgManager(cfg.bg_dir, cfg.pre_load_bg_img)
def filter_by_chars(text, chars_file):
    """
    Filter chars not exist in chars file

    Empty input (an empty string, an empty list, or a list of empty strings)
    is returned unchanged and logged as 0.00% filtered.

    Args:
        text (Union[str, List[str]]): text to filter
        chars_file (Path): one char per line

    Returns:
        Union[str, List[str]]: string(s) removed chars not exist in chars file

    Raises:
        PanicError: if chars_file is None or does not exist
    """
    if chars_file is None or not chars_file.exists():
        raise PanicError(f"chars_file not exists: {chars_file}")

    chars = load_chars_file(chars_file, log=True)

    logger.info("filtering text by chars...")

    # Handle a single string as a one-element batch so both input kinds
    # share one code path.
    is_batch = isinstance(text, list)
    items = text if is_batch else [text]

    total_count = 0
    filtered_count = 0
    filtered_chars = set()
    out = []
    for item in items:
        kept = [c for c in item if c in chars]
        total_count += len(item)
        dropped = len(item) - len(kept)
        if dropped:
            filtered_count += dropped
            filtered_chars.update(c for c in item if c not in chars)
        out.append("".join(kept))

    # Avoid dividing by zero when there was nothing to filter.
    percent = (filtered_count / total_count) * 100 if total_count else 0.0
    logger.info(
        f"Filter {percent:.2f}%({filtered_count}) chars in input text。"
        f"Unique chars({len(filtered_chars)}): {filtered_chars}"
    )

    return out if is_batch else out[0]
def __init__(self, cfg: "CorpusCfg"):
    """
    Read every file in cfg.text_paths, optionally filter the texts by the
    chars file, and split them into words with cfg.separator.

    Args:
        cfg (CorpusCfg): corpus configuration, read here as a WordCorpusCfg

    Raises:
        PanicError: if text_paths is empty or fewer than cfg.num_word[1]
            words were loaded
    """
    super().__init__(cfg)

    self.cfg: WordCorpusCfg
    if not len(self.cfg.text_paths):
        raise PanicError("text_paths must not be empty")

    self.words: List[str] = []

    texts = []
    for text_path in self.cfg.text_paths:
        with open(text_path, "r", encoding="utf-8") as f:
            texts.append(f.read())

    if self.cfg.filter_by_chars:
        texts = Corpus.filter_by_chars(texts, self.cfg.chars_file)

    for text in texts:
        self.words += text.split(self.cfg.separator)

    max_words_needed = self.cfg.num_word[1]
    if len(self.words) < max_words_needed:
        raise PanicError("too few words")
def __init__(
    self, font_dir: Path, font_list_file: Path, font_size: Tuple[int, int]
):
    """
    Read the font list and resolve every entry against font_dir.

    Args:
        font_dir (Path): directory the entries of the font list are resolved against
        font_list_file (Path): UTF-8 text file with one font file name per line,
            blank lines are ignored
        font_size (Tuple[int, int]): (min, max) font size, min must be smaller than max

    Raises:
        PanicError: if the font list contains no entry, or a listed font file
            does not exist
    """
    assert font_size[0] < font_size[1]
    self.font_size_min = font_size[0]
    self.font_size_max = font_size[1]
    self.font_paths: List[str] = []
    self.font_support_chars_cache: Dict[str, Set] = {}

    with open(str(font_list_file), "r", encoding="utf-8") as f:
        # A blank line would resolve to font_dir itself, which exists and
        # would therefore be registered as a font, so drop blank lines here.
        font_names = [name for name in (line.strip() for line in f) if name]

    # Fail early: a manager without any font cannot provide one later.
    if not font_names:
        raise PanicError(f"font list file is empty: {font_list_file}")

    for name in font_names:
        font_path = font_dir / name
        if not font_path.exists():
            raise PanicError(f"font file not exist: {font_path}")
        self.font_paths.append(str(font_path))
def gen_multi_corpus(self) -> Tuple[PILImage, str]:
    """
    Draw one sample of every corpus, arrange the pieces with the layout and
    paste the merged text mask on a background.

    Returns:
        Tuple[PILImage, str]: result of paste_text_mask_on_bg and the merged
        text returned by the layout

    Raises:
        PanicError: if the layout returns a different number of bboxes than
            the number of text bboxes
    """
    font_texts: List[FontText] = [corpus.sample() for corpus in self.corpus]

    bg = self.bg_manager.get_bg()

    # A render-level color cfg, when set, gives one color for all corpora.
    shared_color = None
    if self.cfg.text_color_cfg is not None:
        shared_color = self.cfg.text_color_cfg.get_color(bg)

    text_masks = []
    text_bboxes = []
    for idx, font_text in enumerate(font_texts):
        if shared_color is not None:
            color = shared_color
        else:
            color = self.corpus[idx].cfg.text_color_cfg.get_color(bg)

        mask = draw_text_on_bg(
            font_text, color, char_spacing=self.corpus[idx].cfg.char_spacing)
        bbox = BBox.from_size(mask.size)

        effects = None
        if self.cfg.corpus_effects is not None:
            effects = self.cfg.corpus_effects[idx]
        if effects is not None:
            mask, bbox = effects.apply_effects(mask, bbox)

        text_masks.append(mask)
        text_bboxes.append(bbox)

    bbox_copies = [it.copy() for it in text_bboxes]
    mask_bboxes = [BBox.from_size(it.size) for it in text_masks]
    text_mask_bboxes, merged_text = self.layout(
        font_texts, bbox_copies, mask_bboxes)
    if len(text_mask_bboxes) != len(text_bboxes):
        raise PanicError(
            "points and text_bboxes should have same length after layout output"
        )

    merged_bbox = BBox.from_bboxes(text_mask_bboxes)
    merged_text_mask = transparent_img(merged_bbox.size)
    for mask, placed_bbox in zip(text_masks, text_mask_bboxes):
        merged_text_mask.paste(mask, placed_bbox.left_top)

    final_mask = merged_text_mask
    if self.cfg.perspective_transform is not None:
        transformer = PerspectiveTransform(self.cfg.perspective_transform)
        # TODO: refactor this, now we must call get_transformed_size to call gen_warp_matrix
        transformer.get_transformed_size(merged_text_mask.size)
        final_mask, _ = transformer.do_warp_perspective(merged_text_mask)

    if self.cfg.layout_effects is not None:
        final_mask, _ = self.cfg.layout_effects.apply_effects(
            final_mask, BBox.from_size(final_mask.size))

    img = self.paste_text_mask_on_bg(bg, final_mask)
    return img, merged_text