def __call__(self, text: str) -> str:
    """Run ``text`` through every cleaner listed in ``self.cleaner_types``.

    The cleaners are applied in list order, each one receiving the output
    of the previous one.

    Args:
        text: Input text to clean.

    Returns:
        The text after all configured cleaners have been applied. If
        ``self.cleaner_types`` is empty, ``text`` is returned unchanged.

    Raises:
        RuntimeError: If a configured cleaner type is not one of
            ``"tacotron"``, ``"jaconv"`` or ``"vietnamese"``.
    """
    cleaned = text
    for cleaner_type in self.cleaner_types:
        if cleaner_type == "tacotron":
            cleaned = tacotron_cleaner.cleaners.custom_english_cleaners(cleaned)
            continue
        if cleaner_type == "jaconv":
            cleaned = jaconv.normalize(cleaned)
            continue
        if cleaner_type == "vietnamese":
            cleaned = vietnamese_cleaners.vietnamese_cleaner(cleaned)
            continue
        # Anything that reaches this point matched none of the known names.
        raise RuntimeError(f"Not supported: type={cleaner_type}")
    return cleaned
def __call__(self, text: str) -> str:
    """Run ``text`` through every cleaner listed in ``self.cleaner_types``.

    The cleaners are applied in list order, each one receiving the output
    of the previous one.

    Args:
        text: Input text to clean.

    Returns:
        The text after all configured cleaners have been applied. If
        ``self.cleaner_types`` is empty, ``text`` is returned unchanged.

    Raises:
        RuntimeError: If ``"vietnamese"`` is requested while
            ``vietnamese_cleaners`` is ``None``, or if a configured cleaner
            type is not one of ``"tacotron"``, ``"jaconv"``,
            ``"vietnamese"`` or ``"korean_cleaner"``.
    """
    cleaned = text
    for cleaner_type in self.cleaner_types:
        if cleaner_type == "tacotron":
            cleaned = tacotron_cleaner.cleaners.custom_english_cleaners(cleaned)
            continue
        if cleaner_type == "jaconv":
            cleaned = jaconv.normalize(cleaned)
            continue
        if cleaner_type == "vietnamese":
            # The module-level name is None when the cleaner is unavailable;
            # fail with an actionable message instead of an AttributeError.
            if vietnamese_cleaners is None:
                raise RuntimeError("Please install underthesea")
            cleaned = vietnamese_cleaners.vietnamese_cleaner(cleaned)
            continue
        if cleaner_type == "korean_cleaner":
            cleaned = KoreanCleaner.normalize_text(cleaned)
            continue
        # Anything that reaches this point matched none of the known names.
        raise RuntimeError(f"Not supported: type={cleaner_type}")
    return cleaned
#!/usr/bin/env python3

# Copyright 2018 Nagoya University (Tomoki Hayashi) and Kính Phan (@enamoria)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

"""Clean the transcripts of a ``|``-separated metadata file.

Each non-blank input line must hold exactly three ``|``-separated fields:
an ID, a field that is ignored, and the text to clean. The cleaned text is
printed as ``<id> <cleaned text>``, one entry per ID, sorted by ID.
"""

import argparse
import codecs

from vietnamese_cleaner.vietnamese_cleaners import vietnamese_cleaner


def main():
    """Parse the command line, clean every transcript and print the result.

    Raises:
        ValueError: If a non-blank line does not contain exactly three
            ``|``-separated fields.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("text", type=str, help="text to be cleaned")
    args = parser.parse_args()

    cleaned_by_id = {}
    # NOTE(review): codecs.open() is deprecated since Python 3.14 in favour of
    # the builtin open(); it is kept here because the two differ in newline
    # handling -- confirm that is acceptable before migrating.
    with codecs.open(args.text, "r", "utf-8") as fid:
        for lineno, line in enumerate(fid.readlines(), start=1):
            # Blank lines (e.g. a trailing one) carry no record; skip them
            # rather than failing on the field unpacking below.
            if not line.strip():
                continue
            fields = line.split("|")
            if len(fields) != 3:
                raise ValueError(
                    f"{args.text}:{lineno}: expected 3 '|'-separated fields, "
                    f"got {len(fields)}"
                )
            utt_id, _, content = fields
            # A repeated ID overwrites the earlier entry.
            cleaned_by_id[utt_id] = vietnamese_cleaner(content)

    for utt_id in sorted(cleaned_by_id):
        print(f"{utt_id} {cleaned_by_id[utt_id]}")


if __name__ == "__main__":
    main()