예제 #1
0
def get_all_symbols(prep_dir: str) -> Set[str]:
    """Return the union of the symbols of every text found under *prep_dir*.

    The text names come from ``get_subfolder_names(prep_dir)``; for each one
    the symbol converter stored in its text directory is loaded and its
    ``get_all_symbols()`` result is merged into the returned set.
    """
    collected: Set[str] = set()
    for name in get_subfolder_names(prep_dir):
        # create=False: only look the directory up, never create it here.
        converter = load_text_symbol_converter(
            get_text_dir(prep_dir, name, create=False))
        collected |= converter.get_all_symbols()
    return collected
예제 #2
0
def get_infer_sentences(base_dir: str, prep_name: str,
                        text_name: str) -> InferSentenceList:
    """Load a stored text as an ``InferSentenceList``.

    Args:
        base_dir: Forwarded to ``get_prepared_dir`` to locate the prepared
            data directory.
        prep_name: Forwarded to ``get_prepared_dir``.
        text_name: Name of the text whose directory is resolved with
            ``get_text_dir``.

    Returns:
        The result of ``InferSentenceList.from_sentences`` built from the
        text's CSV, the accent ids of the prepared directory and the text's
        symbol converter.

    Raises:
        AssertionError: If the text directory does not exist.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)
    if not os.path.isdir(text_dir):
        message = f"The text '{text_name}' doesn't exist."
        print(message)
        # Raise explicitly instead of `assert False`: assert statements are
        # removed under `python -O`, which would let execution continue and
        # try to load files from a directory that does not exist. The
        # exception type is kept so existing callers behave the same.
        raise AssertionError(message)

    return InferSentenceList.from_sentences(
        sentences=load_text_csv(text_dir),
        accents=load_prep_accents_ids(prep_dir),
        symbols=load_text_symbol_converter(text_dir))
예제 #3
0
def _accent_template(base_dir: str, prep_name: str, text_name: str):
    """Regenerate and save the accent template of a text.

    Prints "Please add text first." and does nothing else when the text
    directory does not exist. Otherwise the template is computed by
    ``infer_accents_template`` from the stored sentences, the text's symbol
    converter and the accent ids of the prepared directory, then written
    with ``_save_accents_csv``.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)

    # Guard: nothing to update without an existing text directory.
    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return

    print("Updating accent template...")
    stored_sentences = load_text_csv(text_dir)
    symbol_converter = load_text_symbol_converter(text_dir)
    accent_id_dict = load_prep_accents_ids(prep_dir)
    template = infer_accents_template(
        sentences=stored_sentences,
        text_symbols=symbol_converter,
        accent_ids=accent_id_dict,
    )
    _save_accents_csv(text_dir, template)
예제 #4
0
def normalize_text(base_dir: str, prep_name: str, text_name: str):
    """Run ``infer_norm`` on a stored text and persist the result.

    Prints "Please add text first." and does nothing else when the text
    directory does not exist. Otherwise the updated sentences are printed in
    formatted form, the sentences and the new symbol converter are saved,
    the accent template is refreshed and the unknown-symbol check is run.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)

    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return

    print("Normalizing text...")
    stored_sentences = load_text_csv(text_dir)
    old_converter = load_text_symbol_converter(text_dir)
    new_converter, new_sentences = infer_norm(
        sentences=stored_sentences,
        text_symbols=old_converter)

    # Show the outcome before anything is written to disk.
    accent_id_dict = load_prep_accents_ids(prep_dir)
    formatted = new_sentences.get_formatted(
        symbol_id_dict=new_converter,
        accent_id_dict=accent_id_dict)
    print("\n" + formatted)

    _save_text_csv(text_dir, new_sentences)
    save_text_symbol_converter(text_dir, new_converter)

    # Follow-up steps that read the freshly saved files.
    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
예제 #5
0
def accent_apply(base_dir: str, prep_name: str, text_name: str):
    """Apply the accents CSV of a text to its sentences and save them.

    Prints "Please add text first." and does nothing else when the text
    directory does not exist. Otherwise ``infer_accents_apply`` is run on
    the stored sentences, the result is printed in formatted form, saved
    with ``_save_text_csv``, and the unknown-symbol check is run.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)

    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return

    print("Applying accents...")
    stored_sentences = load_text_csv(text_dir)
    accented = _load_accents_csv(text_dir)
    accent_id_dict = load_prep_accents_ids(prep_dir)
    new_sentences = infer_accents_apply(
        sentences=stored_sentences,
        accented_symbols=accented,
        accent_ids=accent_id_dict,
    )

    # The converter and accent ids are loaded anew for the formatted output.
    symbol_converter = load_text_symbol_converter(text_dir)
    display_accent_ids = load_prep_accents_ids(prep_dir)
    formatted = new_sentences.get_formatted(
        symbol_id_dict=symbol_converter,
        accent_id_dict=display_accent_ids)
    print("\n" + formatted)

    _save_text_csv(text_dir, new_sentences)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
예제 #6
0
def add_text(base_dir: str, prep_name: str, text_name: str, filepath: str,
             lang: Language):
    """Read a text file, process it with ``infer_add`` and store it.

    Prints "Please prepare data first." and does nothing else when the
    prepared directory does not exist. Otherwise the content of *filepath*
    is passed to ``infer_add`` together with *lang*, the result is printed
    in formatted form, and the text directory is created (``create=True``)
    to hold the saved sentences and symbol converter. Finally the accent
    template is refreshed and the unknown-symbol check is run.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)

    if not os.path.isdir(prep_dir):
        print("Please prepare data first.")
        return

    print("Adding text...")
    raw_text = read_text(filepath)
    converter, sentences = infer_add(text=raw_text, lang=lang)

    accent_id_dict = load_prep_accents_ids(prep_dir)
    formatted = sentences.get_formatted(
        symbol_id_dict=converter,
        accent_id_dict=accent_id_dict)
    print("\n" + formatted)

    # The text directory is only created once processing has succeeded.
    text_dir = get_text_dir(prep_dir, text_name, create=True)
    _save_text_csv(text_dir, sentences)
    save_text_symbol_converter(text_dir, converter)

    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
예제 #7
0
def ipa_convert_text(base_dir: str,
                     prep_name: str,
                     text_name: str,
                     ignore_tones: bool = False,
                     ignore_arcs: bool = True):
    """Run ``infer_convert_ipa`` on a stored text and persist the result.

    Prints "Please add text first." and does nothing else when the text
    directory does not exist. *ignore_tones* and *ignore_arcs* are forwarded
    unchanged to ``infer_convert_ipa``. The updated sentences are printed in
    formatted form and saved together with the new symbol converter; then
    the accent template is refreshed and the unknown-symbol check is run.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)

    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return

    print("Converting text to IPA...")
    stored_sentences = load_text_csv(text_dir)
    old_converter = load_text_symbol_converter(text_dir)
    new_converter, new_sentences = infer_convert_ipa(
        sentences=stored_sentences,
        text_symbols=old_converter,
        ignore_tones=ignore_tones,
        ignore_arcs=ignore_arcs)

    accent_id_dict = load_prep_accents_ids(prep_dir)
    formatted = new_sentences.get_formatted(
        symbol_id_dict=new_converter,
        accent_id_dict=accent_id_dict)
    print("\n" + formatted)

    _save_text_csv(text_dir, new_sentences)
    save_text_symbol_converter(text_dir, new_converter)

    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)
예제 #8
0
def map_text(base_dir: str,
             prep_name: str,
             text_name: str,
             symbols_map_path: str,
             ignore_arcs: bool = True):
    """Apply a symbols map to a stored text via ``sents_map`` and save it.

    Prints "Please add text first." and does nothing else when the text
    directory does not exist. The map is loaded with
    ``SymbolsMap.load(symbols_map_path)``; *ignore_arcs* is forwarded
    unchanged to ``sents_map``. The updated sentences are printed in
    formatted form and saved together with the new symbol converter; then
    the accent template is refreshed and the unknown-symbol check is run.
    """
    prep_dir = get_prepared_dir(base_dir, prep_name, create=False)
    text_dir = get_text_dir(prep_dir, text_name, create=False)

    if not os.path.isdir(text_dir):
        print("Please add text first.")
        return

    stored_sentences = load_text_csv(text_dir)
    old_converter = load_text_symbol_converter(text_dir)
    mapping = SymbolsMap.load(symbols_map_path)
    new_converter, new_sentences = sents_map(
        sentences=stored_sentences,
        text_symbols=old_converter,
        symbols_map=mapping,
        ignore_arcs=ignore_arcs)

    accent_id_dict = load_prep_accents_ids(prep_dir)
    formatted = new_sentences.get_formatted(
        symbol_id_dict=new_converter,
        accent_id_dict=accent_id_dict)
    print("\n" + formatted)

    _save_text_csv(text_dir, new_sentences)
    save_text_symbol_converter(text_dir, new_converter)

    _accent_template(base_dir, prep_name, text_name)
    _check_for_unknown_symbols(base_dir, prep_name, text_name)