Example #1
0
def filter_symbols(data: MergedDataset, symbols: SymbolIdDict,
                   accent_ids: AccentsDict, speakers: SpeakersDict,
                   allowed_symbol_ids: Set[int],
                   logger: Logger) -> MergedDatasetContainer:
    """Drop every utterance containing a symbol outside *allowed_symbol_ids*.

    Logs statistics before and after filtering and rebuilds the symbol,
    accent and speaker dictionaries from the surviving entries.
    """
    # TODO: maybe check all symbol ids are valid before
    kept_symbols = [symbols.get_symbol(sid) for sid in allowed_symbol_ids]
    dropped_symbols = [
        symbols.get_symbol(sid) for sid in symbols.get_all_symbol_ids()
        if sid not in allowed_symbol_ids
    ]
    logger.info(
        f"Keep utterances with these symbols: {' '.join(kept_symbols)}")
    logger.info(
        f"Remove utterances with these symbols: {' '.join(dropped_symbols)}"
    )
    logger.info("Statistics before filtering:")
    log_stats(data, symbols, accent_ids, speakers, logger)

    filtered = MergedDataset([
        entry for entry in data.items()
        if contains_only_allowed_symbols(
            deserialize_list(entry.serialized_symbol_ids), allowed_symbol_ids)
    ])
    if len(filtered) > 0:
        removed_count = len(data) - len(filtered)
        kept_ratio = len(filtered) / len(data) * 100
        logger.info(
            f"Removed {removed_count} from {len(data)} total entries and got {len(filtered)} entries ({kept_ratio:.2f}%)."
        )
    else:
        logger.info("Removed all utterances!")

    # Rebuild the id dictionaries so they cover only what survived filtering.
    new_symbol_ids = update_symbols(filtered, symbols)
    new_accent_ids = update_accents(filtered, accent_ids)
    new_speaker_ids = update_speakers(filtered, speakers)
    logger.info("Statistics after filtering:")
    log_stats(filtered, new_symbol_ids, new_accent_ids, new_speaker_ids, logger)

    return MergedDatasetContainer(
        name=None,
        data=filtered,
        accent_ids=new_accent_ids,
        speaker_ids=new_speaker_ids,
        symbol_ids=new_symbol_ids,
    )
Example #2
0
def sims_to_csv(sims: Dict[int, List[Tuple[int, float]]],
                symbols: SymbolIdDict) -> pd.DataFrame:
    lines = []
    assert len(sims) == len(symbols)
    for symbol_id, similarities in sims.items():
        sims = [f"{symbols.get_symbol(symbol_id)}", "<=>"]
        for other_symbol_id, similarity in similarities:
            sims.append(symbols.get_symbol(other_symbol_id))
            sims.append(f"{similarity:.2f}")
        lines.append(sims)
    df = pd.DataFrame(lines)
    return df
Example #3
0
def plot_embeddings(
        symbols: SymbolIdDict, emb: torch.Tensor,
        logger: Logger) -> Tuple[pd.DataFrame, go.Figure, go.Figure]:
    """Build a similarity table plus 2D and 3D figures for *emb*.

    Expects one embedding row per symbol (asserted below).
    """
    assert emb.shape[0] == len(symbols)

    logger.info(f"Emb size {emb.shape}")
    logger.info(f"Sym len {len(symbols)}")

    similarity_table = sims_to_csv(get_similarities(emb.numpy()), symbols)

    # Symbols in id order, used as point labels in both plots.
    ordered_symbols = [symbols.get_symbol(sym_id) for sym_id in range(len(symbols))]
    normed = norm2emb(emb)
    fig_2d = emb_plot_2d(normed, ordered_symbols)
    fig_3d = emb_plot_3d(normed, ordered_symbols)

    return similarity_table, fig_2d, fig_3d