def test_phoneme_table_get_auxiliary_label_id():
    """Check the ids reported for a label and two auxiliary labels.

    After adding the label ``a`` and registering ``#0`` and ``#1`` as
    auxiliary labels, ``a`` is expected at id 2 and the auxiliary labels
    at ids 3 and 4, in registration order.
    """
    table = PhonemeTable()
    table.add_label('a')
    for aux_label in ('#0', '#1'):
        table.set_auxiliary_label(aux_label)

    label_id = table.get_label_id('a')
    assert label_id == 2
    first_aux_id = table.get_auxiliary_label_id('#0')
    assert first_aux_id == 3
    second_aux_id = table.get_auxiliary_label_id('#1')
    assert second_aux_id == 4
Example #2
0
def test_lexicon_create_fst_without_homophones(workdir,
                                               words_without_homophones):
    """Check the lexicon FST built from a word list without homophones.

    The FST is expected to have 7 states. State 0 has two arcs, both with
    input ``a``, emitting the word labels 愛 and 青 respectively. Each
    branch then consumes one more phoneme (``i`` or ``o``), then the
    auxiliary label ``#0``, and finally returns to state 0 on an
    epsilon:epsilon arc.

    Args:
        workdir: passed through to ``get_vocabulary_table``.
            NOTE(review): presumably a pytest fixture -- confirm.
        words_without_homophones: word list passed to both
            ``get_vocabulary_table`` and ``get_lexicon``.
            NOTE(review): presumably a pytest fixture -- confirm.
    """
    vocab = get_vocabulary_table(workdir, words_without_homophones)
    lexicon = get_lexicon(words_without_homophones)

    phoneme_table = PhonemeTable()
    # NOTE(review): `phonemes` is not defined inside this function;
    # presumably a module-level list of phoneme labels -- confirm.
    phoneme_table.add_labels(phonemes)
    epsilon_id = phoneme_table.get_epsilon_id()
    a = phoneme_table.get_label_id('a')
    i = phoneme_table.get_label_id('i')
    o = phoneme_table.get_label_id('o')

    fst = lexicon.create_fst(phoneme_table, vocab, min_freq=0)
    assert fst.num_states() == 7
    # Looked up only after create_fst(); this test never registers '#0'
    # itself, so the lookup order relative to create_fst() is kept as is.
    aux0 = phoneme_table.get_auxiliary_label_id('#0')

    # Start state: one arc per word, both reading 'a'.
    state = 0
    assert fst.num_arcs(state) == 2
    gen = fst.arcs(state)
    arc = next(gen)
    is_expected_arc(arc, a, vocab.get_label_id('愛'), 1)
    arc = next(gen)
    is_expected_arc(arc, a, vocab.get_label_id('青'), 4)

    # Branch for 愛: 'i', then '#0', then epsilon back to the start state.
    state = 1
    assert fst.num_arcs(state) == 1
    arc = next(fst.arcs(state))
    is_expected_arc(arc, i, epsilon_id, 2)

    state = 2
    assert fst.num_arcs(state) == 1
    arc = next(fst.arcs(state))
    is_expected_arc(arc, aux0, epsilon_id, 3)

    state = 3
    assert fst.num_arcs(state) == 1
    arc = next(fst.arcs(state))
    is_expected_arc(arc, epsilon_id, epsilon_id, 0)

    # Branch for 青: 'o', then '#0', then epsilon back to the start state.
    state = 4
    assert fst.num_arcs(state) == 1
    arc = next(fst.arcs(state))
    is_expected_arc(arc, o, epsilon_id, 5)

    state = 5
    assert fst.num_arcs(state) == 1
    arc = next(fst.arcs(state))
    is_expected_arc(arc, aux0, epsilon_id, 6)

    state = 6
    assert fst.num_arcs(state) == 1
    arc = next(fst.arcs(state))
    is_expected_arc(arc, epsilon_id, epsilon_id, 0)
Example #3
0
def test_token_create_fst_with_auxiliary_labels():
    """Check the token FST built from a table with two auxiliary labels.

    With phonemes ``a`` and ``i`` plus auxiliary labels ``#0`` and ``#1``,
    the FST is expected to have 5 states. For every state the number of
    outgoing arcs and each arc's (input, output, next state) triple are
    checked in iteration order.
    """
    table = PhonemeTable()
    table.add_labels(['a', 'i'])
    eps = table.get_epsilon_id()
    blank = table.get_blank_id()
    a = table.get_label_id('a')
    i = table.get_label_id('i')
    for aux_label in ('#0', '#1'):
        table.set_auxiliary_label(aux_label)
    aux0 = table.get_auxiliary_label_id('#0')
    aux1 = table.get_auxiliary_label_id('#1')

    fst = Token().create_fst(table)
    assert fst.num_states() == 5

    # (state, [(input label, output label, next state), ...]) in the order
    # the arcs are expected to be yielded by fst.arcs(state).
    expected_arcs = (
        # start state: blank self-loop, then one arc per phoneme
        (0, [(blank, eps, 0), (a, a, 3), (i, i, 4)]),
        # second state: blank self-loop, then epsilon to state 2
        (1, [(blank, eps, 1), (eps, eps, 2)]),
        # final (auxiliary) state: epsilon back to start, then one
        # self-loop per auxiliary label on the output side
        (2, [(eps, eps, 0), (eps, aux0, 2), (eps, aux1, 2)]),
        # state entered on 'a': self-loop on 'a', then epsilon to state 1
        (3, [(a, eps, 3), (eps, eps, 1)]),
        # state entered on 'i': self-loop on 'i', then epsilon to state 1
        (4, [(i, eps, 4), (eps, eps, 1)]),
    )
    for state, arcs in expected_arcs:
        assert fst.num_arcs(state) == len(arcs)
        arc_iter = fst.arcs(state)
        for ilabel, olabel, nextstate in arcs:
            is_expected_arc(next(arc_iter), ilabel, olabel, nextstate)