Code example #1
def test_create_dataset_phone():
    output = nmt.create_dataset(
        path=os.path.join(data_dir, "phonemes", "grapheme2phoneme-sample.txt"),
        num_examples=5
    )
    assert list(output) == [
        ('<start> title set fire to that lot ! speech <end>',
         '<start> rodney burke i ve got one card here and it s on the same subject , '
         'eh , but this one says , we think the show is great and we dig the beatles '
         'the most , but we still haven t heard a word from ringo yet . <end>',
         '<start> ringo arf ! arf ! arf ! arf ! <end>',
         '<start> rodney and how about him singing ? well , what will you sing for us '
         ', ringo ? will you say a few words ? <end>',
         '<start> ringo hello , there , kiddies . i d like to sing a song for you '
         'today called matchbox . there you go <end>'),
        ('<start> t ay t ah l s eh t f ay er t uw dh ae t l aa t s p iy ch <end>',
         '<start> r aa d n iy b er k g aa t w ah n k aa r d hh iy r ah n d ih t s aa '
         'n dh ah s ey m s ah b jh eh k t eh b ah t dh ih s w ah n s eh z w iy th ih '
         'ng k dh ah sh ow ih z g r ey t ah n d w iy d ih g dh ah b iy t ah l z dh ah '
         'm ow s t b ah t w iy s t ih l aeavehnt hh er d ah w er d f r ah m r iy ng g '
         'ow y eh t <end>',
         '<start> r iy ng g ow ahrf ahrf ahrf ahrf <end>',
         '<start> r aa d n iy ah n d hh aw ah b aw t hh ih m s ih ng ih ng w eh l w '
         'ah t w ih l y uw s ih ng f ao r ah s r iy ng g ow w ih l y uw s ey ah f y '
         'uw w er d z <end>',
         '<start> r iy ng g ow hh ah l ow dh eh r k ih d iy z ih d l ay k t uw s ih '
         'ng ah s ao ng f ao r y uw t ah d ey k ao l d m ae ch b aa k s dh eh r y uw '
         'g ow <end>')]
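The expected values above suggest that create_dataset reads a tab-separated file, lowercases each side, and wraps every sentence in <start>/<end> markers. A minimal sketch of the assumed layout of grapheme2phoneme-sample.txt; the tab-separated format is an inference from the test output, and the loop is purely illustrative:

# Assumed layout: one example per line, with the grapheme sentence and its
# phoneme transcription separated by a tab, mirroring the spa-eng data.
with open("grapheme2phoneme-sample.txt", encoding="utf-8") as f:
    for line in f:
        grapheme, phoneme = line.rstrip("\n").split("\t")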
Code example #2
def test_create_dataset():
    output = nmt.create_dataset(
        path=os.path.join(data_dir, "spa-eng", "spa-sample.txt"),
        num_examples=5
    )
    assert list(output) == [
        ('<start> go . <end>', '<start> go . <end>', '<start> go . <end>', '<start> go . <end>', '<start> hi . <end>'),
        ('<start> ve . <end>', '<start> vete . <end>', '<start> vaya . <end>', '<start> vayase . <end>',
         '<start> hola . <end>')
    ]
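Both tests are consistent with the create_dataset helper from the TensorFlow "NMT with attention" tutorial, which this project appears to follow. A minimal sketch under that assumption; preprocess_sentence (sketched after example #4) lowercases, pads punctuation, and adds the markers, and the exact code in nmt.py may differ:

import io

def create_dataset(path, num_examples):
    # Read up to num_examples tab-separated lines from the data file.
    lines = io.open(path, encoding="utf-8").read().strip().split("\n")
    word_pairs = [
        [preprocess_sentence(w) for w in line.split("\t")]
        for line in lines[:num_examples]
    ]
    # zip(*pairs) turns N (target, input) pairs into two N-tuples,
    # which is exactly what the assertions above compare against.
    return zip(*word_pairs)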
Code example #3
@pytest.fixture
def eng_fixture():
    path_to_file = os.path.join(parent_dirname, "data", "spa-eng", "spa-sample.txt")
    # create_dataset yields (English, Spanish) tuples (see test_create_dataset
    # above), so the English fixture should return targ_lang, not inp_lang.
    targ_lang, inp_lang = nmt.create_dataset(path_to_file, 10)
    return targ_lang
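A hedged sketch of a test consuming the fixture; the test name and assertion are illustrative rather than taken from the project:

def test_eng_fixture_sentences(eng_fixture):
    # Every sentence produced by create_dataset carries the markers.
    assert all(s.startswith("<start> ") and s.endswith(" <end>")
               for s in eng_fixture)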
Code example #4
    plot_attention(attention_plot, sentence.split(" "), result.split(" "))


if __name__ == "__main__":
    path_to_file = os.path.join(DATA_DIR, "beatles_lyrics_combined",
                                "grapheme2phoneme.txt")
    if not os.path.isfile(path_to_file):
        print(f"cannot find data {path_to_file}. exit")
        sys.exit(1)

    grapheme_sentence = "Baby, you can drive my car"
    phoneme_sentence = "B EY1 B IY0 Y UW1 K AE1 N D R AY1 V M AY1 K AA1 R"
    print(preprocess_sentence(grapheme_sentence))
    print(preprocess_sentence(phoneme_sentence).encode("utf-8"))

    graph, phone = create_dataset(path_to_file, NUM_EXAMPLES)
    print(graph[-1])
    print(phone[-1])

    phone_tensor, graph_tensor, phone_lang, graph_lang = load_dataset(
        path_to_file, NUM_EXAMPLES)
    max_length_graph, max_length_phone = graph_tensor.shape[1], phone_tensor.shape[1]
    (
        phone_tensor_train,
        phone_tensor_val,
        graph_tensor_train,
        graph_tensor_val,
    ) = train_test_split(phone_tensor, graph_tensor, test_size=0.2)

    print(
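The preprocess_sentence calls above print marker-wrapped, lowercased strings such as <start> baby , you can drive my car <end>. A minimal sketch of that helper, assuming the project follows the tutorial's regex-based cleanup (not a verbatim copy of its implementation):

import re
import unicodedata

def unicode_to_ascii(s):
    # Drop combining accents so the ASCII filter below behaves predictably.
    return "".join(c for c in unicodedata.normalize("NFD", s)
                   if unicodedata.category(c) != "Mn")

def preprocess_sentence(w):
    w = unicode_to_ascii(w.lower().strip())
    w = re.sub(r"([?.!,¿])", r" \1 ", w)    # pad punctuation with spaces
    w = re.sub(r'[" "]+', " ", w)           # collapse runs of spaces/quotes
    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)  # keep only letters + punctuation
    return "<start> " + w.strip() + " <end>"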
Code example #5
File: spa2eng_training.py Project: wilsonify/lyrics
    attention_plot = attention_plot[:len(result.split(" ")
                                         ), :len(sentence.split(" "))]
    plot_attention(attention_plot, sentence.split(" "), result.split(" "))


if __name__ == "__main__":
    path_to_file = os.path.join(DATA_DIR, "spa-eng", "spa.txt")
    if not os.path.isfile(path_to_file):
        download_data()

    en_sentence = "May I borrow this book?"
    sp_sentence = "¿Puedo tomar prestado este libro?"
    print(preprocess_sentence(en_sentence))
    print(preprocess_sentence(sp_sentence).encode("utf-8"))

    en, sp = create_dataset(path_to_file, NUM_EXAMPLES)
    print(en[-1])
    print(sp[-1])

    spa_tensor, eng_tensor, spa_lang, eng_lang = load_dataset(
        path_to_file, NUM_EXAMPLES)
    max_length_eng, max_length_spa = eng_tensor.shape[1], spa_tensor.shape[1]
    (
        spa_tensor_train,
        spa_tensor_val,
        eng_tensor_train,
        eng_tensor_val,
    ) = train_test_split(spa_tensor, eng_tensor, test_size=0.2)

    print(
        len(spa_tensor_train),
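For completeness, the load_dataset used by both __main__ blocks returns padded id tensors together with the fitted tokenizers. A sketch following the same tutorial pattern and reusing the create_dataset sketch above (again an assumption, not the project's verbatim code):

import tensorflow as tf

def tokenize(lang):
    # Fit a word-level tokenizer, then pad every sequence to equal length.
    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters="")
    lang_tokenizer.fit_on_texts(lang)
    tensor = lang_tokenizer.texts_to_sequences(lang)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding="post")
    return tensor, lang_tokenizer

def load_dataset(path, num_examples=None):
    # create_dataset yields (target, input) sentence tuples.
    targ_lang, inp_lang = create_dataset(path, num_examples)
    input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
    target_tensor, targ_lang_tokenizer = tokenize(targ_lang)
    return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer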