Code example #1
0
def tone_and_syllabify_verse(verse,
                             special_tokens,
                             tone_tagger,
                             synalepha=True):
    """Tone-mark every word of *verse*, then split the result into syllables.

    A blank verse yields an empty list; a verse that is itself one of the
    special tokens is returned untouched as a one-element list.  Words that
    are special tokens are left as-is; all others are passed through
    ``tone_tagger.tone`` before syllabification.
    """
    if not verse.strip():
        return []
    if verse in special_tokens.values():
        return [verse]

    specials = special_tokens.values()
    toned_words = []
    for word in verse.split():
        if word.strip() in specials:
            toned_words.append(word)
        else:
            toned_words.append(tone_tagger.tone(word))

    return syllabify_verse(' '.join(toned_words),
                           special_tokens,
                           synalepha=synalepha)
Code example #2
0
def get_mean_std_verse_length(toned_verses_syls, synalepha, special_tokens=None):
    """Return ``(mean, std)`` of verse lengths measured in syllables.

    Parameters
    ----------
    toned_verses_syls : iterable of lists of syllable strings (one list
        per verse, possibly containing special tokens).
    synalepha : accepted for interface symmetry with the syllabifier but
        currently unused -- lengths are computed on the syllables as given.
    special_tokens : mapping of special-token names to token strings;
        tokens are excluded from the count.  When omitted, falls back to
        the module-level ``special_tokens`` (the original implementation
        read that global implicitly).

    Verses whose syllables are all special tokens are skipped.  Returns
    ``(nan, nan)`` when no verse contributes a length (made explicit here
    instead of numpy's empty-array RuntimeWarning).
    """
    if special_tokens is None:
        # Backward-compatible fallback to the module-level mapping.
        special_tokens = globals()['special_tokens']
    specials = set(special_tokens.values())

    lengths = []
    for verse_syls in toned_verses_syls:
        # Count syllables that are not special tokens (empty strings were
        # counted by the original too, so they still are).
        count = sum(1 for s in verse_syls if s.strip() not in specials)
        if count:
            lengths.append(count)

    if not lengths:
        return float('nan'), float('nan')

    arr = np.array(lengths)
    return np.mean(arr), np.std(arr)
Code example #3
0
def is_hendecasyllable(syllables, special_tokens):
    """Return True when the verse scans as a hendecasyllable.

    After dropping special tokens and prettifying, the verse qualifies
    when it has more than ten cleaned syllables' worth of material only if
    its last toned syllable sits at index 9 (the canonical tenth position).
    Shorter verses are rejected outright.
    """
    cleaned = []
    for syl in syllables:
        if syl in special_tokens.values():
            continue
        cleaned.append(prettify_text(syl, special_tokens).strip())

    if len(cleaned) <= 9:
        return False
    return get_last_tonedsyl_index(cleaned, special_tokens) == 9
Code example #4
0
def _apply_synalepha(syllables, special_tokens):
    """Merge vowel-adjacent syllables across word boundaries (synalepha)
    so the verse moves toward the hendecasyllable target.

    ``syllables`` is the flat syllable list including ``WORD_SEP`` marker
    tokens.  Returns a new (possibly shorter) list; the input is not
    mutated.  Verses already at ten or fewer real syllables are returned
    unchanged.
    """

    # Real syllables only: drop special tokens, prettify and strip the rest.
    syllables_cleaned = [
        prettify_text(s, special_tokens).strip() for s in syllables
        if s not in special_tokens.values()
    ]

    # At most 10 syllables (indices 0..9): nothing to merge.
    if len(syllables_cleaned) <= 9:
        return syllables

    # SMARAGLIATA (Italian slang: rough hack) -- simple character-class
    # vowel test.  NOTE(review): includes H and the apostrophe, presumably
    # so mute h and elided forms count as vowel contact -- confirm.
    vowels = "ÁÀAaàáÉÈEeèéIÍÌiíìOoóòÚÙUuúùHh'"

    n_synalepha = 0

    # First pass: count candidate boundaries -- a WORD_SEP whose previous
    # syllable ends with a vowel and whose next syllable starts with one.
    i = 1
    while i < (len(syllables) - 1):
        if syllables[i] == special_tokens['WORD_SEP']:
            pre_syl = syllables[i - 1]
            next_syl = syllables[i + 1]
            if pre_syl[-1] in vowels and next_syl[0] in vowels:
                i += 1  # skip the merged-in syllable so it is not reused
                n_synalepha += 1
        i += 1

    # Index of the last toned syllable among the cleaned syllables.
    last_tonedrv_index = get_last_tonedsyl_index(syllables, special_tokens)

    # Merges required to bring the last tone onto index 9 (may be <= 0,
    # in which case no merge is applied below).
    n_synalepha_needed = last_tonedrv_index - 9

    # Never apply more merges than there are candidates.
    n_synalepha_to_apply = min(n_synalepha_needed, n_synalepha)

    # Second pass: rebuild the list, fusing the first
    # ``n_synalepha_to_apply`` candidate boundaries in order.
    result = [syllables[0]]
    i = 1
    n_synalepha_applied = 0
    while i < (len(syllables) - 1):
        if syllables[i] == special_tokens['WORD_SEP']:
            pre_syl = syllables[i - 1]
            next_syl = syllables[i + 1]
            if pre_syl[-1] in vowels and next_syl[0] in vowels:

                if n_synalepha_applied < n_synalepha_to_apply:
                    # Fuse previous syllable + separator + next syllable
                    # into one item.  NOTE(review): the WORD_SEP text is
                    # kept inside the merged syllable -- presumably
                    # stripped later by prettify_text; verify.
                    result.append(result[-1] + syllables[i] + next_syl)
                    del result[-2]
                    n_synalepha_applied += 1
                    i += 1  # consume the syllable we just merged in
                else:
                    result.append(syllables[i])

            else:
                result.append(syllables[i])
        else:
            result.append(syllables[i])
        i += 1
    result.append(syllables[-1])

    return result
Code example #5
0
def get_last_tonedsyl_index(syllables, special_tokens):
    """Index of the last toned syllable, or ``None`` when no syllable
    carries a tone mark.

    Special tokens are filtered out first, so the returned index refers to
    positions within the syllable-only (``word_sep``-free) sequence.
    """
    cleaned = [
        prettify_text(s, special_tokens).strip()
        for s in syllables
        if s not in special_tokens.values()
    ]
    # Scan backwards; the first hit is the last toned syllable.
    for idx in range(len(cleaned) - 1, -1, -1):
        if is_toned_syl(cleaned[idx]):
            return idx
    return None
Code example #6
0
def syllabify_verse(verse, special_tokens, synalepha=True):
    """Split *verse* into a flat syllable list, keeping special tokens as-is.

    Returns ``[]`` for blank input and ``[verse]`` when the verse itself is
    a special token.  Ordinary words are syllabified via ``syllabify_word``
    (which joins syllables with ``#``).  When ``synalepha`` is true,
    vowel-adjacent syllables at word boundaries may be merged afterwards.

    Example result shape:
    ['nèl', '<word_sep>', 'mèz', 'zo', ..., 'vì', 'ta', '<end_of_verso>']
    """
    if not verse.strip():
        return []
    if verse in special_tokens.values():
        return [verse]

    specials = special_tokens.values()
    syllables = []
    for word in verse.split():
        if word.strip() in specials:
            syllables.append(word)
        else:
            syllables.extend(syllabify_word(word).split('#'))

    if synalepha:
        syllables = _apply_synalepha(syllables, special_tokens)

    return syllables
Code example #7
0

# Script entry point: load the toned Divine Comedy corpus and count the
# verses that do not scan as hendecasyllables.
# NOTE(review): relies on a module-level ``special_tokens`` mapping that is
# defined elsewhere in the file.
if __name__ == "__main__":

    #    print(special_tokens)

    # Load the pre-toned, cleaned text; one verse per line.
    with open("divina_commedia_toned_cleaned.txt", "r") as f:
        divine_comedy = f.read()

#    divine_comedy = prettify_text(divine_comedy,special_tokens)
    divine_comedy_list = divine_comedy.split("\n")

    # Remove lines that consist solely of special tokens.
    divine_comedy_list = [
        line for line in divine_comedy_list
        if line.strip() not in special_tokens.values()
    ]

    # tone_tagger = ToneTagger()
    # Count verses the syllabifier fails to scan as hendecasyllables.
    # NOTE(review): ``count`` is accumulated but never printed or returned
    # -- presumably a leftover diagnostic; confirm before removing.
    count = 0
    for line in divine_comedy_list[:]:
        syllables = syllabify_verse(line, special_tokens)
        if not is_hendecasyllable(syllables, special_tokens):
            count += 1
#        print(syllables)
        # Strip the word-separator and end-of-verse markers.
        syllables = [
            syl for syl in syllables if syl != special_tokens['WORD_SEP']
        ]
        syllables = [
            syl for syl in syllables if syl != special_tokens['END_OF_VERSO']
        ]
Code example #8
0
def generate_text(model_rhyme,
                  model_verse,
                  special_tokens,
                  vocab_size_rhyme,
                  vocab_size_verse,
                  syl2idx_rhyme,
                  idx2syl_rhyme,
                  syl2idx_verse,
                  idx2syl_verse,
                  seq_length_rhyme,
                  seq_length_verse,
                  start_seq_rhyme,
                  start_seq_verse,
                  temperature=1.0):
    """Generate one canto by alternating a 'rhyme' model and a 'verse' model.

    For each verse the rhyme model first emits structural special tokens
    plus the verse ending; the verse model then produces the rest of the
    verse.  Both outputs are flipped with ``[::-1]`` before being joined,
    so the models appear to operate on reversed syllable sequences --
    generation works from the rhyming ending backwards (TODO confirm
    against the training pipeline).  Generation stops when the rhyme model
    emits ``END_OF_CANTO``.

    Returns ``(text, text_no_tones)``: the concatenated syllables with and
    without tone marks.

    NOTE(review): ``vocab_size_rhyme`` and ``vocab_size_verse`` are never
    used in this body -- presumably kept for interface symmetry.
    """
    seq_text_rhyme = start_seq_rhyme
    seq_text_verse = start_seq_verse

    generated_text_list = []

    # Both models are stateful; clear recurrent state before a new canto.
    model_rhyme.reset_states()
    model_verse.reset_states()
    end_of_canto = False
    while not end_of_canto:
        #      and generated_text_list.count(special_tokens['END_OF_VERSO']) < 10:
        #      and generated_text_list.count(special_tokens['END_OF_TERZINA']) < 45 \

        next_syl_rhyme = ''
        end_verse_list = []   # verse-ending syllables, in emission order
        structure_list = []   # structural special tokens for this verse

        # --- Rhyme model: generate until the verse terminator appears ---
        while not end_of_canto and next_syl_rhyme != special_tokens[
                'END_OF_VERSO']:

            # Keep only the trailing context window the model expects.
            seq_text_rhyme = seq_text_rhyme[-seq_length_rhyme:]

            sequence_rhyme = [
                syl2idx_rhyme[syl]
                for syl in seq_text_rhyme[-seq_length_rhyme:]
            ]
            sequence_rhyme = tf.keras.preprocessing.sequence.pad_sequences(
                [sequence_rhyme], maxlen=seq_length_rhyme)
            x_rhyme = np.array(sequence_rhyme, dtype='int64')

            prediction_rhyme = model_rhyme.predict(x_rhyme, verbose=0)

            # Keep the distribution for the last timestep only.
            prediction_rhyme = tf.squeeze(prediction_rhyme, 0)[-1]
            prediction_rhyme = prediction_rhyme / temperature
            prediction_rhyme = prediction_rhyme.numpy()

            #    index_rhyme = np.random.choice(len(prediction_rhyme), size=1, p=prediction_rhyme)[0]
            # Greedy decoding.  NOTE(review): dividing by ``temperature``
            # does not change the argmax, so temperature is a no-op here.
            index_rhyme = np.argmax(prediction_rhyme)

            next_syl_rhyme = idx2syl_rhyme[index_rhyme]
            seq_text_rhyme.append(next_syl_rhyme)

            # Special tokens (other than the verse terminator) describe
            # canto structure; everything else belongs to the ending.
            if next_syl_rhyme in special_tokens.values(
            ) and next_syl_rhyme != special_tokens['END_OF_VERSO']:
                structure_list.append(next_syl_rhyme)
            else:
                end_verse_list.append(next_syl_rhyme)

            if next_syl_rhyme == special_tokens['END_OF_CANTO']:
                end_of_canto = True

        generated_text_list += structure_list

        # Reversed copy of the ending, fed to the verse model as context.
        reverse_rhyme_list = end_verse_list[::-1]

        ##        seq_text_verse += structure_list
        seq_text_verse += reverse_rhyme_list

        next_syl_verse = ''

        rest_revese_verse_list = []

        # --- Verse model: fill in the rest of the verse ---
        while not end_of_canto and next_syl_verse != special_tokens[
                'END_OF_VERSO']:

            seq_text_verse = seq_text_verse[-seq_length_verse:]

            sequence_verse = [
                syl2idx_verse[syl]
                for syl in seq_text_verse[-seq_length_verse:]
            ]
            sequence_verse = tf.keras.preprocessing.sequence.pad_sequences(
                [sequence_verse], maxlen=seq_length_verse)
            x_verse = np.array(sequence_verse, dtype='int64')

            prediction_verse = model_verse.predict(x_verse, verbose=0)
            prediction_verse = tf.squeeze(prediction_verse, 0)[-1]
            prediction_verse = prediction_verse / temperature
            prediction_verse = prediction_verse.numpy()

            # Stochastic sampling; assumes the model output is already a
            # valid probability distribution (softmax) -- TODO confirm.
            index_verse = np.random.choice(len(prediction_verse),
                                           size=1,
                                           p=prediction_verse)[0]

            next_syl_verse = idx2syl_verse[index_verse]
            # The terminator is consumed but never appended to the output.
            if next_syl_verse != special_tokens['END_OF_VERSO']:
                seq_text_verse.append(next_syl_verse)
                rest_revese_verse_list.append(next_syl_verse)

        # Flip the verse-model output and append the ending exactly as the
        # rhyme model emitted it.
        whole_verse_list = rest_revese_verse_list[::-1] + end_verse_list

        generated_text_list += whole_verse_list

        # Stream progress to stdout as each verse completes.
        print(prettify_text(''.join(structure_list), special_tokens),
              end='',
              flush=True)
        # print(prettify_text(''.join(whole_verse_list), special_tokens),  end='', flush=True)
        print(prettify_text(
            ''.join(remove_tone(whole_verse_list, special_tokens)),
            special_tokens),
              end='',
              flush=True)

    generated_text_no_tone_list = remove_tone(generated_text_list,
                                              special_tokens)

    return ''.join(generated_text_list), ''.join(generated_text_no_tone_list)