def is_hendecasyllable(syllables, special_tokens):
    """Tell whether a syllabified verse counts as a hendecasyllable.

    Entries of ``syllables`` that are equal to one of the values of
    ``special_tokens`` are dropped; every remaining entry is passed through
    ``prettify_text`` and stripped.  The verse qualifies when more than nine
    such syllables remain and the last toned syllable (as located by
    ``get_last_tonedsyl_index``) sits at 0-based index 9, i.e. is the tenth.

    Args:
        syllables: iterable of syllable strings, possibly mixed with
            special-token strings.
        special_tokens: mapping whose values are the special-token strings.

    Returns:
        ``True`` when both conditions above hold, ``False`` otherwise.
    """
    plain_syllables = []
    for syl in syllables:
        if syl in special_tokens.values():
            continue
        plain_syllables.append(prettify_text(syl, special_tokens).strip())

    # Fewer than ten syllables can never carry a stress in tenth position.
    if len(plain_syllables) <= 9:
        return False

    # The already-cleaned list is what gets handed to the index lookup.
    return get_last_tonedsyl_index(plain_syllables, special_tokens) == 9
Beispiel #2
0
def get_well_formed_rhymes(toned_verses_syls, synalepha):
    """Score how well a generated canto follows a chained tercet rhyme scheme.

    The verses are concatenated, rendered with ``prettify_text`` (using the
    module-level ``special_tokens``), stripped line by line, and grouped into
    consecutive tercets of three lines.  Rhymes are then counted as follows:

    * the first tercet contributes one rhyme: its first line against its
      third line;
    * every pair of consecutive tercets contributes one rhyme, scored in
      thirds: middle line of the earlier tercet against the first line of the
      later one, first against third line of the later one, and middle line
      of the earlier tercet against the third line of the later one;
    * when the very last tercet is incomplete (fewer than three lines), only
      the middle line of the earlier tercet against its first line is
      checked, and a match is worth the whole rhyme.

    Args:
        toned_verses_syls: iterable of verses, each an iterable of syllable
            strings that are joined without separators.
        synalepha: unused by this function; kept for interface compatibility.

    Returns:
        The fraction ``correct_rhymes / n_rhymes`` as a float, or ``0.0`` when
        the text holds fewer than two tercets and therefore no rhyme can be
        evaluated (this used to raise ``ZeroDivisionError``).
    """
    canto_text = ''.join(
        ''.join(verse_syls) for verse_syls in toned_verses_syls)
    canto_text = prettify_text(canto_text, special_tokens)

    # Drop the 'CANTO' heading line and surrounding whitespace of every line.
    stripped_lines = [
        line.strip() for line in canto_text.split("\n") if line != 'CANTO'
    ]
    # Collapse the blank line left between tercets, then split into verses.
    canto_text = "\n".join(stripped_lines).replace('\n\n', '\n').strip()
    verses = canto_text.split("\n")

    tercets = [verses[start:start + 3] for start in range(0, len(verses), 3)]

    n_rhymes = 0
    correct_rhymes = 0
    last_pair = len(tercets) - 2

    for i, (previous, current) in enumerate(zip(tercets, tercets[1:])):
        if i == 0:
            # Opening tercet: its outer lines must rhyme with each other.
            n_rhymes += 1
            if is_rhyme(previous[0], previous[2]):
                correct_rhymes += 1

        n_rhymes += 1

        if i == last_pair and len(current) < 3:
            # Truncated closing tercet: only its first line can be checked.
            if is_rhyme(previous[1], current[0]):
                correct_rhymes += 1
        else:
            if is_rhyme(previous[1], current[0]):
                correct_rhymes += 1 / 3

            if is_rhyme(current[0], current[2]):
                correct_rhymes += 1 / 3

            if is_rhyme(previous[1], current[2]):
                correct_rhymes += 1 / 3

    # With fewer than two tercets nothing was counted; avoid dividing by zero.
    if n_rhymes == 0:
        return 0.0

    return correct_rhymes / n_rhymes
def _apply_synalepha(syllables, special_tokens):
    """Merge syllables across word boundaries to shorten an over-long verse.

    A synalepha candidate is a ``special_tokens['WORD_SEP']`` entry whose
    preceding syllable ends with, and whose following syllable starts with,
    a character of the vowel set below.  Applying it fuses the three entries
    (previous syllable, separator, next syllable) into a single list element.

    Only as many candidates are merged as needed to bring the last toned
    syllable (per ``get_last_tonedsyl_index``) down to index 9, scanning left
    to right.

    Args:
        syllables: list of syllable strings interleaved with special tokens.
        special_tokens: mapping of token names to token strings; must contain
            ``'WORD_SEP'``.

    Returns:
        ``syllables`` itself when the verse has nine or fewer real syllables
        or no toned syllable can be located; otherwise a new list with the
        selected synalephas applied.
    """
    syllables_cleaned = [
        prettify_text(s, special_tokens).strip() for s in syllables
        if s not in special_tokens.values()
    ]

    if len(syllables_cleaned) <= 9:
        return syllables

    # HACK: rough vowel set -- 'h' and the apostrophe are also treated as
    # vowel-like so that they do not block a merge.
    vowels = "ÁÀAaàáÉÈEeèéIÍÌiíìOoóòÚÙUuúùHh'"
    word_sep = special_tokens['WORD_SEP']
    last_inner = len(syllables) - 1

    def _is_candidate(idx):
        """Return True when position ``idx`` is a mergeable word separator."""
        if syllables[idx] != word_sep:
            return False
        return (syllables[idx - 1][-1] in vowels
                and syllables[idx + 1][0] in vowels)

    # First pass: count how many synalephas are available.
    n_synalepha = 0
    i = 1
    while i < last_inner:
        if _is_candidate(i):
            # Skip the syllable that would be absorbed by this merge.
            i += 1
            n_synalepha += 1
        i += 1

    last_toned_index = get_last_tonedsyl_index(syllables, special_tokens)
    if last_toned_index is None:
        # No toned syllable found: there is no target position to aim for.
        return syllables

    n_synalepha_to_apply = min(last_toned_index - 9, n_synalepha)

    # Second pass: rebuild the verse, merging the first candidates found.
    result = [syllables[0]]
    n_synalepha_applied = 0
    i = 1
    while i < last_inner:
        if n_synalepha_applied < n_synalepha_to_apply and _is_candidate(i):
            result[-1] = result[-1] + syllables[i] + syllables[i + 1]
            n_synalepha_applied += 1
            # The following syllable is now part of the merged element.
            i += 1
        else:
            result.append(syllables[i])
        i += 1

    # If the last merge swallowed the final element, ``i`` ran past it; do not
    # append it a second time.
    if i < len(syllables):
        result.append(syllables[-1])

    return result
def get_last_tonedsyl_index(syllables, special_tokens):
    """Locate the last toned syllable of a verse.

    Entries equal to one of the values of ``special_tokens`` are ignored, so
    the returned position counts real syllables only.  Each remaining entry
    is rendered with ``prettify_text`` and stripped before being tested with
    ``is_toned_syl``.

    Args:
        syllables: iterable of syllable strings, possibly mixed with
            special-token strings.
        special_tokens: mapping whose values are the special-token strings.

    Returns:
        The 0-based index, among the real syllables, of the last one for
        which ``is_toned_syl`` is true, or ``None`` when there is none.
    """
    plain_syllables = []
    for raw in syllables:
        if raw in special_tokens.values():
            continue
        plain_syllables.append(prettify_text(raw, special_tokens).strip())

    # Walk backwards so the first hit is the last toned syllable.
    for position in range(len(plain_syllables) - 1, -1, -1):
        if is_toned_syl(plain_syllables[position]):
            return position

    return None
Beispiel #5
0
# Put the grandparent directory of this file on the import path so that the
# project package imported below can be resolved when running this script
# directly.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Quiet TensorFlow's native logging; set before the project imports below,
# which presumably pull TensorFlow in.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from dante_by_tonedrev_syl.text_processing import clean_comedy, prettify_text, special_tokens, remove_all_punctuation
from dante_by_tonedrev_syl.tone import ToneTagger

working_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'dante_by_tonedrev_syl')

divine_comedy_file = os.path.join(os.path.dirname(working_dir),
                                  "divina_commedia",
                                  "divina_commedia_accent_UTF-8.txt")

# The corpus is UTF-8 (see the file name); decode it explicitly rather than
# relying on the platform's default encoding.
with open(divine_comedy_file, "r", encoding="utf-8") as f:
    divine_comedy = f.read()

divine_comedy = clean_comedy(divine_comedy, special_tokens)
divine_comedy = prettify_text(divine_comedy, special_tokens)
#divine_comedy = remove_all_punctuation(divine_comedy)

tone_tagger = ToneTagger()

print("\nMODEL: {}\n".format(tone_tagger.model_filename))

# Smoke test: tag only the first ten whitespace-separated words.
divine_comedy_words = divine_comedy.split()[:10]

for w in divine_comedy_words:
    print(tone_tagger.tone(w), flush=True, end=' ')
# Seed for the verse model: the slice of the syllable list between the chosen
# start index and the end-of-verse index.
start_seq_verse = syls_verse_list[start_idx:index_eov]

generated_text, generated_text_no_tone = generate_text(
    model_rhyme, model_verse, special_tokens,
    vocab_size_rhyme, vocab_size_verse,
    syl2idx_rhyme, idx2syl_rhyme,
    syl2idx_verse, idx2syl_verse,
    SEQ_LENGTH_RHYME, SEQ_LENGTH_VERSE,
    start_seq_rhyme, start_seq_verse,
    temperature=1.0)

# Each result is saved twice: rendered through prettify_text and as the raw
# token string.  Entries are (destination, text, render-before-writing).
destinations = (
    (output_file, generated_text_no_tone, True),
    (raw_output_file, generated_text_no_tone, False),
    (output_toned_file, generated_text, True),
    (raw_output_toned_file, generated_text, False),
)

for path, text, prettified in destinations:
    with open(path, "w") as f:
        if prettified:
            f.write(prettify_text(text, special_tokens))
        else:
            f.write(text)
Beispiel #7
0
def _encode_window(seq_text, syl2idx, seq_length):
    """Turn the last ``seq_length`` syllables into a padded int64 batch of one."""
    indices = [syl2idx[syl] for syl in seq_text[-seq_length:]]
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        [indices], maxlen=seq_length)
    return np.array(padded, dtype='int64')


def _predict_last_step(model, x):
    """Return the model output at the last position of a single-sequence batch."""
    prediction = model.predict(x, verbose=0)
    # Drop the batch axis, then keep the final entry along the next axis
    # (presumably the time axis -- confirm against the model definition).
    return tf.squeeze(prediction, 0)[-1].numpy()


def _sample_index(probabilities, temperature):
    """Draw an index from ``probabilities`` after temperature scaling."""
    if temperature != 1.0:
        # Sharpen/flatten the distribution and renormalise: np.random.choice
        # rejects a ``p`` that does not sum to 1.
        scaled = np.power(
            np.asarray(probabilities, dtype='float64'), 1.0 / temperature)
        probabilities = scaled / scaled.sum()
    return np.random.choice(len(probabilities), size=1, p=probabilities)[0]


def generate_text(model_rhyme,
                  model_verse,
                  special_tokens,
                  vocab_size_rhyme,
                  vocab_size_verse,
                  syl2idx_rhyme,
                  idx2syl_rhyme,
                  syl2idx_verse,
                  idx2syl_verse,
                  seq_length_rhyme,
                  seq_length_verse,
                  start_seq_rhyme,
                  start_seq_verse,
                  temperature=1.0):
    """Generate a canto verse by verse with a rhyme model and a verse model.

    For every verse the rhyme model first produces, greedily (argmax), tokens
    up to and including ``END_OF_VERSO``.  Special tokens other than
    ``END_OF_VERSO`` are collected as structure; everything else forms the
    verse ending.  The ending is reversed and appended to the verse model's
    context, and the verse model then samples the remaining syllables until it
    emits ``END_OF_VERSO``.  Those sampled syllables are reversed back and put
    in front of the ending.  Generation stops once the rhyme model emits
    ``END_OF_CANTO``.  Each verse is printed (without tone marks) as soon as
    it is complete.

    Args:
        model_rhyme: model exposing ``reset_states()`` and ``predict()``.
        model_verse: model exposing ``reset_states()`` and ``predict()``; its
            output is used as a probability vector for sampling.
        special_tokens: mapping with at least ``'END_OF_VERSO'`` and
            ``'END_OF_CANTO'``.
        vocab_size_rhyme: unused; kept for interface compatibility.
        vocab_size_verse: unused; kept for interface compatibility.
        syl2idx_rhyme: syllable -> index mapping for the rhyme model.
        idx2syl_rhyme: index -> syllable mapping for the rhyme model.
        syl2idx_verse: syllable -> index mapping for the verse model.
        idx2syl_verse: index -> syllable mapping for the verse model.
        seq_length_rhyme: context window length for the rhyme model.
        seq_length_verse: context window length for the verse model.
        start_seq_rhyme: seed syllables for the rhyme model (not modified).
        start_seq_verse: seed syllables for the verse model (not modified).
        temperature: positive sampling temperature for the verse model; it
            has no effect on the rhyme model, which decodes greedily.

    Returns:
        A tuple ``(toned_text, untoned_text)``: the generated tokens joined
        into one string, and the same after ``remove_tone``.

    Raises:
        ValueError: if ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    end_of_verso = special_tokens['END_OF_VERSO']
    end_of_canto_token = special_tokens['END_OF_CANTO']

    # Work on copies: the verse context is extended in place below and must
    # not leak into the caller's seed lists.
    seq_text_rhyme = list(start_seq_rhyme)
    seq_text_verse = list(start_seq_verse)

    generated_text_list = []

    model_rhyme.reset_states()
    model_verse.reset_states()
    end_of_canto = False
    while not end_of_canto:
        # --- Step 1: the rhyme model writes the end of the verse. ---
        next_syl_rhyme = ''
        end_verse_list = []
        structure_list = []
        while not end_of_canto and next_syl_rhyme != end_of_verso:
            # Keep only the context window so the list does not grow unbounded.
            seq_text_rhyme = seq_text_rhyme[-seq_length_rhyme:]
            x_rhyme = _encode_window(seq_text_rhyme, syl2idx_rhyme,
                                     seq_length_rhyme)

            # Greedy decoding: argmax is unaffected by temperature scaling.
            scores_rhyme = _predict_last_step(model_rhyme, x_rhyme)
            index_rhyme = np.argmax(scores_rhyme)

            next_syl_rhyme = idx2syl_rhyme[index_rhyme]
            seq_text_rhyme.append(next_syl_rhyme)

            if (next_syl_rhyme in special_tokens.values()
                    and next_syl_rhyme != end_of_verso):
                structure_list.append(next_syl_rhyme)
            else:
                # Regular syllables and the closing END_OF_VERSO token.
                end_verse_list.append(next_syl_rhyme)

            if next_syl_rhyme == end_of_canto_token:
                end_of_canto = True

        generated_text_list += structure_list

        # The verse model consumes the verse ending in reversed order.
        seq_text_verse += end_verse_list[::-1]

        # --- Step 2: the verse model fills in the rest, back to front. ---
        next_syl_verse = ''
        rest_reverse_verse_list = []
        while not end_of_canto and next_syl_verse != end_of_verso:
            seq_text_verse = seq_text_verse[-seq_length_verse:]
            x_verse = _encode_window(seq_text_verse, syl2idx_verse,
                                     seq_length_verse)

            probabilities_verse = _predict_last_step(model_verse, x_verse)
            index_verse = _sample_index(probabilities_verse, temperature)

            next_syl_verse = idx2syl_verse[index_verse]
            if next_syl_verse != end_of_verso:
                seq_text_verse.append(next_syl_verse)
                rest_reverse_verse_list.append(next_syl_verse)

        # Restore reading order and attach the ending produced in step 1.
        whole_verse_list = rest_reverse_verse_list[::-1] + end_verse_list

        generated_text_list += whole_verse_list

        # Live progress: structure tokens, then the verse without tone marks.
        print(prettify_text(''.join(structure_list), special_tokens),
              end='',
              flush=True)
        print(prettify_text(
            ''.join(remove_tone(whole_verse_list, special_tokens)),
            special_tokens),
              end='',
              flush=True)

    generated_text_no_tone_list = remove_tone(generated_text_list,
                                              special_tokens)

    return ''.join(generated_text_list), ''.join(generated_text_no_tone_list)