def is_hendecasyllable(syllables, special_tokens):
    """Return True if the syllable sequence forms a hendecasyllable.

    A verse counts as a hendecasyllable when, after stripping special
    tokens and prettifying each syllable, the last toned syllable falls
    on index 9 (the canonical 10th-syllable stress of Italian metre).

    Args:
        syllables: iterable of syllable strings, possibly interleaved
            with special tokens.
        special_tokens: dict mapping token names to token strings;
            values are filtered out before counting.

    Returns:
        bool: True only when the verse is long enough (>9 cleaned
        syllables) AND the last toned syllable sits at index 9.
    """
    syllables = [
        prettify_text(s, special_tokens).strip()
        for s in syllables
        if s not in special_tokens.values()
    ]
    # A verse with 9 or fewer cleaned syllables can never have its last
    # tone on index 9, so short-circuit; this collapses the original
    # `if/else: return False` into one boolean expression.
    return len(syllables) > 9 and \
        get_last_tonedsyl_index(syllables, special_tokens) == 9
def get_well_formed_rhymes(toned_verses_syls, synalepha):
    """Score how well a generated canto follows the terza-rima rhyme scheme.

    Joins the syllabified verses into one canto, splits it into triplets
    (tercets), and checks the expected ABA BCB CDC ... chaining with
    ``is_rhyme``.  Returns the ratio of correct rhymes to expected rhymes.

    NOTE(review): the ``synalepha`` parameter is never used in this body.
    NOTE(review): ``special_tokens`` is read from module scope, not passed
    in — confirm it is defined at import time in this module.
    """
    verses = [ ''.join(verse_syls) for verse_syls in toned_verses_syls]
    generated_canto = ''.join(verses)
    generated_canto = prettify_text(generated_canto, special_tokens)
    # Drop the 'CANTO' header lines and normalise line spacing.
    generated_canto_list = generated_canto.split("\n")
    generated_canto_list = [line.strip() for line in generated_canto_list if line != 'CANTO']
    generated_canto = "\n".join(generated_canto_list)
    n_rhymes = 0
    correct_rhymes = 0
    generated_canto = generated_canto.replace('\n\n', '\n').strip()
    generated_canto_list = generated_canto.split("\n")
    # Group consecutive verses three-by-three into tercet strings
    # ('\n'-joined); the final (possibly short) group is appended after
    # the loop.
    triplets = []
    l = None
    for i, verse in enumerate(generated_canto_list):
        if i % 3 == 0:
            if l:
                triplets.append('\n'.join(l))
            l = [verse]
        else:
            l.append(verse)
    triplets.append('\n'.join(l))
    # Walk consecutive tercet pairs (t1, t2) and score the chained rhymes.
    for i in range(0,len(triplets)-1,1):
        t1 = triplets[i].split('\n')
        t2 = triplets[i+1].split('\n')
        if i==0:
            # First tercet: its outer A-A rhyme (lines 1 and 3) counts once.
            n_rhymes+=1
            if is_rhyme(t1[0], t1[2]):
                correct_rhymes+=1
        # One expected rhyme links every tercet to the next (the B rhyme).
        n_rhymes+=1
        if i == len(triplets)-2 and len(t2) <3 :
            # Last group is a short tail (the closing single verse of a
            # canto): only the middle-of-t1 / first-of-t2 rhyme applies.
            if is_rhyme(t1[1], t2[0]):
                correct_rhymes+=1
        else:
            # Full next tercet: the B rhyme should appear in three places
            # (t1 middle vs t2 first, t2 first vs t2 last, t1 middle vs
            # t2 last); each match earns 1/3 credit so a fully correct
            # link still totals 1.
            if is_rhyme(t1[1], t2[0]):
                correct_rhymes+=1/3
            if is_rhyme(t2[0], t2[2]):
                correct_rhymes+=1/3
            if is_rhyme(t1[1], t2[2]):
                correct_rhymes+=1/3
    # NOTE(review): if the canto has fewer than two triplets this divides
    # by zero — confirm callers always pass a full canto.
    return correct_rhymes/n_rhymes
def _apply_synalepha(syllables, special_tokens):
    """Merge vowel-adjacent syllables across word separators (synalepha)
    until the verse's last toned syllable lands on index 9, when possible.

    Args:
        syllables: raw syllable/token sequence for one verse, including
            special tokens such as WORD_SEP.
        special_tokens: dict of special token strings; 'WORD_SEP' is the
            word-boundary marker inspected here.

    Returns:
        A new syllable list with up to ``n_synalepha_to_apply`` merges
        applied, or the input unchanged when the cleaned verse is already
        9 syllables or fewer.
    """
    syllables_cleaned = [
        prettify_text(s, special_tokens).strip()
        for s in syllables
        if s not in special_tokens.values()
    ]
    if len(syllables_cleaned) <= 9:
        # Short verse: synalepha could only make it shorter, skip.
        return syllables
    # SMARAGLIATA
    # Characters that may open/close a synalepha; includes accented
    # vowels, 'h' (silent in Italian) and the apostrophe (elision).
    vowels = "ÁÀAaàáÉÈEeèéIÍÌiíìOoóòÚÙUuúùHh'"
    # --- Pass 1: count how many synalepha sites exist in the verse. ---
    n_synalepha = 0
    i = 1
    while i < (len(syllables) - 1):
        if syllables[i] == special_tokens['WORD_SEP']:
            pre_syl = syllables[i - 1]
            next_syl = syllables[i + 1]
            if pre_syl[-1] in vowels and next_syl[0] in vowels:
                # Skip the consumed syllable so overlapping sites
                # are not double-counted.
                i += 1
                n_synalepha += 1
        i += 1
    # We need exactly enough merges to pull the last toned syllable
    # back to index 9; never apply more than actually exist.
    last_tonedrv_index = get_last_tonedsyl_index(syllables, special_tokens)
    n_synalepha_needed = last_tonedrv_index - 9
    n_synalepha_to_apply = min(n_synalepha_needed, n_synalepha)
    # --- Pass 2: rebuild the sequence, fusing the first
    # n_synalepha_to_apply eligible (syllable, WORD_SEP, syllable)
    # windows into a single syllable. ---
    result = [syllables[0]]
    i = 1
    n_synalepha_applied = 0
    while i < (len(syllables) - 1):
        if syllables[i] == special_tokens['WORD_SEP']:
            pre_syl = syllables[i - 1]
            next_syl = syllables[i + 1]
            if pre_syl[-1] in vowels and next_syl[0] in vowels:
                if n_synalepha_applied < n_synalepha_to_apply:
                    # Fuse previous-output syllable + separator + next
                    # syllable, then drop the stale previous entry.
                    result.append(result[-1] + syllables[i] + next_syl)
                    del result[-2]
                    n_synalepha_applied += 1
                    # Skip next_syl: it is already part of the fusion.
                    i += 1
                else:
                    result.append(syllables[i])
            else:
                result.append(syllables[i])
        else:
            result.append(syllables[i])
        i += 1
    result.append(syllables[-1])
    return result
def get_last_tonedsyl_index(syllables, special_tokens):
    """Locate the last toned syllable of a verse.

    Special tokens are filtered out and each remaining syllable is
    prettified/stripped before the scan, so the returned index refers
    to the cleaned syllable list (no <word_sep> tokens).

    Returns:
        The 0-based index of the last syllable for which
        ``is_toned_syl`` is true, or None when no syllable is toned.
    """
    cleaned = [
        prettify_text(raw_syl, special_tokens).strip()
        for raw_syl in syllables
        if raw_syl not in special_tokens.values()
    ]
    total = len(cleaned)
    # Scan from the end; the first toned syllable found is the last one.
    for offset, syl in enumerate(reversed(cleaned)):
        if is_toned_syl(syl):
            return total - offset - 1
    return None
# Script body: load and clean the Divine Comedy corpus, then print the
# tone-tagged form of its first ten words as a smoke test of ToneTagger.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # silence TensorFlow C++ logging

from dante_by_tonedrev_syl.text_processing import clean_comedy, prettify_text, special_tokens, remove_all_punctuation
from dante_by_tonedrev_syl.tone import ToneTagger

working_dir = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'dante_by_tonedrev_syl')

divine_comedy_file = os.path.join(os.path.dirname(working_dir),
                                  "divina_commedia",
                                  "divina_commedia_accent_UTF-8.txt")

# The corpus file is UTF-8 (see its name); be explicit instead of
# relying on the locale-dependent default encoding.
with open(divine_comedy_file, "r", encoding="utf-8") as f:
    divine_comedy = f.read()

divine_comedy = clean_comedy(divine_comedy, special_tokens)
divine_comedy = prettify_text(divine_comedy, special_tokens)
#divine_comedy = remove_all_punctuation(divine_comedy)

tone_tagger = ToneTagger()
print("\nMODEL: {}\n".format(tone_tagger.model_filename))

# Only the first ten words are needed for the demo.
# (The original appended an empty list here — a no-op, removed.)
divine_comedy_words = divine_comedy.split()[:10]
for w in divine_comedy_words:
    print(tone_tagger.tone(w), flush=True, end=' ')
# Script body: run two-model text generation, then persist four variants
# of the result (prettified / raw, each with and without tone marks).
start_seq_verse = syls_verse_list[start_idx:index_eov]

generated_text, generated_text_no_tone = generate_text(model_rhyme,
                                                       model_verse,
                                                       special_tokens,
                                                       vocab_size_rhyme,
                                                       vocab_size_verse,
                                                       syl2idx_rhyme,
                                                       idx2syl_rhyme,
                                                       syl2idx_verse,
                                                       idx2syl_verse,
                                                       SEQ_LENGTH_RHYME,
                                                       SEQ_LENGTH_VERSE,
                                                       start_seq_rhyme,
                                                       start_seq_verse,
                                                       temperature=1.0)

#print(prettify_text(generated_text, special_tokens))

# The generated text contains accented Italian characters; write UTF-8
# explicitly rather than depending on the locale default encoding.
with open(output_file, "w", encoding="utf-8") as f:
    f.write(prettify_text(generated_text_no_tone, special_tokens))

with open(raw_output_file, "w", encoding="utf-8") as f:
    f.write(generated_text_no_tone)

with open(output_toned_file, "w", encoding="utf-8") as f:
    f.write(prettify_text(generated_text, special_tokens))

with open(raw_output_toned_file, "w", encoding="utf-8") as f:
    f.write(generated_text)
def generate_text(model_rhyme, model_verse, special_tokens,
                  vocab_size_rhyme, vocab_size_verse,
                  syl2idx_rhyme, idx2syl_rhyme,
                  syl2idx_verse, idx2syl_verse,
                  seq_length_rhyme, seq_length_verse,
                  start_seq_rhyme, start_seq_verse,
                  temperature=1.0):
    """Generate a canto with a two-model scheme: a 'rhyme' model produces
    each verse's structure tokens and its ending (rhyme) syllables, then a
    'verse' model fills in the rest of the verse in reverse order.

    Args:
        model_rhyme, model_verse: stateful Keras models (``reset_states``
            is called on both before generation).
        special_tokens: dict of marker tokens; END_OF_VERSO terminates a
            verse and END_OF_CANTO terminates generation.
        vocab_size_rhyme, vocab_size_verse: NOTE(review): unused in this
            body — kept for interface compatibility.
        syl2idx_* / idx2syl_*: per-model syllable<->index mappings.
        seq_length_rhyme, seq_length_verse: model input window lengths.
        start_seq_rhyme, start_seq_verse: seed token lists; mutated/
            re-bound during generation.
        temperature: logits divisor; NOTE(review): with argmax sampling
            below it has no effect on the rhyme model's choice.

    Returns:
        Tuple ``(generated_text, generated_text_no_tone)`` — the joined
        token string with and without tone marks.
    """
    seq_text_rhyme = start_seq_rhyme
    seq_text_verse = start_seq_verse
    generated_text_list = []
    model_rhyme.reset_states()
    model_verse.reset_states()
    end_of_canto = False
    while not end_of_canto:
        # and generated_text_list.count(special_tokens['END_OF_VERSO']) < 10:
        # and generated_text_list.count(special_tokens['END_OF_TERZINA']) < 45 \
        next_syl_rhyme = ''
        end_verse_list = []   # ending (rhyme) syllables of this verse
        structure_list = []   # non-verse special tokens (structure markers)
        # --- Stage 1: rhyme model emits structure tokens and the verse
        # ending, one syllable at a time, until END_OF_VERSO. ---
        while not end_of_canto and next_syl_rhyme != special_tokens[
                'END_OF_VERSO']:
            seq_text_rhyme = seq_text_rhyme[-seq_length_rhyme:]
            sequence_rhyme = [
                syl2idx_rhyme[syl]
                for syl in seq_text_rhyme[-seq_length_rhyme:]
            ]
            sequence_rhyme = tf.keras.preprocessing.sequence.pad_sequences(
                [sequence_rhyme], maxlen=seq_length_rhyme)
            x_rhyme = np.array(sequence_rhyme, dtype='int64')
            prediction_rhyme = model_rhyme.predict(x_rhyme, verbose=0)
            # Last timestep's distribution of the single batch element.
            prediction_rhyme = tf.squeeze(prediction_rhyme, 0)[-1]
            prediction_rhyme = prediction_rhyme / temperature
            prediction_rhyme = prediction_rhyme.numpy()
            # index_rhyme = np.random.choice(len(prediction_rhyme), size=1, p=prediction_rhyme)[0]
            # Greedy choice for the rhyme model (sampling kept commented).
            index_rhyme = np.argmax(prediction_rhyme)
            next_syl_rhyme = idx2syl_rhyme[index_rhyme]
            seq_text_rhyme.append(next_syl_rhyme)
            # Route special structural tokens and ordinary ending
            # syllables to separate accumulators.
            if next_syl_rhyme in special_tokens.values(
            ) and next_syl_rhyme != special_tokens['END_OF_VERSO']:
                structure_list.append(next_syl_rhyme)
            else:
                end_verse_list.append(next_syl_rhyme)
            if next_syl_rhyme == special_tokens['END_OF_CANTO']:
                end_of_canto = True
        generated_text_list += structure_list
        # The verse model works right-to-left, so feed it the verse
        # ending reversed.
        reverse_rhyme_list = end_verse_list[::-1]
        ## seq_text_verse += structure_list
        seq_text_verse += reverse_rhyme_list
        next_syl_verse = ''
        rest_revese_verse_list = []
        # --- Stage 2: verse model completes the verse (in reverse) by
        # sampling syllables until it emits END_OF_VERSO. ---
        while not end_of_canto and next_syl_verse != special_tokens[
                'END_OF_VERSO']:
            seq_text_verse = seq_text_verse[-seq_length_verse:]
            sequence_verse = [
                syl2idx_verse[syl]
                for syl in seq_text_verse[-seq_length_verse:]
            ]
            sequence_verse = tf.keras.preprocessing.sequence.pad_sequences(
                [sequence_verse], maxlen=seq_length_verse)
            x_verse = np.array(sequence_verse, dtype='int64')
            prediction_verse = model_verse.predict(x_verse, verbose=0)
            prediction_verse = tf.squeeze(prediction_verse, 0)[-1]
            prediction_verse = prediction_verse / temperature
            prediction_verse = prediction_verse.numpy()
            # Stochastic sampling from the (temperature-scaled) distribution.
            index_verse = np.random.choice(len(prediction_verse),
                                           size=1,
                                           p=prediction_verse)[0]
            next_syl_verse = idx2syl_verse[index_verse]
            if next_syl_verse != special_tokens['END_OF_VERSO']:
                seq_text_verse.append(next_syl_verse)
                rest_revese_verse_list.append(next_syl_verse)
        # Un-reverse the verse body and re-attach the rhyme ending.
        whole_verse_list = rest_revese_verse_list[::-1] + end_verse_list
        generated_text_list += whole_verse_list
        print(prettify_text(''.join(structure_list), special_tokens),
              end='',
              flush=True)
        # print(prettify_text(''.join(whole_verse_list), special_tokens), end='', flush=True)
        print(prettify_text(
            ''.join(remove_tone(whole_verse_list, special_tokens)),
            special_tokens),
              end='',
              flush=True)
    generated_text_no_tone_list = remove_tone(generated_text_list,
                                              special_tokens)
    return ''.join(generated_text_list), ''.join(generated_text_no_tone_list)