import poetrytools


def prepare_input(file, type):
    '''
    Prepares the input from a text file to analyze either words or meter.

    Parameters
        file, string: a path to a valid text file
        type, string: either "words" or "meters"

    Returns
        output, list: the words of the file, filtered to alphabetic
            characters only; if type == "words", returns a list of each
            word from the file; if type == "meters", returns a pair of
            lists with a list of words in the first index and a list of
            corresponding meters for those words in the second index

    Pre/postconditions
        none additional
    '''
    with open(file) as f:
        file_string = f.read()
    # Keep only letters and spaces.
    filter_fn = lambda c: c.isalpha() or c == " "
    filtered_string = "".join(filter(filter_fn, file_string))
    if type == "words":
        output = filtered_string.split()
    elif type == "meters":
        tokens = poetrytools.tokenize(filtered_string)
        output = [tokens[0], poetrytools.scanscion(tokens)[0]]
    return output

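# Usage sketch for prepare_input (the path "sonnet.txt" is hypothetical; any
# plain-text file works):
words = prepare_input("sonnet.txt", "words")           # flat list of words
words, meters = prepare_input("sonnet.txt", "meters")  # parallel word/stress lists
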
import copy

import poetrytools


def analyze_syllables(word_lists):
    """
    For each word in the list of lists of words, return its syllable
    stresses and its syllable count.

    :param word_lists: A list of lists of words.
    :return: stresses: Stress notations for each word. Same shape as
        'word_lists'.
    :return: syllables: Syllable counts for each word. Same shape as
        'word_lists'.
    """
    stresses = poetrytools.scanscion(word_lists)
    # Deep copy so the counts can be written in place without mutating
    # 'stresses'.
    syllables = copy.deepcopy(stresses)
    for i in range(len(syllables)):
        for j in range(len(syllables[i])):
            # Each stress string has one digit per syllable, e.g. "01" -> 2.
            syllables[i][j] = len(syllables[i][j])
    return stresses, syllables

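# Usage sketch; the exact stress strings depend on poetrytools' dictionary
# lookup, so the values shown are only plausible, not guaranteed:
stresses, syllables = analyze_syllables([["the", "nightingale"]])
# stresses might be [['0', '100']]; syllables would then be [[1, 3]]
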
def get_score(self):
    score = 0

    # Score rhymes.
    rhyme_score = 0
    rhyme_check = [0, 1, 4, 5, 8, 9]
    last_word_list = []
    # Get the word at the end of each line to check.
    for line in self.sonnet:
        last_word = line[len(line) - 1].name
        last_word_list.append(last_word)
    # Check for the ababcdcdefefgg rhyme scheme and add to the score for
    # each good rhyme.
    for word in rhyme_check:
        if poetrytools.rhymes(last_word_list[word], last_word_list[word + 2]):
            rhyme_score += 1
    if poetrytools.rhymes(last_word_list[12], last_word_list[13]):
        rhyme_score += 1

    # Score syllables: penalize each line's distance from 10 syllables.
    syllable_score = 0
    for line in self.sonnet:
        syllable_count = 0
        for word in line:
            syllable_count += word.get_syllables()
        syllable_score -= abs(10 - syllable_count)

    # Score meter: reward each word scanned as a single iamb ("01").
    meter_score = 0
    str_rep = self.get_str()
    poem = poetrytools.tokenize(str_rep)
    meter = poetrytools.scanscion(poem)
    for line in meter:
        for word in line:
            if word == "01":
                meter_score += 1

    score += rhyme_score + syllable_score + meter_score
    scores = [score, rhyme_score, 140 - abs(syllable_score), meter_score]
    return scores

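# The meter check above can be exercised without the surrounding sonnet
# class; a minimal sketch using only poetrytools (the sample lines are
# arbitrary):
import poetrytools

sample = poetrytools.tokenize("shall I compare thee to a summers day\n"
                              "thou art more lovely and more temperate")
iambs = sum(word == "01"
            for line in poetrytools.scanscion(sample)
            for word in line)
print(iambs)  # number of words scanned as a single iamb ("01")
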
import poetrytools

# 'poems' maps poem IDs to raw poem text; 'count' is a running index over it.
for key in poems:
    poem_dict = {"poem_id": key}
    print(count)
    if count in (1034, 1234):
        # Randomly blank poems, so skip them.
        count += 1
        continue
    mypoem = poetrytools.tokenize(poems[key])

    # rhyme_scheme returns e.g. "X a b X X a X a b c X c"
    rhyme_scheme = ' '.join(poetrytools.rhyme_scheme(mypoem))
    poem_dict["rhyme_scheme"] = rhyme_scheme

    # scanscion returns e.g. "0 1 1 10 10 / 1 10 1 01 / 0 1 01 1 10 /
    # 1 1 1 0 10 1 / 1 1 0 1 010 / 0 100 1 1 / 1 1 1 01 1 / 0 1 1 1 0 1 /
    # 1 1 1 1 1 10 / 1 1 1 1 1 1 / 10 10 10 / 1 1 1 1 1"
    scanscions = []
    for item in poetrytools.scanscion(mypoem):
        scanscion = ' '.join(item)
        scanscions.append(scanscion)
        # print("scanscion: " + scanscion)
    poem_dict["scanscion"] = scanscions

    # stanza_lengths returns e.g. "12"
    stanza_lengths = poetrytools.stanza_lengths(mypoem)
    poem_dict["stanza_lengths"] = stanza_lengths

    # guess_metre's fourth element is e.g. "iambic trimeter" (as a very
    # rough guess)
    meter_guess = poetrytools.guess_metre(mypoem)[3]
    poem_dict["meter_guess"] = meter_guess

    print(poem_dict)
    count += 1

# load results in database

def get_syllables(self):
    import poetrytools
    self.syllables = poetrytools.scanscion(self.x_batches)
    # Loop body not yet implemented.
    for line in self.syllables:
        pass

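# A sketch of what the stubbed loop above might compute: per-word syllable
# counts derived from scanscion's stress strings (assumes x_batches-style
# input, i.e. a list of lines, each a list of words):
import poetrytools


def count_syllables(word_lists):
    # Each stress string has one digit per syllable, so its length is the
    # syllable count.
    return [[len(stress) for stress in line]
            for line in poetrytools.scanscion(word_lists)]
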
import nltk
import numpy as np
import poetrytools

# 'hmm', 'conv_words', and 'emission' are defined earlier in the script.
# i-1 because the observations are 1-indexed
translated_emission = [conv_words[i] for i in emission]
line = ''
for word in translated_emission:
    line += word + ' '
print(line)
print(emission)

# Print transition matrix.
A_vis = np.around(np.array(hmm.A), 4)
print(A_vis)

# Obtain the top 10 words for each state.
O = np.array(hmm.O)
for i in range(len(O)):
    state_emissions = O[i, :]
    top_emissions = state_emissions.argsort()[-10:][::-1]
    top_probs = [state_emissions[j] for j in top_emissions]
    top_words = [conv_words[j] for j in top_emissions]
    pos = nltk.pos_tag(top_words)
    pos = [x[1] for x in pos]
    stresses = poetrytools.scanscion([top_words])[0]
    print()
    print(top_words)
    print(np.around(top_probs, 4))
    print(pos)
    print(stresses)

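# Self-contained sketch of the top-emissions pattern above, with a toy
# 2-state, 5-word emission matrix (all values made up):
import numpy as np

O_toy = np.array([[0.10, 0.40, 0.05, 0.25, 0.20],
                  [0.30, 0.10, 0.30, 0.10, 0.20]])
top = O_toy[0, :].argsort()[-3:][::-1]  # indices of the 3 most probable words
print(top, O_toy[0, top])               # -> [1 3 4] [0.4 0.25 0.2]
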
import poetrytools

x = [["spring"]]
syllables = poetrytools.scanscion(x)
# y = "spring"
# print(poetrytools.stress(y))


def this():
    for y in ["spring", "project", "attribute", "insult"]:
        print(poetrytools.stress(y, "min"))


m = poetrytools.stress("nightingale", "all")
print(m)

words = "the nightingales thy coming each where sing:".split(" ")
m = ''.join([poetrytools.stress(x, "min") for x in words])
print(m)
print(len(m))

import poetrytools

# TEST is a module-level flag set elsewhere in the script.
if TEST:
    OUTPUT = r"../data/test/input.txt"
    VOCAB = 300
    SUFFIX = ""
else:
    OUTPUT = r"../data/large/gutenberg.txt"
    VOCAB = 40000
    SUFFIX = "NEW"


def write_out(text, f):
    # Write out
    with open(f, "w") as fobj:
        fobj.write(text)


def read_in(ff):
    with open(ff, "r") as fobj:
        return fobj.read()


# Prune vocab for most common words
text = read_in(OUTPUT)
# write_out(text, out)
poem = poetrytools.tokenize(text)  # need to tokenize the poem first
print(poem)
x = poetrytools.scanscion(poem)
print(x)
poetrytools.guess_form(poem, verbose=True)