def prepare_input(file, type):
    '''
    Prepares the input from a textfile to analyze either words or meter

    Parameters
        file, string: a path to a valid textfile
        type, string: either "words" or "meters" ("meter" is accepted as an
            alias for "meters")

    Returns
        output, list: the text of the file is filtered so that only
            alphabetic characters remain, with words separated by spaces
            if type == "words" returns a list of each word from the file
            if type == "meters" returns a pair of lists with a list of words
            in the first index and a list of corresponding meters for those
            words in the second index

    Raises
        ValueError: if type is not "words", "meter" or "meters"

    Pre/postconditions
        none additional
    '''
    if type not in ("words", "meter", "meters"):
        raise ValueError(
            'type must be "words" or "meters", got {!r}'.format(type)
        )

    with open(file) as f:
        file_string = f.read()

    # Keep letters, turn every whitespace character (newline, tab, ...) into a
    # plain space, and drop everything else.  Mapping whitespace to a space
    # instead of deleting it stops the last word of one line from being glued
    # to the first word of the next, while the filtered text still contains
    # no line breaks.
    filtered_string = "".join(
        c if c.isalpha() else " "
        for c in file_string
        if c.isalpha() or c.isspace()
    )

    if type == "words":
        return filtered_string.split()

    tokens = poetrytools.tokenize(filtered_string)
    # Only the first tokenized line and its scansion are returned.
    return [tokens[0], poetrytools.scanscion(tokens)[0]]
def analysis(x):
    """Tokenize ``x`` and append poetrytools' guesses to the shared result lists.

    The results are not returned; they are appended to ``temp_form``,
    ``temp_metre``, ``temp_rhyme`` and ``temp_stanza_type``, which must
    already exist in an enclosing scope.

    Parameters
        x: the poem text handed to ``poetrytools.tokenize``
           (presumably a string -- confirm against the caller)
    """
    # Progress/debug output: how many results exist so far, and what was passed.
    print(len(temp_form))
    print(type(x))

    tokens = poetrytools.tokenize(x)

    form_guess = poetrytools.guess_form(tokens, verbose=False)
    temp_form.append(form_guess)

    # Only one element of each of the following results is kept
    # (index 3 for the metre guess, index 1 for the other two).
    metre_guess = poetrytools.guess_metre(tokens)
    temp_metre.append(metre_guess[3])

    rhyme_guess = poetrytools.guess_rhyme_type(tokens)
    temp_rhyme.append(rhyme_guess[1])

    stanza_guess = poetrytools.guess_stanza_type(tokens)
    temp_stanza_type.append(stanza_guess[1])
def predict(poem_str):
    """Build the feature vector for a poem and run the classifier on it.

    Parameters
        poem_str, string: the raw text of the poem

    Returns
        Whatever ``classifier`` returns for the feature list
        ``[stanza, meter, rhyme_type, seussian_sight_prop]``.  Without this
        return the classifier's result would be thrown away and the function
        would always yield ``None``.
    """
    poem = poetrytools.tokenize(poem_str)
    stanza, meter, rhyme_type = evaluate_poem(poem)

    # finds proportion of words that are sight words
    # Line breaks are flattened to spaces before splitting; only purely
    # alphanumeric tokens are passed on to count_words.
    words = poem_str.replace("\n", " ").split(" ")
    seussian_sight_prop = count_words(filter(str.isalnum, words))

    # for generated text
    features = [stanza, meter, rhyme_type, seussian_sight_prop]
    return classifier(features)
def get_score(self):
    """Score the sonnet on rhyme, syllable count and meter.

    Returns
        list: ``[total, rhyme_score, 140 - abs(syllable_score), meter_score]``
        where ``total`` is the sum of the three partial scores.  The
        syllable score is zero or negative (a penalty), so the third entry
        reports it relative to 140 (presumably 14 lines x 10 syllables).
    """
    # --- rhymes -----------------------------------------------------------
    # Name of the final word of every line.
    line_endings = [line[len(line) - 1].name for line in self.sonnet]

    # Line-index pairs that must rhyme in an ababcdcdefefgg scheme:
    # alternating lines inside each quatrain, then the closing couplet.
    rhyme_pairs = [
        (0, 2), (1, 3),
        (4, 6), (5, 7),
        (8, 10), (9, 11),
        (12, 13),
    ]
    rhyme_score = sum(
        1
        for first, second in rhyme_pairs
        if poetrytools.rhymes(line_endings[first], line_endings[second])
    )

    # --- syllables --------------------------------------------------------
    # Each line is penalised by its distance from ten syllables.
    syllable_score = 0
    for line in self.sonnet:
        syllables_in_line = sum(word.get_syllables() for word in line)
        syllable_score -= abs(10 - syllables_in_line)

    # --- meter ------------------------------------------------------------
    # One point for every scanned word whose stress pattern is exactly "01".
    scansion = poetrytools.scanscion(poetrytools.tokenize(self.get_str()))
    meter_score = sum(
        1 for scanned_line in scansion for pattern in scanned_line
        if pattern == "01"
    )

    total = rhyme_score + syllable_score + meter_score
    return [total, rhyme_score, 140 - abs(syllable_score), meter_score]
def get_features(poem_str):
    """Compute the feature list for a poem.

    Parameters
        poem_str, string: the raw text of the poem

    Returns
        list: ``[stanza, meter, rhyme_type, seussian_sight_prop]`` -- the
        three values produced by ``evaluate_poem`` followed by the result of
        ``count_words`` on the poem's purely alphanumeric tokens.
    """
    tokenized = poetrytools.tokenize(poem_str)
    stanza, meter, rhyme_type = evaluate_poem(tokenized)

    # Sight-word proportion: flatten line breaks to spaces, split on single
    # spaces and keep only the tokens made up entirely of letters/digits.
    flattened = poem_str.replace("\n", " ")
    candidate_words = filter(str.isalnum, flattened.split(" "))
    seussian_sight_prop = count_words(candidate_words)

    # NOTE: a TextBlob polarity-based sentiment feature (1 / 0 / -1) was
    # sketched here but is disabled and is not part of the returned features.

    # for generated text
    return [stanza, meter, rhyme_type, seussian_sight_prop]
print("inside entry") if (entry['poems']): # only add the first poem i = entry['_id'] poems[i] = entry['poems'][0] count = 0 analysis_coll = db.analysisCOLL for key in poems: poem_dict = {"poem_id": key} print(count) if count == 1234 or count == 1034: # randomly blank poems so skip them count = count + 1 continue mypoem = poetrytools.tokenize(poems[key]) # rhyme_scheme returns "X a b X X a X a b c X c" rhyme_scheme = ' '.join(poetrytools.rhyme_scheme(mypoem)) poem_dict["rhyme_scheme"] = rhyme_scheme # scanscion returns "0 1 1 10 10 / 1 10 1 01 / 0 1 01 1 10 / 1 1 1 0 10 1 / 1 1 0 1 010 / 0 100 1 1 / 1 1 1 01 1 / 0 1 1 1 0 1 / 1 1 1 1 1 10 / 1 1 1 1 1 1 / 10 10 10 / 1 1 1 1 1" scanscions = [] for item in poetrytools.scanscion(mypoem): scanscion = ' '.join(item) scanscions.append(scanscion) # print("scanscion: " + scanscion) poem_dict["scanscion"] = scanscions # stanza_lengths returns "12" stanza_lengths = poetrytools.stanza_lengths(mypoem)
import poetrytools

# Sample text fed to poetrytools below.
# NOTE: the name `input` shadows the builtin input() for the rest of this module.
input = (
    "freshborneberslifeguardianyou stead "
    "prospectingbeesbeesrayedhathlowly john "
    "bondsesyfice numb "
    "gaveshookingnathunerlylovedhomageni "
    "facethorerectryserenely "
    "fenhotter'dqueencypiteousdrift "
    "vailwordedchildreedsmentfree "
    "murs—thewearial none "
    "ingsuresshornless graves "
)

poem = poetrytools.tokenize(input)

# Check the rhyme test on a pair of homophones.
homophones_rhyme = poetrytools.rhymes("see", "sea")
print(homophones_rhyme)

# Metre guess and rhyme scheme for the sample text.
metre_guess = poetrytools.guess_metre(poem)
print(metre_guess)

scheme = poetrytools.rhyme_scheme(poem)
print(scheme)
def open_poem(self, poem):
    """Read a poem file from ``poetrytools/poems`` and return it tokenized.

    Parameters
        poem: file name of the poem, relative to ``poetrytools/poems``

    Returns
        The result of ``poetrytools.tokenize`` on the file's full text.
    """
    poem_path = os.path.join('poetrytools/poems', poem)
    with open(poem_path) as handle:
        raw_text = handle.read()
    return poetrytools.tokenize(raw_text)
# Corpus path, vocabulary size and suffix: small values for a test run,
# the large Gutenberg corpus otherwise.
OUTPUT, VOCAB, SUFFIX = (
    (r"../data/test/input.txt", 300, "")
    if TEST
    else (r"../data/large/gutenberg.txt", 40000, "NEW")
)


def write_out(text, f):
    """Write ``text`` to the file at path ``f``, replacing existing content."""
    with open(f, "w") as handle:
        handle.write(text)


def read_in(ff):
    """Return the entire contents of the text file at path ``ff``."""
    with open(ff, "r") as handle:
        contents = handle.read()
    return contents


# Load the selected corpus (note: OUTPUT is used here as the *input* path).
text = read_in(OUTPUT)
#write_out(text, out)

# The text has to be tokenized before any other poetrytools call.
poem = poetrytools.tokenize(text)
print(poem)

# Scansion of the tokenized text.
x = poetrytools.scanscion(poem)
print(x)

# The return value is ignored; the call is made with verbose=True.
poetrytools.guess_form(poem, verbose=True)