# Directory that `process` changes into before importing the project's
# feature modules (presumably so those imports resolve from the working
# directory -- verify against how the script is launched).
_FEATURES_DIR = r'/media/bryan/Work/graph_poetry/features/'

# Directory `process` changes into last; the processed poem is renamed there.
_POEM_DIR = r'/media/bryan/Work/graph_poetry/poetry_collection/test_site/'

# Category keyword -> Weka class label, in precedence order: the first
# keyword found among a poem's category words decides the class.
_CATEGORY_CLASSES = (
    ("Free", "NR"),
    ("Rhymed", "R"),
    ("Couplet", "R"),
    ("Rhymes", "R"),
    ("Blank", "NR"),
    ("Sonnet", "R"),
    ("Villanelle", "R"),
    ("Terza", "R"),
)


def _ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0 if it cannot be computed."""
    try:
        return float(numerator) / float(denominator)
    except (TypeError, ValueError, ArithmeticError):
        # Non-numeric input or a zero denominator (e.g. an empty poem).
        return 0


def _weka_class(poem_categories):
    """Return the Weka class for a poem's category words, or 'unknown'."""
    for keyword, label in _CATEGORY_CLASSES:
        # Case-sensitive substring match, like searching the keyword as a regex.
        if any(keyword in word for word in poem_categories):
            return label
    return 'unknown'


def _done_name(path):
    """Return `path` with its trailing 'txt' swapped for 'done'."""
    if path.endswith("txt"):
        # Touch only the extension, never a "txt" earlier in the path.
        return path[:-3] + "done"
    # Legacy behaviour for names that do not end in "txt".
    return path.replace("txt", "done")


def _write_report(report_path, linecount, metrical_stress, last_words_ll, poem):
    """Write the human-readable per-poem results file to `report_path`."""
    with open(report_path, 'w') as output:
        output.write("Results" + '\n' + '\n')
        output.write("Line Count : " + '\n')
        output.write(str(linecount))
        output.write('\n')

        output.write('Metrical Stress : ' + '\n')
        for stress in metrical_stress:
            output.write(str(stress))
            output.write('\n')
        output.write('\n')

        output.write("Rhyme Pattern : " + '\n')
        # The list is walked until `nodebelow` leads back to `head`; the head
        # node itself is never written.
        node = last_words_ll.head
        while node.nodebelow is not last_words_ll.head:
            node = node.nodebelow
            output.write(str(node.outer_letter))
            output.write("-")
            output.write(str(node.letter_designation))
            output.write("----")
            output.write(str(node.word))
            output.write("----")
            output.write(str(node.rhyme_type))
            output.write('\n')
        output.write('\n')

        output.write("Poem : " + '\n')
        for line in poem:
            output.write(str(line))


def process(file_name):
    """Extract features from one poem file, record them, and mark it done.

    The file is read as lines: index 5 is split on whitespace into the poem's
    category words and everything from index 9 onwards is the poem text.
    A feature row (source path, rhyme and metre measures, Weka class) is
    appended to the module-level ``arff`` file object, a results report is
    written under ``./output`` relative to the features directory, and the
    poem file is finally renamed from ``...txt`` to ``...done``.

    Relies on module-level ``glob``, ``os`` and an open, writable ``arff``.
    Changes the process working directory and leaves it at the poem
    directory on success.

    Args:
        file_name: Path or glob pattern of the poem file; when several files
            match, the last match returned by ``glob.glob`` is used.

    Raises:
        IndexError: If nothing matches ``file_name`` or the file has too few
            lines to contain the category line.
    """
    print("1")
    matches = glob.glob(file_name)
    if not matches:
        raise IndexError("no poem file matches " + repr(file_name))
    source_path = matches[-1]
    with open(source_path) as handle:
        data = handle.readlines()
    print("2")
    poem = data[9:]
    print(poem)
    print("3")
    p_categories = data[5].split()
    print(p_categories)

    os.chdir(_FEATURES_DIR)
    import functions
    import rhyme_detection as rd
    import poem_vector as pv

    poem = functions.mutateRemoveJunk(poem)
    linecount = len(poem)
    metrical_stress = functions.getStress(poem)
    last_words_ll = functions.get_last_words(poem)
    # Return values are unused here; the calls are kept in case they annotate
    # the linked list that the report reads below -- TODO confirm.
    functions.make_rhyme_pattern(last_words_ll)
    functions.get_last_words_list(last_words_ll)

    features = [source_path, rd.get_rhyme_pattern_vector(last_words_ll)]
    features.extend(
        _ratio(count, linecount)
        for count in pv.get_rhyme_type_count(last_words_ll)
    )
    features.append(pv.get_ratio(last_words_ll))
    features.append(_ratio(pv.get_couplets(last_words_ll), linecount))
    features.append(_ratio(pv.get_alternating_rhymes(last_words_ll), linecount))
    features.append(_ratio(pv.get_rhymes_two_lines_apart(last_words_ll), linecount))
    features.append(linecount)

    foot_count = pv.foot_count(metrical_stress)
    total = sum(foot_count)
    features.extend(pv.get_feet_per_line(metrical_stress))
    features.extend(_ratio(count, total) for count in foot_count)

    # These two measures were best-effort in the original (the helper call
    # itself sat inside the try), so a failing helper still yields 0.
    try:
        streak = float(pv.get_streak(metrical_stress))
    except Exception:
        streak = 0
    features.append(streak)
    try:
        switches = float(pv.get_switches(metrical_stress)) / float(total)
    except Exception:
        switches = 0
    features.append(switches)

    # One comma-separated row per poem, ending with the class label.
    arff.write("".join(str(value) + ", " for value in features))
    arff.write(_weka_class(p_categories) + '\n')

    # this is where the results will be stored for each poem:
    os.chdir('./output')
    _write_report(
        source_path[:-3] + 'out.txt',
        linecount,
        metrical_stress,
        last_words_ll,
        poem,
    )

    os.chdir(_POEM_DIR)
    os.rename(source_path, _done_name(source_path))
# print("...splitting poem...") # split_me = "" # for item in poem: # split_me += item # print(split_me) # poem = split_me.split(splitter) # poem = str(poem).split('\n') # print('done') ######################################################################### # we often get a bunch of unicode stuff with the poems from the web: print("...removing unwanted characters...") poem = functions.mutateRemoveJunk(poem) print('done') ######################################################################### # simple enough: print("...counting the number of lines...") linecount = len(poem) print('done') ######################################################################### # only works if poems come formatted in stanzas! # must find better solution! # print('...counting stanzas...') # stanzaCount = functions.getStanzaCount(poem)