def extractFeaturesAndWriteBio(READ_PATH, file_type):
    """Walk READ_PATH and, for each matching poem file, generate a mutated
    "poem" from its bio text: nudge years, swap named entities (from the
    per-poem Alchemy JSON), swap adjectives/adverbs and synset-similar
    words, fake the author/title, then print the result and write it to
    GENERATED_DIR. Interactive: asks how many poems to generate per batch
    and pauses between poems.

    NOTE(review): indentation was reconstructed from a flattened source.
    The nesting of the sections from "POS REPLACMENT" onward inside the
    ``response != "failed"`` branch is inferred -- confirm against VCS
    history before relying on it.

    Depends on module globals defined elsewhere in this file:
    ALL_poems, bio, cnt, start_time, filenames, DATA_DIR, READ_TXT_PATH,
    READ_JSON_PATH, GENERATED_DIR, authors, JJ, RB, personal_pronouns,
    RESERVOIR, the_vowels, plus helpers getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON and the import_utilities module.
    """
    global ALL_poems, bio, cnt, start_time
    inp = 0          # poems requested in the current interactive batch
    sub_cnt = 0      # poems generated so far in the current batch
    words_total = 0  # words processed (for the batch stats report)
    lines_total = 0  # lines processed (for the batch stats report)
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files) - 1  # deduct the DS_store
            if file_type in file and 'readme' not in file:
                # ID: filename stem, e.g. "poetryFoundation_12345"
                id = file.split(".")[0]
                filenames.append(id)
                cnt += 1

                ##############
                # HOW MANY?  #
                ##############
                # Interactive batching: after `inp` poems, report stats
                # and ask the user for the next batch size.
                sub_cnt += 1
                if sub_cnt >= inp:
                    if inp != 0:
                        end_time = time.time()
                        es = end_time - start_time
                        print sub_cnt, "poems,\n", lines_total, "lines, &\n", words_total, "words \ngenerated in\n", ("%.2f" % es), "seconds"
                        words_total = 0
                        lines_total = 0
                    # RESTART the batch counter and prompt again.
                    sub_cnt = 0
                    # NOTE(review): Python-2 input() eval()s whatever is
                    # typed; raw_input() + int() would be safer.
                    inp = input(
                        "\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                    start_time = time.time()

                print 'Poem #', sub_cnt + 1

                # Per-poem state.
                poem_replaced = ""
                replacement_word = ""
                author = ""
                titles = ""
                title = ""
                new_title = ""
                replaced_ls = []      # log of every substitution made
                new_titles_ls = []
                quit_language = 0     # >0 suggests the poem is not English

                #################################################################
                # Load POEM TEXT FILE (based on id extracted from Alchemy JSON) #
                #################################################################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split(
                    "_")[1] + ".txt"
                if os.path.isfile(txt_fn_path) and cnt > 0:
                    txt_data = open(txt_fn_path).read()
                    # File layout: author ****!**** title ****!**** bio
                    author = txt_data.split("****!****")[0].strip(' \t\n\r')
                    title = txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string; all
                    # three results are discarded, so this "cleaning" is a
                    # no-op as written.
                    bio.replace("\t", "	")
                    bio.replace("\n", " <br>")
                    bio.replace("\r", " <br>")
                    poem_replaced = bio

                    ###############################
                    # REPLACE AUTHOR NAME
                    ##############################
                    author_ln = author.split(" ")[-1]
                    author_fn = author.split(" ")[:-1]
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # fake AUTHOR
                    #######################
                    # Splice middle-of-name words from one random author with
                    # the second-to-last word of another.
                    new_author = " ".join(
                        random.choice(authors).split(" ")
                        [1:-2]) + " " + random.choice(authors).split(" ")[-2]

                    #######################
                    # replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')

                    ############################
                    # replace years with another
                    ############################
                    # Any "(1987)"-style number is nudged by +/-5 years.
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(
                                    int(w2) - 5, int(w2) + 5)
                                poem_replaced = poem_replaced.replace(
                                    w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH +
                                            "poetryFoundation_" +
                                            id.split("_")[1] +
                                            "_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx, entity in enumerate(response['entities']):
                                # Scrub mis-encoded bytes before matching.
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub(
                                    '(' +
                                    '|'.join(import_utilities.chars.keys()) +
                                    ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode(
                                        'ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string is a no-op;
                                    # `content` keeps its previous loop value
                                    # (NameError if the first entity fails).
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:
                                    ################################################
                                    #   Replace similar entities from other JSON   #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(
                                        content, entity['type'])
                                    # NOTE(review): `cr` is computed but never
                                    # used; the raw replacement_entity is
                                    # substituted instead.
                                    cr = re.sub(
                                        '(' + '|'.join(
                                            import_utilities.chars.keys()) +
                                        ')', import_utilities.replace_chars,
                                        replacement_entity)
                                    poem_replaced = poem_replaced.replace(
                                        content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                        ##########################
                        #    POS REPLACMENT      #
                        ##########################
                        token_tuples = nltk.word_tokenize(poem_replaced)
                        tt = nltk.pos_tag(token_tuples)

                        #################
                        #  ADJECTIVES   #
                        #################
                        # Swap adjectives (JJ) and adverbs (RB) for random
                        # picks from the JJ / RB pools, preserving title-case
                        # and surrounding punctuation. Only the first
                        # occurrence is replaced (count=1).
                        for i in tt:
                            if "/i" not in i[0] and len(
                                    i[0]) > 3 and i[0] != "died":
                                origw = re.sub(
                                    '(' +
                                    '|'.join(import_utilities.chars.keys()) +
                                    ')', import_utilities.replace_chars, i[0])
                                origw = import_utilities.strip_punctuation(origw)
                                if i[1] == 'JJ':
                                    JJr = random.choice(JJ)
                                    JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                        i[0], JJr.lstrip().lstrip())
                                    if i[0].istitle():
                                        JJr = JJr.title()
                                    poem_replaced = re.sub(
                                        r'\b' +
                                        import_utilities.strip_punctuation(
                                            i[0]) + r'\b', JJr, poem_replaced,
                                        1)
                                    replaced_ls.append(JJr)
                                if i[1] == 'RB':
                                    RBr = random.choice(RB)
                                    RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                        i[0], RBr.lstrip().lstrip())
                                    if i[0].istitle():
                                        RBr = RBr.title()
                                    poem_replaced = re.sub(
                                        r'\b' +
                                        import_utilities.strip_punctuation(
                                            i[0]) + r'\b', RBr, poem_replaced,
                                        1)
                                    replaced_ls.append(RBr)

                        ########################
                        #    IS IT ENGLISH?    #
                        ########################
                        # Vote per line: quit_language ends up positive for
                        # mostly-non-English poems, negative for English.
                        for line in poem_replaced.split('\n\r'):
                            if len(line) > 0:
                                if "english" not in import_utilities.get_language(
                                        line):
                                    quit_language += 1
                                else:
                                    quit_language -= 1

                        #########################
                        #    SYNSET REPLACE     #
                        #########################
                        # Word-by-word pass over the poem, replacing each
                        # non-stopword with a WordNet-ish relative or a word
                        # from the RESERVOIR.
                        for idx, word in enumerate(poem_replaced.split(' ')):
                            if "<br>" not in word and "	" not in word and len(
                                    word) > 0 and "~~~~!~~~" not in word:
                                words_total += 1
                                #########################
                                #   PRONOUN ' VERB      #
                                #########################
                                # Contractions like "she'll": swap the pronoun.
                                if len(word.split("'")) > 1:
                                    if word.split("'")[0] in personal_pronouns:
                                        replacement_word = random.choice(
                                            personal_pronouns) + "'" + word.split(
                                                "'")[1] + ' '
                                        # NOTE(review): replace() result is
                                        # discarded -- this swap never lands.
                                        poem_replaced.replace(word, replacement_word)
                                ####################################################
                                #   Replacement of OTHERs                          #
                                ####################################################
                                elif not word.lower().strip(
                                        " \n\t\r") in stopwords.words('english'):
                                    # take off leading brackets, commas etc...
                                    word_punct_nopunct = import_utilities.strip_punctuation_bool(
                                        word)
                                    word_nopunct = word_punct_nopunct[
                                        'word'].strip(" \n\t\r")
                                    word_punct = word_punct_nopunct['punct']
                                    punct_bool = word_punct_nopunct['punct_bool']
                                    #######################################################
                                    # MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET   #
                                    #######################################################
                                    if word_nopunct[-4:].lower() == "here":
                                        similarterm = random.choice(
                                            import_utilities.heres)
                                    else:
                                        if len(word_nopunct) > 2:
                                            similarterm = import_utilities.find_synset_word(
                                                word_nopunct)
                                    # NOTE(review): for short words neither
                                    # branch assigns, so `similarterm` may be
                                    # unbound or stale from a previous word.
                                    ############################################
                                    # manually get rid of some terrible choices
                                    ############################################
                                    if similarterm == "ilk":
                                        similarterm = "like"
                                    if similarterm == "ope":
                                        similarterm = "does"
                                    if similarterm == "information technology":
                                        similarterm = "it"
                                    #######################################
                                    # abbreviations for f*****g states!   #
                                    #######################################
                                    if word_nopunct.upper(
                                    ) in import_utilities.state_abbrev and word_nopunct.lower(
                                    ) not in stopwords.words(
                                            'english') and "me," not in word:
                                        tmp = similarterm
                                        if word_nopunct == "oh":
                                            similarterm = random.choice(
                                                import_utilities.exclaims)
                                        else:
                                            similarterm = random.choice(RESERVOIR)
                                    ##############
                                    # hyphenated #
                                    ##############
                                    # Rebuild hyphenated words part by part.
                                    hyp = word.split("-")
                                    if len(hyp) > 1:
                                        similarterm = ""
                                        for w in hyp:
                                            if len(w) > 2:
                                                similarterm += import_utilities.find_synset_word(
                                                    w) + "-"
                                        similarterm = import_utilities.strip_underscore(
                                            similarterm[:-1])
                                    #########################################################
                                    # is it a TRUNCATED VERB slang as in singin or wishin   #
                                    #########################################################
                                    if similarterm == word_nopunct and len(
                                            word
                                    ) > 2 and 'in' in word_nopunct[-2:]:
                                        similarterm = import_utilities.find_synset_word(
                                            word_nopunct + 'g')
                                        interim = import_utilities.lemma(
                                            similarterm)
                                        # Re-conjugate as participle, then drop
                                        # the final 'g' to restore the slang.
                                        similarterm = import_utilities.conjugate(
                                            interim,
                                            tense=import_utilities.PARTICIPLE,
                                            parse=True)[:-1]
                                    #################
                                    #  SWEAR WORD   #
                                    #################
                                    # Curses only ever map to other curses.
                                    if word_nopunct in import_utilities.curses:
                                        similarterm = random.choice(
                                            import_utilities.curses)
                                    if len(hyp) > 1:
                                        replacement_word = similarterm
                                    else:
                                        # Keep the original word's punctuation.
                                        replacement_word = word.replace(
                                            word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(
                                            replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(
                                            replacement_word)
                                    ##########################
                                    # RESERVOIR_OF_WEIRDNESS #
                                    ##########################
                                    # Closed word classes are swapped within
                                    # their own class; everything else may fall
                                    # back to the RESERVOIR word-salad pool.
                                    if word_nopunct.lower(
                                    ) in import_utilities.impera:
                                        replacement_word = random.choice(
                                            import_utilities.impera)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.conjuncts:
                                        replacement_word = random.choice(
                                            import_utilities.conjuncts)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.indef_prono:
                                        replacement_word = random.choice(
                                            import_utilities.indef_prono)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.prepo:
                                        replacement_word = random.choice(
                                            import_utilities.prepo)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.rel_prono:
                                        # Relative pronouns are left alone.
                                        replacement_word = word
                                    elif word_nopunct.lower()[-2:] == "ly":
                                        replacement_word = import_utilities.strip_underscore(
                                            import_utilities.find_synset_word(
                                                word))
                                    else:
                                        # Synset lookup returned (essentially)
                                        # the same word: harvest it into the
                                        # RESERVOIR and pick a replacement
                                        # from there instead.
                                        if len(
                                                hyp
                                        ) < 2 and "like" not in word_nopunct and import_utilities.singularize(
                                                word_nopunct
                                        ) == import_utilities.singularize(
                                                replacement_word
                                        ) and word_nopunct.lower(
                                        ) not in import_utilities.stopwords_ls:
                                            if word_nopunct not in RESERVOIR and quit_language < 0 and import_utilities.countPunctuation(
                                                    word) < 1 and len(
                                                        word_nopunct
                                                    ) > 3 and not word_nopunct.istitle():
                                                RESERVOIR.append(word)
                                            replacement_word = random.choice(
                                                RESERVOIR)
                                    if quit_language > 1 and not word_nopunct.istitle(
                                    ):
                                        # Probably a foreign language: make a
                                        # word salad in English instead.
                                        replacement_word = random.choice(RESERVOIR)
                                    # REPLACEMENT: splice the new word back in
                                    # at the first occurrence of `word`.
                                    poem_ls = poem_replaced.split(' ')
                                    idx = poem_ls.index(word)
                                    if poem_ls[idx] == word:
                                        poem_ls[idx] = replacement_word
                                    poem_replaced = " ".join(poem_ls)

                        # CORRECT the "A" to "An" (and vice versa) where the
                        # following word was replaced.
                        for idx, word in enumerate(poem_replaced.split(" ")):
                            if len(word) > 0 and word[0].lower(
                            ) in the_vowels and poem_replaced.split(" ")[
                                    idx - 1].lower() == "a":
                                if poem_replaced.split(" ")[idx - 1] == "a":
                                    old_str = "a " + poem_replaced.split(" ")[idx]
                                    new_str = "an " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "A " + poem_replaced.split(" ")[idx]
                                    new_str = "An " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(
                                    old_str, new_str)
                            if len(word) > 0 and word[0].lower(
                            ) not in the_vowels and poem_replaced.split(" ")[
                                    idx - 1].lower() == "an":
                                if poem_replaced.split(" ")[idx - 1] == "an":
                                    old_str = "an " + poem_replaced.split(" ")[idx]
                                    new_str = "a " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "An " + poem_replaced.split(" ")[idx]
                                    new_str = "A " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(
                                    old_str, new_str)

                        #########################
                        #   WRITE SINGLE POEM   #
                        #########################
                        tmp_poem = ""
                        HTML_poem = ""
                        for line in poem_replaced.split("\n"):
                            lines_total += 1
                            HTML_poem += line + "<br>"

                        if len(response) > 0 and len(id.split("_")) > 1:
                            # Append this poem to the accumulated HTML.
                            ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>" + author + "</b>, <i>" + title + "</i> ]<br><br><b>" + new_title + "<br><br></b>" + HTML_poem
                            tmp_poem = "[A poem generated from template: " + author + ", '" + title + "'']\n\n'" + new_title + "'\nby\n" + new_author + "\n\n" + poem_replaced
                            #####################
                            #     PAUSE IT      #
                            #####################
                            print "sub_cnt=", sub_cnt
                            if sub_cnt >= 1:
                                raw_input("Press Enter to continue...")
                            #####################
                            #       PRINT       #
                            #####################
                            print "\n******\n" + tmp_poem
                            txt_fn = id.split("_")[1] + "_POEMs.txt"
                            txt_fn_path = GENERATED_DIR + txt_fn
                            # NOTE(review): no with-block / encoding handling;
                            # tmp_poem is written as raw bytes.
                            f_txt = open(txt_fn_path, 'w')
                            f_txt.write(tmp_poem)
                            f_txt.close()
                        else:
                            # NOTE(review): bare expression -- nothing is
                            # printed; a `print` statement was likely intended.
                            "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
def extractFeaturesAndWriteBio(READ_PATH, file_type):
    """Duplicate (non-pausing) variant of extractFeaturesAndWriteBio.

    Walks READ_PATH and, for each matching poem file, generates a mutated
    "poem" from its bio text: nudges years, swaps named entities (from the
    per-poem Alchemy JSON), swaps adjectives/adverbs and synset-similar
    words, fakes the author/title, then prints the result and writes it to
    GENERATED_DIR. Unlike the earlier copy in this file, it does not pause
    between poems.

    NOTE(review): this redefinition shadows the earlier function of the
    same name at import time -- only one survives; consider deleting one.
    NOTE(review): indentation was reconstructed from a flattened source;
    nesting of the sections from "POS REPLACMENT" onward inside the
    ``response != "failed"`` branch is inferred.

    Depends on module globals defined elsewhere: ALL_poems, bio, cnt,
    start_time, filenames, DATA_DIR, READ_TXT_PATH, READ_JSON_PATH,
    GENERATED_DIR, authors, JJ, RB, personal_pronouns, RESERVOIR,
    the_vowels, plus getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON and import_utilities.
    """
    global ALL_poems, bio, cnt, start_time
    inp = 0          # poems requested in the current interactive batch
    sub_cnt = 0      # poems generated so far in the current batch
    words_total = 0  # words processed (for the batch stats report)
    lines_total = 0  # lines processed (for the batch stats report)
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files) - 1  # deduct the DS_store
            if file_type in file and 'readme' not in file:
                # ID: filename stem, e.g. "poetryFoundation_12345"
                id = file.split(".")[0]
                filenames.append(id)
                cnt += 1

                ##############
                # HOW MANY?  #
                ##############
                # Interactive batching: after `inp` poems, report stats
                # and ask the user for the next batch size.
                sub_cnt += 1
                if sub_cnt >= inp:
                    if inp != 0:
                        end_time = time.time()
                        es = end_time - start_time
                        print sub_cnt, "poems,\n", lines_total, "lines, &\n", words_total, "words \ngenerated in\n", ("%.2f" % es), "seconds"
                        words_total = 0
                        lines_total = 0
                    # RESTART the batch counter and prompt again.
                    sub_cnt = 0
                    # NOTE(review): Python-2 input() eval()s whatever is
                    # typed; raw_input() + int() would be safer.
                    inp = input("\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                    start_time = time.time()

                print 'Poem #', sub_cnt + 1

                # Per-poem state.
                poem_replaced = ""
                replacement_word = ""
                author = ""
                titles = ""
                title = ""
                new_title = ""
                replaced_ls = []      # log of every substitution made
                new_titles_ls = []
                quit_language = 0     # >0 suggests the poem is not English

                #################################################################
                # Load POEM TEXT FILE (based on id extracted from Alchemy JSON) #
                #################################################################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1] + ".txt"
                if os.path.isfile(txt_fn_path) and cnt > 0:
                    txt_data = open(txt_fn_path).read()
                    # File layout: author ****!**** title ****!**** bio
                    author = txt_data.split("****!****")[0].strip(' \t\n\r')
                    title = txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string; all
                    # three results are discarded, so this "cleaning" is a
                    # no-op as written.
                    bio.replace("\t", "	")
                    bio.replace("\n", " <br>")
                    bio.replace("\r", " <br>")
                    poem_replaced = bio

                    ###############################
                    # REPLACE AUTHOR NAME
                    ##############################
                    author_ln = author.split(" ")[-1]
                    author_fn = author.split(" ")[:-1]
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # fake AUTHOR
                    #######################
                    # Splice middle-of-name words from one random author with
                    # the second-to-last word of another.
                    new_author = " ".join(random.choice(authors).split(" ")[1:-2]) + " " + random.choice(authors).split(" ")[-2]

                    #######################
                    # replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')

                    ############################
                    # replace years with another
                    ############################
                    # Any "(1987)"-style number is nudged by +/-5 years.
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(int(w2) - 5, int(w2) + 5)
                                poem_replaced = poem_replaced.replace(w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH + "poetryFoundation_" + id.split("_")[1] + "_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx, entity in enumerate(response['entities']):
                                # Scrub mis-encoded bytes before matching.
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string is a no-op;
                                    # `content` keeps its previous loop value
                                    # (NameError if the first entity fails).
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:
                                    ################################################
                                    #   Replace similar entities from other JSON   #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content, entity['type'])
                                    # NOTE(review): `cr` is computed but never
                                    # used; the raw replacement_entity is
                                    # substituted instead.
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                        ##########################
                        #    POS REPLACMENT      #
                        ##########################
                        token_tuples = nltk.word_tokenize(poem_replaced)
                        tt = nltk.pos_tag(token_tuples)

                        #################
                        #  ADJECTIVES   #
                        #################
                        # Swap adjectives (JJ) and adverbs (RB) for random
                        # picks from the JJ / RB pools, preserving title-case
                        # and surrounding punctuation. Only the first
                        # occurrence is replaced (count=1).
                        for i in tt:
                            if "/i" not in i[0] and len(i[0]) > 3 and i[0] != "died":
                                origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                                origw = import_utilities.strip_punctuation(origw)
                                if i[1] == 'JJ':
                                    JJr = random.choice(JJ)
                                    JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], JJr.lstrip().lstrip())
                                    if i[0].istitle():
                                        JJr = JJr.title()
                                    poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced, 1)
                                    replaced_ls.append(JJr)
                                if i[1] == 'RB':
                                    RBr = random.choice(RB)
                                    RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], RBr.lstrip().lstrip())
                                    if i[0].istitle():
                                        RBr = RBr.title()
                                    poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced, 1)
                                    replaced_ls.append(RBr)

                        ########################
                        #    IS IT ENGLISH?    #
                        ########################
                        # Vote per line: quit_language ends up positive for
                        # mostly-non-English poems, negative for English.
                        for line in poem_replaced.split('\n\r'):
                            if len(line) > 0:
                                if "english" not in import_utilities.get_language(line):
                                    quit_language += 1
                                else:
                                    quit_language -= 1

                        #########################
                        #    SYNSET REPLACE     #
                        #########################
                        # Word-by-word pass over the poem, replacing each
                        # non-stopword with a WordNet-ish relative or a word
                        # from the RESERVOIR.
                        for idx, word in enumerate(poem_replaced.split(' ')):
                            if "<br>" not in word and "	" not in word and len(word) > 0 and "~~~~!~~~" not in word:
                                words_total += 1
                                #########################
                                #   PRONOUN ' VERB      #
                                #########################
                                # Contractions like "she'll": swap the pronoun.
                                if len(word.split("'")) > 1:
                                    if word.split("'")[0] in personal_pronouns:
                                        replacement_word = random.choice(personal_pronouns) + "'" + word.split("'")[1] + ' '
                                        # NOTE(review): replace() result is
                                        # discarded -- this swap never lands.
                                        poem_replaced.replace(word, replacement_word)
                                ####################################################
                                #   Replacement of OTHERs                          #
                                ####################################################
                                elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):
                                    # take off leading brackets, commas etc...
                                    word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                    word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                    word_punct = word_punct_nopunct['punct']
                                    punct_bool = word_punct_nopunct['punct_bool']
                                    #######################################################
                                    # MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET   #
                                    #######################################################
                                    if word_nopunct[-4:].lower() == "here":
                                        similarterm = random.choice(import_utilities.heres)
                                    else:
                                        if len(word_nopunct) > 2:
                                            similarterm = import_utilities.find_synset_word(word_nopunct)
                                    # NOTE(review): for short words neither
                                    # branch assigns, so `similarterm` may be
                                    # unbound or stale from a previous word.
                                    ############################################
                                    # manually get rid of some terrible choices
                                    ############################################
                                    if similarterm == "ilk":
                                        similarterm = "like"
                                    if similarterm == "ope":
                                        similarterm = "does"
                                    if similarterm == "information technology":
                                        similarterm = "it"
                                    #######################################
                                    # abbreviations for f*****g states!   #
                                    #######################################
                                    if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                        tmp = similarterm
                                        if word_nopunct == "oh":
                                            similarterm = random.choice(import_utilities.exclaims)
                                        else:
                                            similarterm = random.choice(RESERVOIR)
                                    ##############
                                    # hyphenated #
                                    ##############
                                    # Rebuild hyphenated words part by part.
                                    hyp = word.split("-")
                                    if len(hyp) > 1:
                                        similarterm = ""
                                        for w in hyp:
                                            if len(w) > 2:
                                                similarterm += import_utilities.find_synset_word(w) + "-"
                                        similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #########################################################
                                    # is it a TRUNCATED VERB slang as in singin or wishin   #
                                    #########################################################
                                    if similarterm == word_nopunct and len(word) > 2 and 'in' in word_nopunct[-2:]:
                                        similarterm = import_utilities.find_synset_word(word_nopunct + 'g')
                                        interim = import_utilities.lemma(similarterm)
                                        # Re-conjugate as participle, then drop
                                        # the final 'g' to restore the slang.
                                        similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]
                                    #################
                                    #  SWEAR WORD   #
                                    #################
                                    # Curses only ever map to other curses.
                                    if word_nopunct in import_utilities.curses:
                                        similarterm = random.choice(import_utilities.curses)
                                    if len(hyp) > 1:
                                        replacement_word = similarterm
                                    else:
                                        # Keep the original word's punctuation.
                                        replacement_word = word.replace(word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(replacement_word)
                                    ##########################
                                    # RESERVOIR_OF_WEIRDNESS #
                                    ##########################
                                    # Closed word classes are swapped within
                                    # their own class; everything else may fall
                                    # back to the RESERVOIR word-salad pool.
                                    if word_nopunct.lower() in import_utilities.impera:
                                        replacement_word = random.choice(import_utilities.impera)
                                    elif word_nopunct.lower() in import_utilities.conjuncts:
                                        replacement_word = random.choice(import_utilities.conjuncts)
                                    elif word_nopunct.lower() in import_utilities.indef_prono:
                                        replacement_word = random.choice(import_utilities.indef_prono)
                                    elif word_nopunct.lower() in import_utilities.prepo:
                                        replacement_word = random.choice(import_utilities.prepo)
                                    elif word_nopunct.lower() in import_utilities.rel_prono:
                                        # Relative pronouns are left alone.
                                        replacement_word = word
                                    elif word_nopunct.lower()[-2:] == "ly":
                                        replacement_word = import_utilities.strip_underscore(import_utilities.find_synset_word(word))
                                    else:
                                        # Synset lookup returned (essentially)
                                        # the same word: harvest it into the
                                        # RESERVOIR and pick a replacement
                                        # from there instead.
                                        if len(hyp) < 2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                            if word_nopunct not in RESERVOIR and quit_language < 0 and import_utilities.countPunctuation(word) < 1 and len(word_nopunct) > 3 and not word_nopunct.istitle():
                                                RESERVOIR.append(word)
                                            replacement_word = random.choice(RESERVOIR)
                                    if quit_language > 1 and not word_nopunct.istitle():
                                        # Probably a foreign language: make a
                                        # word salad in English instead.
                                        replacement_word = random.choice(RESERVOIR)
                                    # REPLACEMENT: splice the new word back in
                                    # at the first occurrence of `word`.
                                    poem_ls = poem_replaced.split(' ')
                                    idx = poem_ls.index(word)
                                    if poem_ls[idx] == word:
                                        poem_ls[idx] = replacement_word
                                    poem_replaced = " ".join(poem_ls)

                        # CORRECT the "A" to "An" (and vice versa) where the
                        # following word was replaced.
                        for idx, word in enumerate(poem_replaced.split(" ")):
                            if len(word) > 0 and word[0].lower() in the_vowels and poem_replaced.split(" ")[idx - 1].lower() == "a":
                                if poem_replaced.split(" ")[idx - 1] == "a":
                                    old_str = "a " + poem_replaced.split(" ")[idx]
                                    new_str = "an " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "A " + poem_replaced.split(" ")[idx]
                                    new_str = "An " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(old_str, new_str)
                            if len(word) > 0 and word[0].lower() not in the_vowels and poem_replaced.split(" ")[idx - 1].lower() == "an":
                                if poem_replaced.split(" ")[idx - 1] == "an":
                                    old_str = "an " + poem_replaced.split(" ")[idx]
                                    new_str = "a " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "An " + poem_replaced.split(" ")[idx]
                                    new_str = "A " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(old_str, new_str)

                        #########################
                        #   WRITE SINGLE POEM   #
                        #########################
                        tmp_poem = ""
                        HTML_poem = ""
                        for line in poem_replaced.split("\n"):
                            lines_total += 1
                            HTML_poem += line + "<br>"

                        if len(response) > 0 and len(id.split("_")) > 1:
                            # Append this poem to the accumulated HTML.
                            ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>" + author + "</b>, <i>" + title + "</i> ]<br><br><b>" + new_title + "<br><br></b>" + HTML_poem
                            tmp_poem = "[A poem generated from template: " + author + ", '" + title + "'']\n\n'" + new_title + "'\nby\n" + new_author + "\n\n" + poem_replaced
                            print "\n******\n" + tmp_poem
                            txt_fn = id.split("_")[1] + "_POEMs.txt"
                            txt_fn_path = GENERATED_DIR + txt_fn
                            # NOTE(review): no with-block / encoding handling;
                            # tmp_poem is written as raw bytes.
                            f_txt = open(txt_fn_path, 'w')
                            f_txt.write(tmp_poem)
                            f_txt.close();
                        else:
                            # NOTE(review): bare expression -- nothing is
                            # printed; a `print` statement was likely intended.
                            "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
def extractFeaturesAndWriteBio(READ_PATH, file_type):
    """Walk READ_PATH and generate one mutated 'poem' per matching file.

    Pipeline per file: load the author/title/bio text, fake the author and
    book title, nudge 4-digit years, replace AlchemyAPI entities with
    similar entities from other JSON files, swap adjectives/adverbs via
    NLTK POS tags, run word-by-word synset replacement, then repair
    conjugation and articles with pattern.en; finally write the single
    poem and the accumulated anthology HTML to disk.

    NOTE(review): Python 2 code. Formatting below is reconstructed from a
    whitespace-mangled source; the two trailing `else:` pairings are a
    best reconstruction -- confirm against the original file. Relies on
    many module globals (DATA_DIR, READ_TXT_PATH, READ_JSON_PATH,
    filenames, authors, JJ, RB, rap_mouth, science_mouth, RESERVOIR,
    personal_pronouns, english_dict, start_time, type_of_run,
    ALL_poems_intro, ...) -- verify they exist at module level.
    """
    global ALL_poems, bio, cnt
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files) - 1  # deduct the DS_store
            if file_type in file and 'readme' not in file:

                # ID is the filename stem, e.g. "poetryFoundation_12345"
                id = file.split(".")[0]
                print "\nID:", id.split("_")[1]
                filenames.append(id)
                cnt += 1

                # per-poem working state
                poem_replaced = ""
                replacement_word = ""
                previous_replacement_word = ""
                author = ""
                titles = ""
                title = ""
                new_title = ""
                replaced_ls = []
                new_titles_ls = []
                quit_language = 0  # language-detector vote: > 0 means probably not English
                oscillator = 0

                # if EXCEPTION is raised... do not add to html
                SKIP_bool = False

                ##########################
                #  Load POEM TEXT FILE   #
                ##########################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1] + ".txt"
                if os.path.isfile(txt_fn_path) and cnt > 0:
                    # NOTE(review): file handle is never closed; bytes are
                    # read without an explicit decode (Python 2 str).
                    txt_data = open(txt_fn_path).read()

                    # source file layout: author ****!**** title ****!**** bio
                    author = txt_data.split("****!****")[0].strip(' \t\n\r')
                    title = txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')

                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a new string; all
                    # three results are discarded, so bio is NOT cleaned here.
                    bio.replace("\t", "	")
                    bio.replace("\n", " <br>")
                    bio.replace("\r", " <br>")
                    poem_replaced = bio

                    ###############################
                    # REPLACE AUTHOR NAME in poem
                    ##############################
                    author_ln = author.split(" ")[-1].lstrip()
                    author_fn = author.split(" ")[:-1]
                    # NOTE(review): no separator before the last name, so
                    # "John Smith" becomes "JohnSmith" -- confirm intended.
                    author = " ".join(n for n in author_fn) + author_ln
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # replace BOOK TITLES
                    #######################
                    # NOTE(review): literal "title" argument -- presumably a
                    # mode flag for getNewTitle, not the poem title; verify.
                    new_title = getNewTitle("title").encode('utf-8')

                    #######################
                    # fake AUTHOR
                    #######################
                    # Build a plausible fake name from two random entries of
                    # the module-level `authors` list.
                    new_author = " ".join(random.choice(authors).split(" ")[1:-2]) + " " + random.choice(authors).split(" ")[-2]

                    ############################
                    # replace years with another
                    ############################
                    # Any all-digit token found between parentheses is
                    # shifted by a random offset in [-5, +5].
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(int(w2) - 5, int(w2) + 5)
                                poem_replaced = poem_replaced.replace(w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH + "poetryFoundation_" + id.split("_")[1] + "_Alchemy_JSON.txt")

                    if response != "failed":
                        if response.get('entities') is not None:
                            ################################################
                            #  Replace similar entities from other JSON    #
                            ################################################
                            for idx, entity in enumerate(response['entities']):
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): no-op expression; on failure
                                    # `content` keeps the previous iteration's
                                    # value (or is unbound on first iteration).
                                    "AAAARGGGGHHH!!!!"
                                if content in poem_replaced:
                                    replacement_entity = findSimilarEntityinRandomJSON(content, entity['type'])
                                    # `cr` computed but unused -- left as-is.
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                            ##########################
                            #    POS REPLACMENT      #
                            ##########################
                            token_tuples = nltk.word_tokenize(poem_replaced)
                            tt = nltk.pos_tag(token_tuples)

                            #################
                            #  ADJECTIVES   #
                            #################
                            # Swap each adjective (JJ) / adverb (RB) for a
                            # random entry from the module-level JJ / RB pools,
                            # preserving title-case and edge punctuation.
                            for i in tt:
                                if "/i" not in i[0] and len(i[0]) > 2 and i[0] != "died":
                                    origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                                    origw = import_utilities.strip_punctuation(origw)
                                    if i[1] == 'JJ':
                                        JJr = random.choice(JJ)
                                        JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], JJr.lstrip().lstrip())
                                        if i[0].istitle():
                                            JJr = JJr.title()
                                        # replace first whole-word occurrence only
                                        poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced, 1)
                                        replaced_ls.append(JJr)
                                    if i[1] == 'RB':
                                        RBr = random.choice(RB)
                                        RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], RBr.lstrip().lstrip())
                                        if i[0].istitle():
                                            RBr = RBr.title()
                                        poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced, 1)
                                        replaced_ls.append(RBr)

                            ########################
                            #   IS IT ENGLISH?     #
                            ########################
                            # Vote per line: non-English lines increment
                            # quit_language, English lines decrement it.
                            for line in poem_replaced.split('\n\r'):
                                if len(line) > 0:
                                    if "english" not in import_utilities.get_language(line):
                                        quit_language += 1
                                    else:
                                        quit_language -= 1

                            #########################
                            #    SYNSET REPLACE     #
                            #########################
                            for idx, word in enumerate(poem_replaced.split(' ')):
                                if "<br>" not in word and "	" not in word and len(word) > 0:

                                    #########################
                                    #    PRONOUN ' VERB     #
                                    #########################
                                    # contractions like "she's": swap the pronoun
                                    if len(word.split("'")) > 1:
                                        if word.split("'")[0] in personal_pronouns:
                                            replacement_word = random.choice(personal_pronouns) + "'" + word.split("'")[1] + ' '
                                            # NOTE(review): result discarded --
                                            # this replace has no effect.
                                            poem_replaced.replace(word, replacement_word)

                                    ####################################################
                                    #   Replacement of OTHERs                          #
                                    ####################################################
                                    else:
                                        # take off leading brackets, commas etc...
                                        word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                        word_nopunct = word_punct_nopunct['word'].strip(" .\n\t\r")
                                        word_punct = word_punct_nopunct['punct']
                                        punct_bool = word_punct_nopunct['punct_bool']

                                        #######################################################
                                        # MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET   #
                                        #######################################################
                                        similarterm = ""
                                        if word_nopunct[-4:].lower() == "here":
                                            similarterm = random.choice(import_utilities.heres)
                                        else:
                                            if len(word_nopunct) > 3:
                                                # oscillator counted for a (currently
                                                # disabled) style-switch; only the
                                                # synset lookup is active.
                                                oscillator = oscillator + 1
                                                similarterm = import_utilities.find_synset_word(word_nopunct)  # synset

                                        ############################################
                                        # manually get rid of some terrible choices
                                        ############################################
                                        if similarterm == "ilk":
                                            similarterm = "like"
                                        if similarterm == "ope":
                                            similarterm = "does"
                                        if similarterm == "information technology":
                                            similarterm = "it"
                                        if similarterm == "velleity":
                                            similarterm = "want"
                                        if similarterm == "Crataegus laevigata":
                                            similarterm = "may"
                                        if similarterm == "brunet" or similarterm == "ot":
                                            similarterm = random.choice(import_utilities.curses)
                                        if similarterm == "ge":
                                            similarterm = random.choice(science_mouth)
                                        if similarterm.lower() == "nox":
                                            similarterm = random.choice(science_mouth)
                                        if similarterm.lower() == "paunited":
                                            print "################### paUnited ###################"
                                            similarterm = word

                                        #######################################
                                        #  abbreviations for f*****g states!  #
                                        #######################################
                                        if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                            tmp = similarterm
                                            if word_nopunct == "oh":
                                                similarterm = random.choice(import_utilities.exclaims)
                                            else:
                                                similarterm = random.choice(rap_mouth)  # RESERVOIR)RESERVOIR)

                                        ##############
                                        # hyphenated #
                                        ##############
                                        # rebuild hyphenated words part by part
                                        hyp = word.split("-")
                                        if len(hyp) > 1:
                                            similarterm = ""
                                            for w in hyp:
                                                if len(w) > 2:
                                                    similarterm += import_utilities.find_synset_word(w) + "-"
                                            similarterm = import_utilities.strip_underscore(similarterm[:-1])

                                        #########################################################
                                        #  is it a TRUNCATED VERB slang as in singin or wishin  #
                                        #########################################################
                                        if similarterm == word_nopunct and len(word) > 2 and 'in' in word_nopunct[-2:]:
                                            # try again with the final g restored,
                                            # then re-conjugate to a participle
                                            similarterm = import_utilities.find_synset_word(word_nopunct + 'g')
                                            interim = import_utilities.lemma(similarterm)
                                            similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]

                                        #################
                                        #  SWEAR WORD   #
                                        #################
                                        if word_nopunct in import_utilities.curses:
                                            similarterm = random.choice(import_utilities.curses)

                                        # re-attach the original punctuation
                                        if len(hyp) > 1:
                                            replacement_word = similarterm
                                        else:
                                            replacement_word = word.replace(word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(replacement_word)

                                        ##########################
                                        # RESERVOIR_OF_WEIRDNESS #
                                        ##########################
                                        # function words get a same-class random
                                        # swap; everything else may draw from the
                                        # harvested RESERVOIR.
                                        if word_nopunct.lower() in import_utilities.impera:
                                            replacement_word = random.choice(import_utilities.impera)
                                        elif word_nopunct.lower() in import_utilities.conjuncts:
                                            replacement_word = random.choice(import_utilities.conjuncts)
                                        elif word_nopunct.lower() in import_utilities.indef_prono:
                                            replacement_word = random.choice(import_utilities.indef_prono)
                                        elif word_nopunct.lower() in import_utilities.prepo:
                                            replacement_word = random.choice(import_utilities.prepo)
                                        elif word_nopunct.lower() in import_utilities.rel_prono:
                                            replacement_word = word
                                        elif word_nopunct.lower()[-2:] == "ly" or word_nopunct.lower()[-3:] == "ly.":
                                            replacement_word = import_utilities.strip_underscore(import_utilities.find_synset_word(word))
                                        else:
                                            # synset lookup returned the same word:
                                            # harvest it into RESERVOIR, then pick a
                                            # random reservoir word instead
                                            if len(hyp) < 2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                                if word not in RESERVOIR and import_utilities.countPunctuation(word) < 1 and len(word_nopunct) > 3 and not word_nopunct.istitle():
                                                    if len(word) > 4 and english_dict.check(word):
                                                        RESERVOIR.append(word)
                                                replacement_word = random.choice(RESERVOIR)
                                            # probably foreign language: word salad
                                            if quit_language > 1 and not word_nopunct.istitle():
                                                replacement_word = random.choice(rap_mouth)

                                        ###################################################
                                        #      MOST REPLACEMENT occurs here...            #
                                        ###################################################
                                        poem_ls = poem_replaced.split(' ')
                                        # NOTE(review): .index() raises ValueError if
                                        # `word` is gone (earlier edits) -- and it is
                                        # OUTSIDE the try below, so that error escapes.
                                        idx = poem_ls.index(word)
                                        try:
                                            if poem_ls[idx] == word and "****" not in word and "." != word and "\n" not in word:
                                                poem_ls[idx] = replacement_word
                                                # NOTE(review): no-op tuple expression
                                                # (a `print` is evidently missing).
                                                "REPLACE", word, "with", replacement_word
                                                poem_replaced = " ".join(poem_ls)
                                                # store this word so that conjugation can be checked
                                                previous_replacement_word = replacement_word
                                        except Exception, e:
                                            print "PENULTIMATE SKIP_bool replace FAIL", e
                                            SKIP_bool = True
                                            continue

                            ###########################################################################
                            # testing Pattern.en as parser for conjugation and article replacement    #
                            # much more robust than my hand-coded hacks                               #
                            ###########################################################################
                            # correct CONJUGATion of participle verbs with pattern.en:
                            # after 'm / 's / 're, force the next verb to participle.
                            parsed = parse(poem_replaced, tags=True)
                            pre_verbal = ["'m", "'s", "'re"]
                            for idx, p in enumerate(parsed.split(" ")):
                                tok = p.split("/")[0]
                                typ = p.split("/")[1]
                                if tok in pre_verbal:
                                    # NOTE(review): idx + 1 may run past the end on a
                                    # trailing pre_verbal token (IndexError).
                                    next_word = parsed.split(" ")[idx + 1].split("/")
                                    for ix, n in enumerate(next_word):
                                        next_word[ix] = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, n).encode('utf-8')
                                    try:
                                        # if it's a verb that follows
                                        if next_word[1][:2] == "VB":
                                            before_verb = " ".join(w for w in poem_replaced.split(" ")[:idx])
                                            after_verb = " ".join(w for w in poem_replaced.split(" ")[idx + 1:])
                                            new_verb = conjugate(next_word[0], tense=PARTICIPLE, parse=True).encode('utf-8')
                                            poem_replaced = before_verb + " " + new_verb + " " + after_verb
                                    except Exception, e:
                                        continue

                            # correct ARTICLES: pattern.en `referenced` picks a/an
                            for idx, word in enumerate(poem_replaced.split(" ")):
                                if len(word) > 0 and idx != 0 and " " not in word:
                                    # A or AN
                                    if poem_replaced.split(" ")[idx - 1].lower() == "a" or poem_replaced.split(" ")[idx - 1].lower() == "an":
                                        before_article = " ".join(w for w in poem_replaced.split(" ")[:idx - 1])
                                        after_article = " ".join(w for w in poem_replaced.split(" ")[idx + 1:])
                                        new_conj = referenced(word)
                                        # capitalize
                                        if poem_replaced.split(" ")[idx - 1].istitle():
                                            new_conj = new_conj.split(" ")[0].title() + " " + new_conj.split(" ")[1]
                                        poem_replaced = before_article + " " + new_conj + " " + after_article

                            #########################
                            #   WRITE SINGLE POEM   #
                            #########################
                            if not SKIP_bool:
                                tmp_poem = ""
                                HTML_poem = ""
                                for line in poem_replaced.split("\n"):
                                    HTML_poem += line + "<br>"
                                if len(response) > 0 and len(id.split("_")) > 1:
                                    try:
                                        # prepend the newest poem to the anthology body
                                        # (kept after the intro's closing </h2>)
                                        ALL_poems = "<br>[ A generated-poem based upon: <i>" + title + "</i> by <b>" + author + "</b>]<br><br><i>" + new_title + "</i><br> by <b>" + new_author + "</b><br>" + HTML_poem + ALL_poems.split("</h2>")[1].replace(" ", " ")
                                        tmp_poem = "[A generated-poem based upon: '" + title + "' by " + author + "]\n\n" + new_title + "\nby " + new_author + "\n" + poem_replaced
                                        print "\n~~~\n\n" + tmp_poem
                                        txt_fn = id.split("_")[1] + "_POEMs.txt"
                                        WRITE_BIO_PATH = DATA_DIR + "generated/POEMS/POEMS_" + datetime.datetime.now().strftime('%Y-%m-%d_%H') + "/"
                                        if not os.path.exists(WRITE_BIO_PATH):
                                            os.makedirs(WRITE_BIO_PATH)
                                        txt_fn_path = WRITE_BIO_PATH + txt_fn
                                        f_txt = open(txt_fn_path, 'w')
                                        f_txt.write(tmp_poem)
                                        f_txt.close()

                                        # write them all.... wasteful... but useful if run is interrupted....
                                        ALL_poems = ALL_poems_intro + ALL_poems.replace(" ", " ")
                                        ALL_poems = ALL_poems.replace("$$datetime$$", datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                                        ALL_poems = ALL_poems.replace("$$cnt$$", str(cnt))
                                        ALL_poems = ALL_poems.replace("$$gentime$$", str(time.time() - start_time))

                                        # ALL POEMS
                                        txt_fn = datetime.datetime.now().strftime('%Y-%m-%d_%H') + "_poetryFoundation_generatedPOEMS_" + type_of_run + ".html"
                                        txt_fn_path = DATA_DIR + "generated/POEMS/" + txt_fn
                                        f_txt = open(txt_fn_path, 'w')
                                        # NOTE(review): "</hmtl>" is a typo in the
                                        # runtime string (should be "</html>") --
                                        # left unchanged in this doc-only pass.
                                        f_txt.write(ALL_poems + "</hmtl>")
                                        f_txt.close()
                                    except Exception, e:
                                        print "At the final LOOP", e
                                        continue
                                else:
                                    print "~! EMPTY response:", author
                else:
                    # file missing (or cnt not positive): un-count this file
                    cnt = cnt - 1
similarterm += import_utilities.find_synset_word(w)+"-" similarterm = import_utilities.strip_underscore(similarterm[:-1]) #print "hyphenated:",word,"replaced by: "+similarterm ######################################################### # is it a TRUNCATED VERB slang as in singin or wishin # ######################################################### if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]: similarterm = import_utilities.find_synset_word(word_nopunct+'g') ## #print "TRUNCATED SLANG word: '"+word+"'",similarterm interim = import_utilities.lemma(similarterm) ## #print interim similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1] # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1] ################# # SWEAR WORD # ################# ##print "at the garden of if:", word if word_nopunct in import_utilities.curses: similarterm = random.choice(import_utilities.curses) ##print "SWEAR WORD word: '"+word+"'",similarterm if len(hyp) >1: replacement_word = similarterm else: