def extractFeaturesAndWriteBio(READ_PATH,file_type):
    """Generate a replacement-glitched poem from every bio/poem file under READ_PATH.

    For each file whose name contains `file_type` (readmes skipped), the text file
    is split on "****!****" into author / title / bio, then mutated in passes:
      1. bracketed years/numbers are jittered by +-5,
      2. AlchemyAPI-JSON named entities are swapped for similar entities,
      3. adjectives (JJ) and adverbs (RB) are replaced from the JJ/RB pools,
      4. remaining non-stopwords are swapped via synsets / the RESERVOIR pool,
      5. a/an agreement is repaired,
    and the result is printed and written to GENERATED_DIR as <id>_POEMs.txt.
    Interactive: asks on stdin how many poems to generate per batch.

    NOTE(review): Python 2 code (print statements; `input()` evals the reply).
    NOTE(review): re-defined later in this file with the same name; only the
    later definition survives at runtime, so this copy is dead code.
    NOTE(review): depends on module globals defined elsewhere (os, random, re,
    nltk, stopwords, import_utilities, filenames, DATA_DIR, READ_TXT_PATH,
    READ_JSON_PATH, GENERATED_DIR, authors, JJ, RB, personal_pronouns,
    RESERVOIR, the_vowels, getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON) — confirm against the rest of the file.
    """
    global ALL_poems,bio,cnt
    inp=0       # batch size requested by the user
    sub_cnt=0   # poems generated so far in the current batch
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files)-1 # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)
            if file_type in file and 'readme' not in file:
                # ID
                id=file.split(".")[0]
                #print "\n\n*********\nID:",id
                filenames.append(id)
                cnt+=1
                sub_cnt+=1
                if sub_cnt>=inp:
                    # batch finished: ask how many poems to generate next
                    sub_cnt=0
                    inp = input("\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                print 'Poem #',sub_cnt+1
                # per-poem state
                poem_replaced = ""
                replacement_word = ""
                author=""
                titles=""
                title=""
                new_title=""
                replaced_ls =[]
                new_titles_ls = []
                quit_language=0   # language detector score: >0 means "probably not English"
                ##########################
                #   Load POEM TEXT FILE  #
                ##########################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1]+".txt"
                #print "txt_fn_path:",txt_fn_path
                if os.path.isfile(txt_fn_path) and cnt>0:
                    txt_data=open(txt_fn_path).read()
                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8') # unicode(txt_data)
                    author=txt_data.split("****!****")[0].strip(' \t\n\r')
                    title=txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio=txt_data.split("****!****")[2]#.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string and these three
                    # results are discarded, so bio is not actually cleaned here.
                    bio.replace("\t","	")
                    bio.replace("\n"," <br>")
                    bio.replace("\r"," <br>")
                    poem_replaced=bio
                    #print poem_replaced
                    ###############################
                    #   REPLACE AUTHOR NAME
                    ##############################
                    author_ln=author.split(" ")[-1]
                    author_fn=author.split(" ")[:-1]
                    # #poem_replaced = poem_replaced.replace(author_ln,"Jhave")
                    #######################
                    #  fake AUTHOR
                    #######################
                    new_author= " ".join(random.choice(authors).split(" ")[1:-2])+" "+random.choice(authors).split(" ")[-2]
                    #######################
                    #  replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')
                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                # jitter any parenthesised number (e.g. a year) by +-5
                                new_num = random.randint(int(w2)-5,int(w2)+5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(w2,str(new_num))
                                replaced_ls.append(new_num)
                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH+"poetryFoundation_"+id.split("_")[1]+"_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx,entity in enumerate(response['entities']):
                                #print idx
                                ce = entity['text'].replace("0xc2"," ")
                                ce = ce.replace("0xe2","'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string expression — has no effect;
                                    # on decode failure `content` keeps its previous value.
                                    "AAAARGGGGHHH!!!!"
                                if content in poem_replaced:
                                    ################################################
                                    #  Replace similar entities from other JSON    #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content,entity['type'])
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content,replacement_entity)
                                    replaced_ls.append(replacement_entity)
                    ##########################
                    #    POS REPLACMENT      #
                    ##########################
                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)
                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        # i is a (token, POS-tag) pair from nltk.pos_tag
                        if "/i" not in i[0] and len(i[0])>3 and i[0] != "died":
                            origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                            origw =import_utilities.strip_punctuation(origw)
                            if i[1]=='JJ' :
                                # adjective: swap for a random entry from the JJ pool
                                JJr = random.choice(JJ)
                                # # JJr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],JJr.lstrip().lstrip())
                                if i[0].istitle():
                                    JJr = JJr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced,1)#poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1]=='RB':
                                # adverb: swap for a random entry from the RB pool
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],RBr.lstrip().lstrip())
                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced,1)
                                replaced_ls.append(RBr)
                    ########################
                    #    IS IT ENGLISH?    #
                    ########################
                    for line in poem_replaced.split('\n\r'):
                        if len(line)>0 :
                            if "english" not in import_utilities.get_language(line):
                                quit_language+=1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language-=1
                    #########################
                    #    SYNSET REPLACE     #
                    #########################
                    for idx,word in enumerate(poem_replaced.split(' ')):
                        if "<br>" not in word and "	" not in word and len(word)>0 and "~~~~!~~~" not in word:
                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'"))>1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(personal_pronouns)+"'"+word.split("'")[1]+' '
                                    # NOTE(review): replace() result is discarded, so this
                                    # contraction branch never changes the poem.
                                    poem_replaced.replace(word,replacement_word)
                                    #print "word,",word,"replacement_word:",replacement_word
                            ####################################################
                            #   Replacement of OTHERs                          #
                            ####################################################
                            elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):
                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']
                                #######################################################
                                #  MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET  #
                                #######################################################
                                if word_nopunct[-4:].lower()=="here":
                                    similarterm=random.choice(import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct)>2:
                                        similarterm = import_utilities.find_synset_word(word_nopunct)#(word.lstrip().rstrip())
                                        ############################################
                                        # manually get rid of some terrible choices
                                        ############################################
                                        if similarterm == "ilk":
                                            similarterm = "like"
                                        if similarterm == "ope":
                                            similarterm = "does"
                                        if similarterm == "information technology":
                                            similarterm = "it"
                                # NOTE(review): for 1-2 char words neither branch assigns,
                                # so `similarterm` may hold the previous word's value here.
                                #######################################
                                # abbreviations for f*****g states!   #
                                #######################################
                                if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh":
                                        similarterm = random.choice(import_utilities.exclaims)
                                    else:
                                        similarterm = random.choice(RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line
                                ##############
                                # hyphenated #
                                ##############
                                hyp =word.split("-")
                                #print word,len(hyp)
                                if len(hyp) >1:
                                    # rebuild each hyphen segment from its own synset
                                    similarterm=""
                                    for w in hyp:
                                        if len(w) > 2:
                                            similarterm += import_utilities.find_synset_word(w)+"-"
                                    similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm
                                #########################################################
                                #  is it a TRUNCATED VERB slang as in singin or wishin  #
                                #########################################################
                                if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]:
                                    similarterm = import_utilities.find_synset_word(word_nopunct+'g')
                                    interim = import_utilities.lemma(similarterm)
                                    similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]
                                #################
                                #  SWEAR WORD   #
                                #################
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(import_utilities.curses)
                                if len(hyp) >1:
                                    replacement_word = similarterm
                                else:
                                    replacement_word = word.replace(word_nopunct, similarterm)
                                replacement_word = import_utilities.strip_underscore(replacement_word)
                                replacement_word = import_utilities.replaceNumbers(replacement_word)
                                #########################
                                # RESERVOIR_OF_WEIRDNESS #
                                #########################
                                if word_nopunct.lower() in import_utilities.impera:
                                    replacement_word=random.choice(import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower() in import_utilities.conjuncts:
                                    replacement_word=random.choice(import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.indef_prono:
                                    replacement_word=random.choice(import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.prepo:
                                    replacement_word=random.choice(import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.rel_prono:
                                    # relative pronouns are left alone
                                    replacement_word=word
                                elif word_nopunct.lower()[-2:] =="ly":
                                    replacement_word=import_utilities.strip_underscore(import_utilities.find_synset_word(word))#(word[:-2])
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"
                                else:
                                    # synset gave back (a form of) the same word: fall back
                                    # to the RESERVOIR, growing it from English-looking text
                                    if len(hyp) <2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                        if word_nopunct not in RESERVOIR and quit_language<0 and import_utilities.countPunctuation(word)<1 and len(word_nopunct)>3 and not word_nopunct.istitle():
                                            #print "ADDING",word,"to reservoir"
                                            RESERVOIR.append(word)
                                        replacement_word = random.choice(RESERVOIR)
                                    if quit_language>1 and not word_nopunct.istitle():
                                        # probably foreign language: make a word salad in english
                                        replacement_word = random.choice(RESERVOIR)
                                # REPLACEMENT
                                poem_ls = poem_replaced.split(' ')
                                idx = poem_ls.index(word)
                                if poem_ls[idx]==word:
                                    poem_ls[idx]=replacement_word
                                poem_replaced = " ".join(poem_ls)
                                #poem_replaced = poem_replaced.replace(word,replacement_word)
                    # CORRECT the "A" to "An"
                    for idx,word in enumerate(poem_replaced.split(" ")):
                        # e.g. "A organism" -> "An organism"
                        if len(word)>0 and word[0].lower() in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="a" :
                            if poem_replaced.split(" ")[idx-1] =="a":
                                old_str = "a "+poem_replaced.split(" ")[idx]
                                new_str = "an "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "A "+poem_replaced.split(" ")[idx]
                                new_str = "An "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                        # e.g. "An consonant" -> "A consonant"
                        if len(word)>0 and word[0].lower() not in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="an" :
                            if poem_replaced.split(" ")[idx-1] =="an":
                                old_str = "an "+poem_replaced.split(" ")[idx]
                                new_str = "a "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "An "+poem_replaced.split(" ")[idx]
                                new_str = "A "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                            #print "FOUND correction needed",old_str,new_str
                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    tmp_poem=""
                    # poem_replaced.replace("\t","	")
                    # poem_replaced.replace("\n"," <br>")
                    # poem_replaced.replace("\r"," <br>")
                    HTML_poem=""
                    for line in poem_replaced.split("\n"):
                        #print "LINE", line
                        HTML_poem += line+"<br>"
                    if len(response) >0 and len(id.split("_"))>1:
                        # ALL_poems = ALL_poems_intro + " ".join(i for i in ALL_poems.split("</h2>.")[0:])+"<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        tmp_poem= "[A poem generated from template: "+ author+", '"+ title +"'']\n\n'"+new_title+"'\nby\n"+new_author+"\n\n"+poem_replaced
                        print "\n******\n"+tmp_poem
                        #print "\nORIGINAL:",bio
                        txt_fn = id.split("_")[1]+"_POEMs.txt"
                        # WRITE_BIO_PATH = DATA_DIR+"generated/POEMS/POEMS_"+datetime.datetime.now().strftime('%Y-%m-%d_%H')+"/"
                        # if not os.path.exists(WRITE_BIO_PATH):
                        #     os.makedirs(WRITE_BIO_PATH)
                        txt_fn_path = GENERATED_DIR+txt_fn
                        f_txt=open(txt_fn_path,'w')
                        f_txt.write(tmp_poem)#.encode('utf-8'))
                        f_txt.close();
                        #print "\nTXT file created at:",txt_fn_path
                        # #######
                        # # write them all.... wasteful... but useful if run is interrupted....
                        # ###########
                        # ALL_poems = ALL_poems.replace("$$datetime$$",datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                        # ALL_poems = ALL_poems.replace("$$cnt$$",str(cnt))
                        # print "cnt",cnt
                        # ALL_poems = ALL_poems.replace("$$gentime$$",str(time.time() - start_time))
                        # # ALL POEMS
                        # txt_fn = datetime.datetime.now().strftime('%Y-%m-%d_%H')+"_poetryFoundation_generatedPOEMS_"+type_of_run+".html"
                        # txt_fn_path = DATA_DIR+"generated/POEMS/"+txt_fn
                        # f_txt=open(txt_fn_path,'w')
                        # f_txt.write(ALL_poems+"</hmtl>")
                        # f_txt.close();
                        # print "\nTXT file created at:",txt_fn_path
                    else:
                        # NOTE(review): bare tuple expression — nothing is printed here.
                        "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
def extractFeaturesAndWriteBio(READ_PATH,file_type):
    """Generate a replacement-glitched poem from every bio/poem file under READ_PATH.

    Second (surviving) definition: identical to the earlier duplicate in this
    file but additionally tracks per-batch statistics — `words_total`,
    `lines_total` and elapsed seconds via the `start_time` global — and prints
    them when a batch completes.  Pipeline per file whose name contains
    `file_type` (readmes skipped): split text on "****!****" into
    author / title / bio, jitter bracketed numbers, swap Alchemy-JSON entities,
    replace JJ/RB POS words, swap remaining words via synsets / the RESERVOIR
    pool, repair a/an agreement, then print and write to GENERATED_DIR.
    Interactive: asks on stdin how many poems to generate per batch.

    NOTE(review): Python 2 code (print statements; `input()` evals the reply).
    NOTE(review): depends on module globals defined elsewhere (os, random, re,
    time, nltk, stopwords, import_utilities, filenames, DATA_DIR,
    READ_TXT_PATH, READ_JSON_PATH, GENERATED_DIR, authors, JJ, RB,
    personal_pronouns, RESERVOIR, the_vowels, getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON) — confirm against the rest of the file.
    """
    global ALL_poems,bio,cnt, start_time
    inp=0          # batch size requested by the user
    sub_cnt=0      # poems generated so far in the current batch
    words_total=0  # words examined by the synset pass this batch
    lines_total=0  # poem lines emitted this batch
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files)-1 # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)
            if file_type in file and 'readme' not in file:
                # ID
                id=file.split(".")[0]
                #print "\n\n*********\nID:",id
                filenames.append(id)
                cnt+=1
                ##############
                # HOW MANY?  #
                ##############
                sub_cnt+=1
                if sub_cnt>=inp:
                    if inp != 0:
                        # report stats for the batch that just finished
                        end_time = time.time()
                        es = end_time-start_time
                        print sub_cnt, "poems,\n",lines_total,"lines, &\n",words_total,"words \ngenerated in\n",("%.2f" % es),"seconds"
                        words_total=0
                        lines_total=0
                    # RESTART
                    sub_cnt=0
                    inp = input("\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                    start_time = time.time()
                print 'Poem #',sub_cnt+1
                # per-poem state
                poem_replaced = ""
                replacement_word = ""
                author=""
                titles=""
                title=""
                new_title=""
                replaced_ls =[]
                new_titles_ls = []
                quit_language=0   # language detector score: >0 means "probably not English"
                #################################################################
                # Load POEM TEXT FILE (based on id extracted from Alchemy JSON) #
                #################################################################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1]+".txt"
                #print "txt_fn_path:",txt_fn_path
                if os.path.isfile(txt_fn_path) and cnt>0:
                    txt_data=open(txt_fn_path).read()
                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8') # unicode(txt_data)
                    author=txt_data.split("****!****")[0].strip(' \t\n\r')
                    title=txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio=txt_data.split("****!****")[2]#.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string and these three
                    # results are discarded, so bio is not actually cleaned here.
                    bio.replace("\t","	")
                    bio.replace("\n"," <br>")
                    bio.replace("\r"," <br>")
                    poem_replaced=bio
                    #print poem_replaced
                    ###############################
                    #   REPLACE AUTHOR NAME
                    ##############################
                    author_ln=author.split(" ")[-1]
                    author_fn=author.split(" ")[:-1]
                    # #poem_replaced = poem_replaced.replace(author_ln,"Jhave")
                    #######################
                    #  fake AUTHOR
                    #######################
                    new_author= " ".join(random.choice(authors).split(" ")[1:-2])+" "+random.choice(authors).split(" ")[-2]
                    #######################
                    #  replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')
                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                # jitter any parenthesised number (e.g. a year) by +-5
                                new_num = random.randint(int(w2)-5,int(w2)+5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(w2,str(new_num))
                                replaced_ls.append(new_num)
                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH+"poetryFoundation_"+id.split("_")[1]+"_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx,entity in enumerate(response['entities']):
                                #print idx
                                ce = entity['text'].replace("0xc2"," ")
                                ce = ce.replace("0xe2","'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string expression — has no effect;
                                    # on decode failure `content` keeps its previous value.
                                    "AAAARGGGGHHH!!!!"
                                if content in poem_replaced:
                                    ################################################
                                    #  Replace similar entities from other JSON    #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content,entity['type'])
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content,replacement_entity)
                                    replaced_ls.append(replacement_entity)
                    ##########################
                    #    POS REPLACMENT      #
                    ##########################
                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)
                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        # i is a (token, POS-tag) pair from nltk.pos_tag
                        if "/i" not in i[0] and len(i[0])>3 and i[0] != "died":
                            origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                            origw =import_utilities.strip_punctuation(origw)
                            if i[1]=='JJ' :
                                # adjective: swap for a random entry from the JJ pool
                                JJr = random.choice(JJ)
                                # # JJr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],JJr.lstrip().lstrip())
                                if i[0].istitle():
                                    JJr = JJr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced,1)#poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1]=='RB':
                                # adverb: swap for a random entry from the RB pool
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],RBr.lstrip().lstrip())
                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced,1)
                                replaced_ls.append(RBr)
                    ########################
                    #    IS IT ENGLISH?    #
                    ########################
                    for line in poem_replaced.split('\n\r'):
                        if len(line)>0 :
                            if "english" not in import_utilities.get_language(line):
                                quit_language+=1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language-=1
                    #########################
                    #    SYNSET REPLACE     #
                    #########################
                    for idx,word in enumerate(poem_replaced.split(' ')):
                        if "<br>" not in word and "	" not in word and len(word)>0 and "~~~~!~~~" not in word:
                            words_total+=1
                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'"))>1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(personal_pronouns)+"'"+word.split("'")[1]+' '
                                    # NOTE(review): replace() result is discarded, so this
                                    # contraction branch never changes the poem.
                                    poem_replaced.replace(word,replacement_word)
                                    #print "word,",word,"replacement_word:",replacement_word
                            ####################################################
                            #   Replacement of OTHERs                          #
                            ####################################################
                            elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):
                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']
                                #######################################################
                                #  MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET  #
                                #######################################################
                                if word_nopunct[-4:].lower()=="here":
                                    similarterm=random.choice(import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct)>2:
                                        similarterm = import_utilities.find_synset_word(word_nopunct)#(word.lstrip().rstrip())
                                        ############################################
                                        # manually get rid of some terrible choices
                                        ############################################
                                        if similarterm == "ilk":
                                            similarterm = "like"
                                        if similarterm == "ope":
                                            similarterm = "does"
                                        if similarterm == "information technology":
                                            similarterm = "it"
                                # NOTE(review): for 1-2 char words neither branch assigns,
                                # so `similarterm` may hold the previous word's value here.
                                #######################################
                                # abbreviations for f*****g states!   #
                                #######################################
                                if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh":
                                        similarterm = random.choice(import_utilities.exclaims)
                                    else:
                                        similarterm = random.choice(RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line
                                ##############
                                # hyphenated #
                                ##############
                                hyp =word.split("-")
                                #print word,len(hyp)
                                if len(hyp) >1:
                                    # rebuild each hyphen segment from its own synset
                                    similarterm=""
                                    for w in hyp:
                                        if len(w) > 2:
                                            similarterm += import_utilities.find_synset_word(w)+"-"
                                    similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm
                                #########################################################
                                #  is it a TRUNCATED VERB slang as in singin or wishin  #
                                #########################################################
                                if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]:
                                    similarterm = import_utilities.find_synset_word(word_nopunct+'g')
                                    interim = import_utilities.lemma(similarterm)
                                    similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]
                                #################
                                #  SWEAR WORD   #
                                #################
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(import_utilities.curses)
                                if len(hyp) >1:
                                    replacement_word = similarterm
                                else:
                                    replacement_word = word.replace(word_nopunct, similarterm)
                                replacement_word = import_utilities.strip_underscore(replacement_word)
                                replacement_word = import_utilities.replaceNumbers(replacement_word)
                                #########################
                                # RESERVOIR_OF_WEIRDNESS #
                                #########################
                                if word_nopunct.lower() in import_utilities.impera:
                                    replacement_word=random.choice(import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower() in import_utilities.conjuncts:
                                    replacement_word=random.choice(import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.indef_prono:
                                    replacement_word=random.choice(import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.prepo:
                                    replacement_word=random.choice(import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.rel_prono:
                                    # relative pronouns are left alone
                                    replacement_word=word
                                elif word_nopunct.lower()[-2:] =="ly":
                                    replacement_word=import_utilities.strip_underscore(import_utilities.find_synset_word(word))#(word[:-2])
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"
                                else:
                                    # synset gave back (a form of) the same word: fall back
                                    # to the RESERVOIR, growing it from English-looking text
                                    if len(hyp) <2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                        if word_nopunct not in RESERVOIR and quit_language<0 and import_utilities.countPunctuation(word)<1 and len(word_nopunct)>3 and not word_nopunct.istitle():
                                            #print "ADDING",word,"to reservoir"
                                            RESERVOIR.append(word)
                                        replacement_word = random.choice(RESERVOIR)
                                    if quit_language>1 and not word_nopunct.istitle():
                                        # probably foreign language: make a word salad in english
                                        replacement_word = random.choice(RESERVOIR)
                                # REPLACEMENT
                                poem_ls = poem_replaced.split(' ')
                                idx = poem_ls.index(word)
                                if poem_ls[idx]==word:
                                    poem_ls[idx]=replacement_word
                                poem_replaced = " ".join(poem_ls)
                                #poem_replaced = poem_replaced.replace(word,replacement_word)
                    # CORRECT the "A" to "An"
                    for idx,word in enumerate(poem_replaced.split(" ")):
                        # e.g. "A organism" -> "An organism"
                        if len(word)>0 and word[0].lower() in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="a" :
                            if poem_replaced.split(" ")[idx-1] =="a":
                                old_str = "a "+poem_replaced.split(" ")[idx]
                                new_str = "an "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "A "+poem_replaced.split(" ")[idx]
                                new_str = "An "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                        # e.g. "An consonant" -> "A consonant"
                        if len(word)>0 and word[0].lower() not in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="an" :
                            if poem_replaced.split(" ")[idx-1] =="an":
                                old_str = "an "+poem_replaced.split(" ")[idx]
                                new_str = "a "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "An "+poem_replaced.split(" ")[idx]
                                new_str = "A "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                            #print "FOUND correction needed",old_str,new_str
                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    tmp_poem=""
                    # poem_replaced.replace("\t","	")
                    # poem_replaced.replace("\n"," <br>")
                    # poem_replaced.replace("\r"," <br>")
                    HTML_poem=""
                    for line in poem_replaced.split("\n"):
                        lines_total+=1
                        #print "LINE", line
                        HTML_poem += line+"<br>"
                    if len(response) >0 and len(id.split("_"))>1:
                        # ALL_poems = ALL_poems_intro + " ".join(i for i in ALL_poems.split("</h2>.")[0:])+"<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        tmp_poem= "[A poem generated from template: "+ author+", '"+ title +"'']\n\n'"+new_title+"'\nby\n"+new_author+"\n\n"+poem_replaced
                        print "\n******\n"+tmp_poem
                        #print "\nORIGINAL:",bio
                        txt_fn = id.split("_")[1]+"_POEMs.txt"
                        # WRITE_BIO_PATH = DATA_DIR+"generated/POEMS/POEMS_"+datetime.datetime.now().strftime('%Y-%m-%d_%H')+"/"
                        # if not os.path.exists(WRITE_BIO_PATH):
                        #     os.makedirs(WRITE_BIO_PATH)
                        txt_fn_path = GENERATED_DIR+txt_fn
                        f_txt=open(txt_fn_path,'w')
                        f_txt.write(tmp_poem)#.encode('utf-8'))
                        f_txt.close();
                        #print "\nTXT file created at:",txt_fn_path
                        # #######
                        # # write them all.... wasteful... but useful if run is interrupted....
                        # ###########
                        # ALL_poems = ALL_poems.replace("$$datetime$$",datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                        # ALL_poems = ALL_poems.replace("$$cnt$$",str(cnt))
                        # print "cnt",cnt
                        # ALL_poems = ALL_poems.replace("$$gentime$$",str(time.time() - start_time))
                        # # ALL POEMS
                        # txt_fn = datetime.datetime.now().strftime('%Y-%m-%d_%H')+"_poetryFoundation_generatedPOEMS_"+type_of_run+".html"
                        # txt_fn_path = DATA_DIR+"generated/POEMS/"+txt_fn
                        # f_txt=open(txt_fn_path,'w')
                        # f_txt.write(ALL_poems+"</hmtl>")
                        # f_txt.close();
                        # print "\nTXT file created at:",txt_fn_path
                    else:
                        # NOTE(review): bare tuple expression — nothing is printed here.
                        "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
for w in hyp: if len(w) > 2: similarterm += import_utilities.find_synset_word(w)+"-" similarterm = import_utilities.strip_underscore(similarterm[:-1]) #print "hyphenated:",word,"replaced by: "+similarterm ######################################################### # is it a TRUNCATED VERB slang as in singin or wishin # ######################################################### if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]: similarterm = import_utilities.find_synset_word(word_nopunct+'g') ## #print "TRUNCATED SLANG word: '"+word+"'",similarterm interim = import_utilities.lemma(similarterm) ## #print interim similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1] # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1] ################# # SWEAR WORD # ################# ##print "at the garden of if:", word if word_nopunct in import_utilities.curses: similarterm = random.choice(import_utilities.curses) ##print "SWEAR WORD word: '"+word+"'",similarterm if len(hyp) >1: