def extractFeaturesAndWriteBio(READ_PATH,file_type):
    """Generate a replacement-glitched poem from every bio/poem file under READ_PATH.

    For each file whose name contains `file_type` (readmes skipped), the text file
    is split on "****!****" into author / title / bio, then mutated in passes:
      1. bracketed years/numbers are jittered by +-5,
      2. AlchemyAPI-JSON named entities are swapped for similar entities,
      3. adjectives (JJ) and adverbs (RB) are replaced from the JJ/RB pools,
      4. remaining non-stopwords are swapped via synsets / the RESERVOIR pool,
      5. a/an agreement is repaired,
    and the result is printed and written to GENERATED_DIR as <id>_POEMs.txt.
    Interactive: asks on stdin how many poems to generate per batch.

    NOTE(review): Python 2 code (print statements; `input()` evals the reply).
    NOTE(review): re-defined later in this file with the same name; only the
    later definition survives at runtime, so this copy is dead code.
    NOTE(review): depends on module globals defined elsewhere (os, random, re,
    nltk, stopwords, import_utilities, filenames, DATA_DIR, READ_TXT_PATH,
    READ_JSON_PATH, GENERATED_DIR, authors, JJ, RB, personal_pronouns,
    RESERVOIR, the_vowels, getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON) — confirm against the rest of the file.
    """
    global ALL_poems,bio,cnt
    inp=0       # batch size requested by the user
    sub_cnt=0   # poems generated so far in the current batch
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files)-1 # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)
            if file_type in file and 'readme' not in file:
                # ID
                id=file.split(".")[0]
                #print "\n\n*********\nID:",id
                filenames.append(id)
                cnt+=1
                sub_cnt+=1
                if sub_cnt>=inp:
                    # batch finished: ask how many poems to generate next
                    sub_cnt=0
                    inp = input("\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                print 'Poem #',sub_cnt+1
                # per-poem state
                poem_replaced = ""
                replacement_word = ""
                author=""
                titles=""
                title=""
                new_title=""
                replaced_ls =[]
                new_titles_ls = []
                quit_language=0   # language detector score: >0 means "probably not English"
                ##########################
                #   Load POEM TEXT FILE  #
                ##########################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1]+".txt"
                #print "txt_fn_path:",txt_fn_path
                if os.path.isfile(txt_fn_path) and cnt>0:
                    txt_data=open(txt_fn_path).read()
                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8') # unicode(txt_data)
                    author=txt_data.split("****!****")[0].strip(' \t\n\r')
                    title=txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio=txt_data.split("****!****")[2]#.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string and these three
                    # results are discarded, so bio is not actually cleaned here.
                    bio.replace("\t","	")
                    bio.replace("\n"," <br>")
                    bio.replace("\r"," <br>")
                    poem_replaced=bio
                    #print poem_replaced
                    ###############################
                    #   REPLACE AUTHOR NAME
                    ##############################
                    author_ln=author.split(" ")[-1]
                    author_fn=author.split(" ")[:-1]
                    # #poem_replaced = poem_replaced.replace(author_ln,"Jhave")
                    #######################
                    #  fake AUTHOR
                    #######################
                    new_author= " ".join(random.choice(authors).split(" ")[1:-2])+" "+random.choice(authors).split(" ")[-2]
                    #######################
                    #  replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')
                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                # jitter any parenthesised number (e.g. a year) by +-5
                                new_num = random.randint(int(w2)-5,int(w2)+5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(w2,str(new_num))
                                replaced_ls.append(new_num)
                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH+"poetryFoundation_"+id.split("_")[1]+"_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx,entity in enumerate(response['entities']):
                                #print idx
                                ce = entity['text'].replace("0xc2"," ")
                                ce = ce.replace("0xe2","'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string expression — has no effect;
                                    # on decode failure `content` keeps its previous value.
                                    "AAAARGGGGHHH!!!!"
                                if content in poem_replaced:
                                    ################################################
                                    #  Replace similar entities from other JSON    #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content,entity['type'])
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content,replacement_entity)
                                    replaced_ls.append(replacement_entity)
                    ##########################
                    #    POS REPLACMENT      #
                    ##########################
                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)
                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        # i is a (token, POS-tag) pair from nltk.pos_tag
                        if "/i" not in i[0] and len(i[0])>3 and i[0] != "died":
                            origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                            origw =import_utilities.strip_punctuation(origw)
                            if i[1]=='JJ' :
                                # adjective: swap for a random entry from the JJ pool
                                JJr = random.choice(JJ)
                                # # JJr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],JJr.lstrip().lstrip())
                                if i[0].istitle():
                                    JJr = JJr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced,1)#poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1]=='RB':
                                # adverb: swap for a random entry from the RB pool
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],RBr.lstrip().lstrip())
                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced,1)
                                replaced_ls.append(RBr)
                    ########################
                    #    IS IT ENGLISH?    #
                    ########################
                    for line in poem_replaced.split('\n\r'):
                        if len(line)>0 :
                            if "english" not in import_utilities.get_language(line):
                                quit_language+=1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language-=1
                    #########################
                    #    SYNSET REPLACE     #
                    #########################
                    for idx,word in enumerate(poem_replaced.split(' ')):
                        if "<br>" not in word and "	" not in word and len(word)>0 and "~~~~!~~~" not in word:
                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'"))>1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(personal_pronouns)+"'"+word.split("'")[1]+' '
                                    # NOTE(review): replace() result is discarded, so this
                                    # contraction branch never changes the poem.
                                    poem_replaced.replace(word,replacement_word)
                                    #print "word,",word,"replacement_word:",replacement_word
                            ####################################################
                            #   Replacement of OTHERs                          #
                            ####################################################
                            elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):
                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']
                                #######################################################
                                #  MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET  #
                                #######################################################
                                if word_nopunct[-4:].lower()=="here":
                                    similarterm=random.choice(import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct)>2:
                                        similarterm = import_utilities.find_synset_word(word_nopunct)#(word.lstrip().rstrip())
                                        ############################################
                                        # manually get rid of some terrible choices
                                        ############################################
                                        if similarterm == "ilk":
                                            similarterm = "like"
                                        if similarterm == "ope":
                                            similarterm = "does"
                                        if similarterm == "information technology":
                                            similarterm = "it"
                                # NOTE(review): for 1-2 char words neither branch assigns,
                                # so `similarterm` may hold the previous word's value here.
                                #######################################
                                # abbreviations for f*****g states!   #
                                #######################################
                                if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh":
                                        similarterm = random.choice(import_utilities.exclaims)
                                    else:
                                        similarterm = random.choice(RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line
                                ##############
                                # hyphenated #
                                ##############
                                hyp =word.split("-")
                                #print word,len(hyp)
                                if len(hyp) >1:
                                    # rebuild each hyphen segment from its own synset
                                    similarterm=""
                                    for w in hyp:
                                        if len(w) > 2:
                                            similarterm += import_utilities.find_synset_word(w)+"-"
                                    similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm
                                #########################################################
                                #  is it a TRUNCATED VERB slang as in singin or wishin  #
                                #########################################################
                                if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]:
                                    similarterm = import_utilities.find_synset_word(word_nopunct+'g')
                                    interim = import_utilities.lemma(similarterm)
                                    similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]
                                #################
                                #  SWEAR WORD   #
                                #################
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(import_utilities.curses)
                                if len(hyp) >1:
                                    replacement_word = similarterm
                                else:
                                    replacement_word = word.replace(word_nopunct, similarterm)
                                replacement_word = import_utilities.strip_underscore(replacement_word)
                                replacement_word = import_utilities.replaceNumbers(replacement_word)
                                #########################
                                # RESERVOIR_OF_WEIRDNESS #
                                #########################
                                if word_nopunct.lower() in import_utilities.impera:
                                    replacement_word=random.choice(import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower() in import_utilities.conjuncts:
                                    replacement_word=random.choice(import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.indef_prono:
                                    replacement_word=random.choice(import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.prepo:
                                    replacement_word=random.choice(import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.rel_prono:
                                    # relative pronouns are left alone
                                    replacement_word=word
                                elif word_nopunct.lower()[-2:] =="ly":
                                    replacement_word=import_utilities.strip_underscore(import_utilities.find_synset_word(word))#(word[:-2])
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"
                                else:
                                    # synset gave back (a form of) the same word: fall back
                                    # to the RESERVOIR, growing it from English-looking text
                                    if len(hyp) <2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                        if word_nopunct not in RESERVOIR and quit_language<0 and import_utilities.countPunctuation(word)<1 and len(word_nopunct)>3 and not word_nopunct.istitle():
                                            #print "ADDING",word,"to reservoir"
                                            RESERVOIR.append(word)
                                        replacement_word = random.choice(RESERVOIR)
                                    if quit_language>1 and not word_nopunct.istitle():
                                        # probably foreign language: make a word salad in english
                                        replacement_word = random.choice(RESERVOIR)
                                # REPLACEMENT
                                poem_ls = poem_replaced.split(' ')
                                idx = poem_ls.index(word)
                                if poem_ls[idx]==word:
                                    poem_ls[idx]=replacement_word
                                poem_replaced = " ".join(poem_ls)
                                #poem_replaced = poem_replaced.replace(word,replacement_word)
                    # CORRECT the "A" to "An"
                    for idx,word in enumerate(poem_replaced.split(" ")):
                        # e.g. "A organism" -> "An organism"
                        if len(word)>0 and word[0].lower() in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="a" :
                            if poem_replaced.split(" ")[idx-1] =="a":
                                old_str = "a "+poem_replaced.split(" ")[idx]
                                new_str = "an "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "A "+poem_replaced.split(" ")[idx]
                                new_str = "An "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                        # e.g. "An consonant" -> "A consonant"
                        if len(word)>0 and word[0].lower() not in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="an" :
                            if poem_replaced.split(" ")[idx-1] =="an":
                                old_str = "an "+poem_replaced.split(" ")[idx]
                                new_str = "a "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "An "+poem_replaced.split(" ")[idx]
                                new_str = "A "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                            #print "FOUND correction needed",old_str,new_str
                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    tmp_poem=""
                    # poem_replaced.replace("\t","	")
                    # poem_replaced.replace("\n"," <br>")
                    # poem_replaced.replace("\r"," <br>")
                    HTML_poem=""
                    for line in poem_replaced.split("\n"):
                        #print "LINE", line
                        HTML_poem += line+"<br>"
                    if len(response) >0 and len(id.split("_"))>1:
                        # ALL_poems = ALL_poems_intro + " ".join(i for i in ALL_poems.split("</h2>.")[0:])+"<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        tmp_poem= "[A poem generated from template: "+ author+", '"+ title +"'']\n\n'"+new_title+"'\nby\n"+new_author+"\n\n"+poem_replaced
                        print "\n******\n"+tmp_poem
                        #print "\nORIGINAL:",bio
                        txt_fn = id.split("_")[1]+"_POEMs.txt"
                        # WRITE_BIO_PATH = DATA_DIR+"generated/POEMS/POEMS_"+datetime.datetime.now().strftime('%Y-%m-%d_%H')+"/"
                        # if not os.path.exists(WRITE_BIO_PATH):
                        #     os.makedirs(WRITE_BIO_PATH)
                        txt_fn_path = GENERATED_DIR+txt_fn
                        f_txt=open(txt_fn_path,'w')
                        f_txt.write(tmp_poem)#.encode('utf-8'))
                        f_txt.close();
                        #print "\nTXT file created at:",txt_fn_path
                        # #######
                        # # write them all.... wasteful... but useful if run is interrupted....
                        # ###########
                        # ALL_poems = ALL_poems.replace("$$datetime$$",datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                        # ALL_poems = ALL_poems.replace("$$cnt$$",str(cnt))
                        # print "cnt",cnt
                        # ALL_poems = ALL_poems.replace("$$gentime$$",str(time.time() - start_time))
                        # # ALL POEMS
                        # txt_fn = datetime.datetime.now().strftime('%Y-%m-%d_%H')+"_poetryFoundation_generatedPOEMS_"+type_of_run+".html"
                        # txt_fn_path = DATA_DIR+"generated/POEMS/"+txt_fn
                        # f_txt=open(txt_fn_path,'w')
                        # f_txt.write(ALL_poems+"</hmtl>")
                        # f_txt.close();
                        # print "\nTXT file created at:",txt_fn_path
                    else:
                        # NOTE(review): bare tuple expression — nothing is printed here.
                        "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
def extractFeaturesAndWriteBio(READ_PATH,file_type):
    """Generate a replacement-glitched poem from every bio/poem file under READ_PATH.

    Second (surviving) definition: identical to the earlier duplicate in this
    file but additionally tracks per-batch statistics — `words_total`,
    `lines_total` and elapsed seconds via the `start_time` global — and prints
    them when a batch completes.  Pipeline per file whose name contains
    `file_type` (readmes skipped): split text on "****!****" into
    author / title / bio, jitter bracketed numbers, swap Alchemy-JSON entities,
    replace JJ/RB POS words, swap remaining words via synsets / the RESERVOIR
    pool, repair a/an agreement, then print and write to GENERATED_DIR.
    Interactive: asks on stdin how many poems to generate per batch.

    NOTE(review): Python 2 code (print statements; `input()` evals the reply).
    NOTE(review): depends on module globals defined elsewhere (os, random, re,
    time, nltk, stopwords, import_utilities, filenames, DATA_DIR,
    READ_TXT_PATH, READ_JSON_PATH, GENERATED_DIR, authors, JJ, RB,
    personal_pronouns, RESERVOIR, the_vowels, getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON) — confirm against the rest of the file.
    """
    global ALL_poems,bio,cnt, start_time
    inp=0          # batch size requested by the user
    sub_cnt=0      # poems generated so far in the current batch
    words_total=0  # words examined by the synset pass this batch
    lines_total=0  # poem lines emitted this batch
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files)-1 # deduct the DS_store
            #print (num_of_files,'readDirectory',READ_PATH)
            if file_type in file and 'readme' not in file:
                # ID
                id=file.split(".")[0]
                #print "\n\n*********\nID:",id
                filenames.append(id)
                cnt+=1
                ##############
                # HOW MANY?  #
                ##############
                sub_cnt+=1
                if sub_cnt>=inp:
                    if inp != 0:
                        # report stats for the batch that just finished
                        end_time = time.time()
                        es = end_time-start_time
                        print sub_cnt, "poems,\n",lines_total,"lines, &\n",words_total,"words \ngenerated in\n",("%.2f" % es),"seconds"
                        words_total=0
                        lines_total=0
                    # RESTART
                    sub_cnt=0
                    inp = input("\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                    start_time = time.time()
                print 'Poem #',sub_cnt+1
                # per-poem state
                poem_replaced = ""
                replacement_word = ""
                author=""
                titles=""
                title=""
                new_title=""
                replaced_ls =[]
                new_titles_ls = []
                quit_language=0   # language detector score: >0 means "probably not English"
                #################################################################
                # Load POEM TEXT FILE (based on id extracted from Alchemy JSON) #
                #################################################################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1]+".txt"
                #print "txt_fn_path:",txt_fn_path
                if os.path.isfile(txt_fn_path) and cnt>0:
                    txt_data=open(txt_fn_path).read()
                    # http://blog.webforefront.com/archives/2011/02/python_ascii_co.html
                    # txt_data.decode('ISO-8859-2') .decode('utf-8') # unicode(txt_data)
                    author=txt_data.split("****!****")[0].strip(' \t\n\r')
                    title=txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio=txt_data.split("****!****")[2]#.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string and these three
                    # results are discarded, so bio is not actually cleaned here.
                    bio.replace("\t","	")
                    bio.replace("\n"," <br>")
                    bio.replace("\r"," <br>")
                    poem_replaced=bio
                    #print poem_replaced
                    ###############################
                    #   REPLACE AUTHOR NAME
                    ##############################
                    author_ln=author.split(" ")[-1]
                    author_fn=author.split(" ")[:-1]
                    # #poem_replaced = poem_replaced.replace(author_ln,"Jhave")
                    #######################
                    #  fake AUTHOR
                    #######################
                    new_author= " ".join(random.choice(authors).split(" ")[1:-2])+" "+random.choice(authors).split(" ")[-2]
                    #######################
                    #  replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')
                    ############################
                    # replace years with another
                    ############################
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                # jitter any parenthesised number (e.g. a year) by +-5
                                new_num = random.randint(int(w2)-5,int(w2)+5)
                                #print "REPLACING #:",w2,new_num
                                poem_replaced = poem_replaced.replace(w2,str(new_num))
                                replaced_ls.append(new_num)
                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH+"poetryFoundation_"+id.split("_")[1]+"_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx,entity in enumerate(response['entities']):
                                #print idx
                                ce = entity['text'].replace("0xc2"," ")
                                ce = ce.replace("0xe2","'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string expression — has no effect;
                                    # on decode failure `content` keeps its previous value.
                                    "AAAARGGGGHHH!!!!"
                                if content in poem_replaced:
                                    ################################################
                                    #  Replace similar entities from other JSON    #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content,entity['type'])
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content,replacement_entity)
                                    replaced_ls.append(replacement_entity)
                    ##########################
                    #    POS REPLACMENT      #
                    ##########################
                    token_tuples = nltk.word_tokenize(poem_replaced)
                    tt = nltk.pos_tag(token_tuples)
                    #################
                    #  ADJECTIVES   #
                    #################
                    for i in tt:
                        # i is a (token, POS-tag) pair from nltk.pos_tag
                        if "/i" not in i[0] and len(i[0])>3 and i[0] != "died":
                            origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                            origw =import_utilities.strip_punctuation(origw)
                            if i[1]=='JJ' :
                                # adjective: swap for a random entry from the JJ pool
                                JJr = random.choice(JJ)
                                # # JJr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, JJr)
                                # JJr = import_utilities.strip_punctuation(JJr)
                                JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],JJr.lstrip().lstrip())
                                if i[0].istitle():
                                    JJr = JJr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced,1)#poem_replaced.replace(i[0],JJr,1)
                                replaced_ls.append(JJr)
                            if i[1]=='RB':
                                # adverb: swap for a random entry from the RB pool
                                RBr = random.choice(RB)
                                RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0],RBr.lstrip().lstrip())
                                if i[0].istitle():
                                    RBr = RBr.title()
                                poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced,1)
                                replaced_ls.append(RBr)
                    ########################
                    #    IS IT ENGLISH?    #
                    ########################
                    for line in poem_replaced.split('\n\r'):
                        if len(line)>0 :
                            if "english" not in import_utilities.get_language(line):
                                quit_language+=1
                                #print "NOT english:",quit_language,line
                            else:
                                quit_language-=1
                    #########################
                    #    SYNSET REPLACE     #
                    #########################
                    for idx,word in enumerate(poem_replaced.split(' ')):
                        if "<br>" not in word and "	" not in word and len(word)>0 and "~~~~!~~~" not in word:
                            words_total+=1
                            #########################
                            #   PRONOUN ' VERB      #
                            #########################
                            if len(word.split("'"))>1:
                                if word.split("'")[0] in personal_pronouns:
                                    replacement_word = random.choice(personal_pronouns)+"'"+word.split("'")[1]+' '
                                    # NOTE(review): replace() result is discarded, so this
                                    # contraction branch never changes the poem.
                                    poem_replaced.replace(word,replacement_word)
                                    #print "word,",word,"replacement_word:",replacement_word
                            ####################################################
                            #   Replacement of OTHERs                          #
                            ####################################################
                            elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):
                                # take off leading brackets, commas etc...
                                word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                word_punct = word_punct_nopunct['punct']
                                punct_bool = word_punct_nopunct['punct_bool']
                                #######################################################
                                #  MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET  #
                                #######################################################
                                if word_nopunct[-4:].lower()=="here":
                                    similarterm=random.choice(import_utilities.heres)
                                else:
                                    #print "WORD:",word_nopunct
                                    if len(word_nopunct)>2:
                                        similarterm = import_utilities.find_synset_word(word_nopunct)#(word.lstrip().rstrip())
                                        ############################################
                                        # manually get rid of some terrible choices
                                        ############################################
                                        if similarterm == "ilk":
                                            similarterm = "like"
                                        if similarterm == "ope":
                                            similarterm = "does"
                                        if similarterm == "information technology":
                                            similarterm = "it"
                                # NOTE(review): for 1-2 char words neither branch assigns,
                                # so `similarterm` may hold the previous word's value here.
                                #######################################
                                # abbreviations for f*****g states!   #
                                #######################################
                                if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                    tmp = similarterm
                                    if word_nopunct == "oh":
                                        similarterm = random.choice(import_utilities.exclaims)
                                    else:
                                        similarterm = random.choice(RESERVOIR)
                                    #print word_nopunct," replaced by", tmp, "replaced with:",similarterm, "in:",line
                                ##############
                                # hyphenated #
                                ##############
                                hyp =word.split("-")
                                #print word,len(hyp)
                                if len(hyp) >1:
                                    # rebuild each hyphen segment from its own synset
                                    similarterm=""
                                    for w in hyp:
                                        if len(w) > 2:
                                            similarterm += import_utilities.find_synset_word(w)+"-"
                                    similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #print "hyphenated:",word,"replaced by: "+similarterm
                                #########################################################
                                #  is it a TRUNCATED VERB slang as in singin or wishin  #
                                #########################################################
                                if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]:
                                    similarterm = import_utilities.find_synset_word(word_nopunct+'g')
                                    interim = import_utilities.lemma(similarterm)
                                    similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]
                                #################
                                #  SWEAR WORD   #
                                #################
                                if word_nopunct in import_utilities.curses:
                                    similarterm = random.choice(import_utilities.curses)
                                if len(hyp) >1:
                                    replacement_word = similarterm
                                else:
                                    replacement_word = word.replace(word_nopunct, similarterm)
                                replacement_word = import_utilities.strip_underscore(replacement_word)
                                replacement_word = import_utilities.replaceNumbers(replacement_word)
                                #########################
                                # RESERVOIR_OF_WEIRDNESS #
                                #########################
                                if word_nopunct.lower() in import_utilities.impera:
                                    replacement_word=random.choice(import_utilities.impera)
                                    #print word,"IMPERA:",replacement_word
                                elif word_nopunct.lower() in import_utilities.conjuncts:
                                    replacement_word=random.choice(import_utilities.conjuncts)
                                    #print word," CONJUNCTION replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.indef_prono:
                                    replacement_word=random.choice(import_utilities.indef_prono)
                                    #print word," INDEF_prono replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.prepo:
                                    replacement_word=random.choice(import_utilities.prepo)
                                    #print word," prepo replaced with",replacement_word
                                elif word_nopunct.lower() in import_utilities.rel_prono:
                                    # relative pronouns are left alone
                                    replacement_word=word
                                elif word_nopunct.lower()[-2:] =="ly":
                                    replacement_word=import_utilities.strip_underscore(import_utilities.find_synset_word(word))#(word[:-2])
                                    # if replacement_word[-2:] !="ly":
                                    #     replacement_word +="ly"
                                else:
                                    # synset gave back (a form of) the same word: fall back
                                    # to the RESERVOIR, growing it from English-looking text
                                    if len(hyp) <2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                        if word_nopunct not in RESERVOIR and quit_language<0 and import_utilities.countPunctuation(word)<1 and len(word_nopunct)>3 and not word_nopunct.istitle():
                                            #print "ADDING",word,"to reservoir"
                                            RESERVOIR.append(word)
                                        replacement_word = random.choice(RESERVOIR)
                                    if quit_language>1 and not word_nopunct.istitle():
                                        # probably foreign language: make a word salad in english
                                        replacement_word = random.choice(RESERVOIR)
                                # REPLACEMENT
                                poem_ls = poem_replaced.split(' ')
                                idx = poem_ls.index(word)
                                if poem_ls[idx]==word:
                                    poem_ls[idx]=replacement_word
                                poem_replaced = " ".join(poem_ls)
                                #poem_replaced = poem_replaced.replace(word,replacement_word)
                    # CORRECT the "A" to "An"
                    for idx,word in enumerate(poem_replaced.split(" ")):
                        # e.g. "A organism" -> "An organism"
                        if len(word)>0 and word[0].lower() in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="a" :
                            if poem_replaced.split(" ")[idx-1] =="a":
                                old_str = "a "+poem_replaced.split(" ")[idx]
                                new_str = "an "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "A "+poem_replaced.split(" ")[idx]
                                new_str = "An "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                        # e.g. "An consonant" -> "A consonant"
                        if len(word)>0 and word[0].lower() not in the_vowels and poem_replaced.split(" ")[idx-1].lower() =="an" :
                            if poem_replaced.split(" ")[idx-1] =="an":
                                old_str = "an "+poem_replaced.split(" ")[idx]
                                new_str = "a "+poem_replaced.split(" ")[idx]
                            else:
                                old_str = "An "+poem_replaced.split(" ")[idx]
                                new_str = "A "+poem_replaced.split(" ")[idx]
                            poem_replaced = poem_replaced.replace(old_str,new_str)
                            #print "FOUND correction needed",old_str,new_str
                    #########################
                    #   WRITE SINGLE POEM   #
                    #########################
                    tmp_poem=""
                    # poem_replaced.replace("\t","	")
                    # poem_replaced.replace("\n"," <br>")
                    # poem_replaced.replace("\r"," <br>")
                    HTML_poem=""
                    for line in poem_replaced.split("\n"):
                        lines_total+=1
                        #print "LINE", line
                        HTML_poem += line+"<br>"
                    if len(response) >0 and len(id.split("_"))>1:
                        # ALL_poems = ALL_poems_intro + " ".join(i for i in ALL_poems.split("</h2>.")[0:])+"<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>"+ author+"</b>, <i>"+ title +"</i> ]<br><br><b>"+new_title+"<br><br></b>"+HTML_poem
                        tmp_poem= "[A poem generated from template: "+ author+", '"+ title +"'']\n\n'"+new_title+"'\nby\n"+new_author+"\n\n"+poem_replaced
                        print "\n******\n"+tmp_poem
                        #print "\nORIGINAL:",bio
                        txt_fn = id.split("_")[1]+"_POEMs.txt"
                        # WRITE_BIO_PATH = DATA_DIR+"generated/POEMS/POEMS_"+datetime.datetime.now().strftime('%Y-%m-%d_%H')+"/"
                        # if not os.path.exists(WRITE_BIO_PATH):
                        #     os.makedirs(WRITE_BIO_PATH)
                        txt_fn_path = GENERATED_DIR+txt_fn
                        f_txt=open(txt_fn_path,'w')
                        f_txt.write(tmp_poem)#.encode('utf-8'))
                        f_txt.close();
                        #print "\nTXT file created at:",txt_fn_path
                        # #######
                        # # write them all.... wasteful... but useful if run is interrupted....
                        # ###########
                        # ALL_poems = ALL_poems.replace("$$datetime$$",datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                        # ALL_poems = ALL_poems.replace("$$cnt$$",str(cnt))
                        # print "cnt",cnt
                        # ALL_poems = ALL_poems.replace("$$gentime$$",str(time.time() - start_time))
                        # # ALL POEMS
                        # txt_fn = datetime.datetime.now().strftime('%Y-%m-%d_%H')+"_poetryFoundation_generatedPOEMS_"+type_of_run+".html"
                        # txt_fn_path = DATA_DIR+"generated/POEMS/"+txt_fn
                        # f_txt=open(txt_fn_path,'w')
                        # f_txt.write(ALL_poems+"</hmtl>")
                        # f_txt.close();
                        # print "\nTXT file created at:",txt_fn_path
                    else:
                        # NOTE(review): bare tuple expression — nothing is printed here.
                        "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
for w in hyp: if len(w) > 2: similarterm += import_utilities.find_synset_word(w)+"-" similarterm = import_utilities.strip_underscore(similarterm[:-1]) #print "hyphenated:",word,"replaced by: "+similarterm ######################################################### # is it a TRUNCATED VERB slang as in singin or wishin # ######################################################### if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]: similarterm = import_utilities.find_synset_word(word_nopunct+'g') ## #print "TRUNCATED SLANG word: '"+word+"'",similarterm interim = import_utilities.lemma(similarterm) ## #print interim similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1] # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1] ################# # SWEAR WORD # ################# ##print "at the garden of if:", word if word_nopunct in import_utilities.curses: similarterm = random.choice(import_utilities.curses) ##print "SWEAR WORD word: '"+word+"'",similarterm if len(hyp) >1: