def extractFeaturesAndWriteBio(READ_PATH, file_type):
    """Walk READ_PATH and, for each matching poem file, generate a mutated
    "poem" from its bio text: nudge years, swap named entities (from the
    per-poem Alchemy JSON), swap adjectives/adverbs and synset-similar
    words, fake the author/title, then print the result and write it to
    GENERATED_DIR. Interactive: asks how many poems to generate per batch
    and pauses between poems.

    NOTE(review): indentation was reconstructed from a flattened source.
    The nesting of the sections from "POS REPLACMENT" onward inside the
    ``response != "failed"`` branch is inferred -- confirm against VCS
    history before relying on it.

    Depends on module globals defined elsewhere in this file:
    ALL_poems, bio, cnt, start_time, filenames, DATA_DIR, READ_TXT_PATH,
    READ_JSON_PATH, GENERATED_DIR, authors, JJ, RB, personal_pronouns,
    RESERVOIR, the_vowels, plus helpers getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON and the import_utilities module.
    """
    global ALL_poems, bio, cnt, start_time
    inp = 0          # poems requested in the current interactive batch
    sub_cnt = 0      # poems generated so far in the current batch
    words_total = 0  # words processed (for the batch stats report)
    lines_total = 0  # lines processed (for the batch stats report)
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files) - 1  # deduct the DS_store
            if file_type in file and 'readme' not in file:
                # ID: filename stem, e.g. "poetryFoundation_12345"
                id = file.split(".")[0]
                filenames.append(id)
                cnt += 1

                ##############
                # HOW MANY?  #
                ##############
                # Interactive batching: after `inp` poems, report stats
                # and ask the user for the next batch size.
                sub_cnt += 1
                if sub_cnt >= inp:
                    if inp != 0:
                        end_time = time.time()
                        es = end_time - start_time
                        print sub_cnt, "poems,\n", lines_total, "lines, &\n", words_total, "words \ngenerated in\n", ("%.2f" % es), "seconds"
                        words_total = 0
                        lines_total = 0
                    # RESTART the batch counter and prompt again.
                    sub_cnt = 0
                    # NOTE(review): Python-2 input() eval()s whatever is
                    # typed; raw_input() + int() would be safer.
                    inp = input(
                        "\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                    start_time = time.time()

                print 'Poem #', sub_cnt + 1

                # Per-poem state.
                poem_replaced = ""
                replacement_word = ""
                author = ""
                titles = ""
                title = ""
                new_title = ""
                replaced_ls = []      # log of every substitution made
                new_titles_ls = []
                quit_language = 0     # >0 suggests the poem is not English

                #################################################################
                # Load POEM TEXT FILE (based on id extracted from Alchemy JSON) #
                #################################################################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split(
                    "_")[1] + ".txt"
                if os.path.isfile(txt_fn_path) and cnt > 0:
                    txt_data = open(txt_fn_path).read()
                    # File layout: author ****!**** title ****!**** bio
                    author = txt_data.split("****!****")[0].strip(' \t\n\r')
                    title = txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string; all
                    # three results are discarded, so this "cleaning" is a
                    # no-op as written.
                    bio.replace("\t", "	")
                    bio.replace("\n", " <br>")
                    bio.replace("\r", " <br>")
                    poem_replaced = bio

                    ###############################
                    # REPLACE AUTHOR NAME
                    ##############################
                    author_ln = author.split(" ")[-1]
                    author_fn = author.split(" ")[:-1]
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # fake AUTHOR
                    #######################
                    # Splice middle-of-name words from one random author with
                    # the second-to-last word of another.
                    new_author = " ".join(
                        random.choice(authors).split(" ")
                        [1:-2]) + " " + random.choice(authors).split(" ")[-2]

                    #######################
                    # replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')

                    ############################
                    # replace years with another
                    ############################
                    # Any "(1987)"-style number is nudged by +/-5 years.
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(
                                    int(w2) - 5, int(w2) + 5)
                                poem_replaced = poem_replaced.replace(
                                    w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH +
                                            "poetryFoundation_" +
                                            id.split("_")[1] +
                                            "_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx, entity in enumerate(response['entities']):
                                # Scrub mis-encoded bytes before matching.
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub(
                                    '(' +
                                    '|'.join(import_utilities.chars.keys()) +
                                    ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode(
                                        'ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string is a no-op;
                                    # `content` keeps its previous loop value
                                    # (NameError if the first entity fails).
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:
                                    ################################################
                                    #   Replace similar entities from other JSON   #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(
                                        content, entity['type'])
                                    # NOTE(review): `cr` is computed but never
                                    # used; the raw replacement_entity is
                                    # substituted instead.
                                    cr = re.sub(
                                        '(' + '|'.join(
                                            import_utilities.chars.keys()) +
                                        ')', import_utilities.replace_chars,
                                        replacement_entity)
                                    poem_replaced = poem_replaced.replace(
                                        content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                        ##########################
                        #    POS REPLACMENT      #
                        ##########################
                        token_tuples = nltk.word_tokenize(poem_replaced)
                        tt = nltk.pos_tag(token_tuples)

                        #################
                        #  ADJECTIVES   #
                        #################
                        # Swap adjectives (JJ) and adverbs (RB) for random
                        # picks from the JJ / RB pools, preserving title-case
                        # and surrounding punctuation. Only the first
                        # occurrence is replaced (count=1).
                        for i in tt:
                            if "/i" not in i[0] and len(
                                    i[0]) > 3 and i[0] != "died":
                                origw = re.sub(
                                    '(' +
                                    '|'.join(import_utilities.chars.keys()) +
                                    ')', import_utilities.replace_chars, i[0])
                                origw = import_utilities.strip_punctuation(origw)
                                if i[1] == 'JJ':
                                    JJr = random.choice(JJ)
                                    JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                        i[0], JJr.lstrip().lstrip())
                                    if i[0].istitle():
                                        JJr = JJr.title()
                                    poem_replaced = re.sub(
                                        r'\b' +
                                        import_utilities.strip_punctuation(
                                            i[0]) + r'\b', JJr, poem_replaced,
                                        1)
                                    replaced_ls.append(JJr)
                                if i[1] == 'RB':
                                    RBr = random.choice(RB)
                                    RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(
                                        i[0], RBr.lstrip().lstrip())
                                    if i[0].istitle():
                                        RBr = RBr.title()
                                    poem_replaced = re.sub(
                                        r'\b' +
                                        import_utilities.strip_punctuation(
                                            i[0]) + r'\b', RBr, poem_replaced,
                                        1)
                                    replaced_ls.append(RBr)

                        ########################
                        #    IS IT ENGLISH?    #
                        ########################
                        # Vote per line: quit_language ends up positive for
                        # mostly-non-English poems, negative for English.
                        for line in poem_replaced.split('\n\r'):
                            if len(line) > 0:
                                if "english" not in import_utilities.get_language(
                                        line):
                                    quit_language += 1
                                else:
                                    quit_language -= 1

                        #########################
                        #    SYNSET REPLACE     #
                        #########################
                        # Word-by-word pass over the poem, replacing each
                        # non-stopword with a WordNet-ish relative or a word
                        # from the RESERVOIR.
                        for idx, word in enumerate(poem_replaced.split(' ')):
                            if "<br>" not in word and "	" not in word and len(
                                    word) > 0 and "~~~~!~~~" not in word:
                                words_total += 1
                                #########################
                                #   PRONOUN ' VERB      #
                                #########################
                                # Contractions like "she'll": swap the pronoun.
                                if len(word.split("'")) > 1:
                                    if word.split("'")[0] in personal_pronouns:
                                        replacement_word = random.choice(
                                            personal_pronouns) + "'" + word.split(
                                                "'")[1] + ' '
                                        # NOTE(review): replace() result is
                                        # discarded -- this swap never lands.
                                        poem_replaced.replace(word, replacement_word)
                                ####################################################
                                #   Replacement of OTHERs                          #
                                ####################################################
                                elif not word.lower().strip(
                                        " \n\t\r") in stopwords.words('english'):
                                    # take off leading brackets, commas etc...
                                    word_punct_nopunct = import_utilities.strip_punctuation_bool(
                                        word)
                                    word_nopunct = word_punct_nopunct[
                                        'word'].strip(" \n\t\r")
                                    word_punct = word_punct_nopunct['punct']
                                    punct_bool = word_punct_nopunct['punct_bool']
                                    #######################################################
                                    # MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET   #
                                    #######################################################
                                    if word_nopunct[-4:].lower() == "here":
                                        similarterm = random.choice(
                                            import_utilities.heres)
                                    else:
                                        if len(word_nopunct) > 2:
                                            similarterm = import_utilities.find_synset_word(
                                                word_nopunct)
                                    # NOTE(review): for short words neither
                                    # branch assigns, so `similarterm` may be
                                    # unbound or stale from a previous word.
                                    ############################################
                                    # manually get rid of some terrible choices
                                    ############################################
                                    if similarterm == "ilk":
                                        similarterm = "like"
                                    if similarterm == "ope":
                                        similarterm = "does"
                                    if similarterm == "information technology":
                                        similarterm = "it"
                                    #######################################
                                    # abbreviations for f*****g states!   #
                                    #######################################
                                    if word_nopunct.upper(
                                    ) in import_utilities.state_abbrev and word_nopunct.lower(
                                    ) not in stopwords.words(
                                            'english') and "me," not in word:
                                        tmp = similarterm
                                        if word_nopunct == "oh":
                                            similarterm = random.choice(
                                                import_utilities.exclaims)
                                        else:
                                            similarterm = random.choice(RESERVOIR)
                                    ##############
                                    # hyphenated #
                                    ##############
                                    # Rebuild hyphenated words part by part.
                                    hyp = word.split("-")
                                    if len(hyp) > 1:
                                        similarterm = ""
                                        for w in hyp:
                                            if len(w) > 2:
                                                similarterm += import_utilities.find_synset_word(
                                                    w) + "-"
                                        similarterm = import_utilities.strip_underscore(
                                            similarterm[:-1])
                                    #########################################################
                                    # is it a TRUNCATED VERB slang as in singin or wishin   #
                                    #########################################################
                                    if similarterm == word_nopunct and len(
                                            word
                                    ) > 2 and 'in' in word_nopunct[-2:]:
                                        similarterm = import_utilities.find_synset_word(
                                            word_nopunct + 'g')
                                        interim = import_utilities.lemma(
                                            similarterm)
                                        # Re-conjugate as participle, then drop
                                        # the final 'g' to restore the slang.
                                        similarterm = import_utilities.conjugate(
                                            interim,
                                            tense=import_utilities.PARTICIPLE,
                                            parse=True)[:-1]
                                    #################
                                    #  SWEAR WORD   #
                                    #################
                                    # Curses only ever map to other curses.
                                    if word_nopunct in import_utilities.curses:
                                        similarterm = random.choice(
                                            import_utilities.curses)
                                    if len(hyp) > 1:
                                        replacement_word = similarterm
                                    else:
                                        # Keep the original word's punctuation.
                                        replacement_word = word.replace(
                                            word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(
                                            replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(
                                            replacement_word)
                                    ##########################
                                    # RESERVOIR_OF_WEIRDNESS #
                                    ##########################
                                    # Closed word classes are swapped within
                                    # their own class; everything else may fall
                                    # back to the RESERVOIR word-salad pool.
                                    if word_nopunct.lower(
                                    ) in import_utilities.impera:
                                        replacement_word = random.choice(
                                            import_utilities.impera)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.conjuncts:
                                        replacement_word = random.choice(
                                            import_utilities.conjuncts)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.indef_prono:
                                        replacement_word = random.choice(
                                            import_utilities.indef_prono)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.prepo:
                                        replacement_word = random.choice(
                                            import_utilities.prepo)
                                    elif word_nopunct.lower(
                                    ) in import_utilities.rel_prono:
                                        # Relative pronouns are left alone.
                                        replacement_word = word
                                    elif word_nopunct.lower()[-2:] == "ly":
                                        replacement_word = import_utilities.strip_underscore(
                                            import_utilities.find_synset_word(
                                                word))
                                    else:
                                        # Synset lookup returned (essentially)
                                        # the same word: harvest it into the
                                        # RESERVOIR and pick a replacement
                                        # from there instead.
                                        if len(
                                                hyp
                                        ) < 2 and "like" not in word_nopunct and import_utilities.singularize(
                                                word_nopunct
                                        ) == import_utilities.singularize(
                                                replacement_word
                                        ) and word_nopunct.lower(
                                        ) not in import_utilities.stopwords_ls:
                                            if word_nopunct not in RESERVOIR and quit_language < 0 and import_utilities.countPunctuation(
                                                    word) < 1 and len(
                                                        word_nopunct
                                                    ) > 3 and not word_nopunct.istitle():
                                                RESERVOIR.append(word)
                                            replacement_word = random.choice(
                                                RESERVOIR)
                                    if quit_language > 1 and not word_nopunct.istitle(
                                    ):
                                        # Probably a foreign language: make a
                                        # word salad in English instead.
                                        replacement_word = random.choice(RESERVOIR)
                                    # REPLACEMENT: splice the new word back in
                                    # at the first occurrence of `word`.
                                    poem_ls = poem_replaced.split(' ')
                                    idx = poem_ls.index(word)
                                    if poem_ls[idx] == word:
                                        poem_ls[idx] = replacement_word
                                    poem_replaced = " ".join(poem_ls)

                        # CORRECT the "A" to "An" (and vice versa) where the
                        # following word was replaced.
                        for idx, word in enumerate(poem_replaced.split(" ")):
                            if len(word) > 0 and word[0].lower(
                            ) in the_vowels and poem_replaced.split(" ")[
                                    idx - 1].lower() == "a":
                                if poem_replaced.split(" ")[idx - 1] == "a":
                                    old_str = "a " + poem_replaced.split(" ")[idx]
                                    new_str = "an " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "A " + poem_replaced.split(" ")[idx]
                                    new_str = "An " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(
                                    old_str, new_str)
                            if len(word) > 0 and word[0].lower(
                            ) not in the_vowels and poem_replaced.split(" ")[
                                    idx - 1].lower() == "an":
                                if poem_replaced.split(" ")[idx - 1] == "an":
                                    old_str = "an " + poem_replaced.split(" ")[idx]
                                    new_str = "a " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "An " + poem_replaced.split(" ")[idx]
                                    new_str = "A " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(
                                    old_str, new_str)

                        #########################
                        #   WRITE SINGLE POEM   #
                        #########################
                        tmp_poem = ""
                        HTML_poem = ""
                        for line in poem_replaced.split("\n"):
                            lines_total += 1
                            HTML_poem += line + "<br>"

                        if len(response) > 0 and len(id.split("_")) > 1:
                            # Append this poem to the accumulated HTML.
                            ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>" + author + "</b>, <i>" + title + "</i> ]<br><br><b>" + new_title + "<br><br></b>" + HTML_poem
                            tmp_poem = "[A poem generated from template: " + author + ", '" + title + "'']\n\n'" + new_title + "'\nby\n" + new_author + "\n\n" + poem_replaced
                            #####################
                            #     PAUSE IT      #
                            #####################
                            print "sub_cnt=", sub_cnt
                            if sub_cnt >= 1:
                                raw_input("Press Enter to continue...")
                            #####################
                            #       PRINT       #
                            #####################
                            print "\n******\n" + tmp_poem
                            txt_fn = id.split("_")[1] + "_POEMs.txt"
                            txt_fn_path = GENERATED_DIR + txt_fn
                            # NOTE(review): no with-block / encoding handling;
                            # tmp_poem is written as raw bytes.
                            f_txt = open(txt_fn_path, 'w')
                            f_txt.write(tmp_poem)
                            f_txt.close()
                        else:
                            # NOTE(review): bare expression -- nothing is
                            # printed; a `print` statement was likely intended.
                            "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
def extractFeaturesAndWriteBio(READ_PATH, file_type):
    """Duplicate (non-pausing) variant of extractFeaturesAndWriteBio.

    Walks READ_PATH and, for each matching poem file, generates a mutated
    "poem" from its bio text: nudges years, swaps named entities (from the
    per-poem Alchemy JSON), swaps adjectives/adverbs and synset-similar
    words, fakes the author/title, then prints the result and writes it to
    GENERATED_DIR. Unlike the earlier copy in this file, it does not pause
    between poems.

    NOTE(review): this redefinition shadows the earlier function of the
    same name at import time -- only one survives; consider deleting one.
    NOTE(review): indentation was reconstructed from a flattened source;
    nesting of the sections from "POS REPLACMENT" onward inside the
    ``response != "failed"`` branch is inferred.

    Depends on module globals defined elsewhere: ALL_poems, bio, cnt,
    start_time, filenames, DATA_DIR, READ_TXT_PATH, READ_JSON_PATH,
    GENERATED_DIR, authors, JJ, RB, personal_pronouns, RESERVOIR,
    the_vowels, plus getNewTitle, loadJSONfile,
    findSimilarEntityinRandomJSON and import_utilities.
    """
    global ALL_poems, bio, cnt, start_time
    inp = 0          # poems requested in the current interactive batch
    sub_cnt = 0      # poems generated so far in the current batch
    words_total = 0  # words processed (for the batch stats report)
    lines_total = 0  # lines processed (for the batch stats report)
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files) - 1  # deduct the DS_store
            if file_type in file and 'readme' not in file:
                # ID: filename stem, e.g. "poetryFoundation_12345"
                id = file.split(".")[0]
                filenames.append(id)
                cnt += 1

                ##############
                # HOW MANY?  #
                ##############
                # Interactive batching: after `inp` poems, report stats
                # and ask the user for the next batch size.
                sub_cnt += 1
                if sub_cnt >= inp:
                    if inp != 0:
                        end_time = time.time()
                        es = end_time - start_time
                        print sub_cnt, "poems,\n", lines_total, "lines, &\n", words_total, "words \ngenerated in\n", ("%.2f" % es), "seconds"
                        words_total = 0
                        lines_total = 0
                    # RESTART the batch counter and prompt again.
                    sub_cnt = 0
                    # NOTE(review): Python-2 input() eval()s whatever is
                    # typed; raw_input() + int() would be safer.
                    inp = input("\n\n^^^^^^^^^^^^^^\n\nHow many poems do u want? ")
                    print "\n\n^^^^^^^^^^^^^^^"
                    start_time = time.time()

                print 'Poem #', sub_cnt + 1

                # Per-poem state.
                poem_replaced = ""
                replacement_word = ""
                author = ""
                titles = ""
                title = ""
                new_title = ""
                replaced_ls = []      # log of every substitution made
                new_titles_ls = []
                quit_language = 0     # >0 suggests the poem is not English

                #################################################################
                # Load POEM TEXT FILE (based on id extracted from Alchemy JSON) #
                #################################################################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1] + ".txt"
                if os.path.isfile(txt_fn_path) and cnt > 0:
                    txt_data = open(txt_fn_path).read()
                    # File layout: author ****!**** title ****!**** bio
                    author = txt_data.split("****!****")[0].strip(' \t\n\r')
                    title = txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')
                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a NEW string; all
                    # three results are discarded, so this "cleaning" is a
                    # no-op as written.
                    bio.replace("\t", "	")
                    bio.replace("\n", " <br>")
                    bio.replace("\r", " <br>")
                    poem_replaced = bio

                    ###############################
                    # REPLACE AUTHOR NAME
                    ##############################
                    author_ln = author.split(" ")[-1]
                    author_fn = author.split(" ")[:-1]
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # fake AUTHOR
                    #######################
                    # Splice middle-of-name words from one random author with
                    # the second-to-last word of another.
                    new_author = " ".join(random.choice(authors).split(" ")[1:-2]) + " " + random.choice(authors).split(" ")[-2]

                    #######################
                    # replace BOOK TITLES
                    #######################
                    new_title = getNewTitle("title").encode('utf-8')

                    ############################
                    # replace years with another
                    ############################
                    # Any "(1987)"-style number is nudged by +/-5 years.
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(int(w2) - 5, int(w2) + 5)
                                poem_replaced = poem_replaced.replace(w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH + "poetryFoundation_" + id.split("_")[1] + "_Alchemy_JSON.txt")
                    if response != "failed":
                        if response.get('entities') is not None:
                            for idx, entity in enumerate(response['entities']):
                                # Scrub mis-encoded bytes before matching.
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): bare string is a no-op;
                                    # `content` keeps its previous loop value
                                    # (NameError if the first entity fails).
                                    "AAAARGGGGHHH!!!!"

                                if content in poem_replaced:
                                    ################################################
                                    #   Replace similar entities from other JSON   #
                                    ################################################
                                    replacement_entity = findSimilarEntityinRandomJSON(content, entity['type'])
                                    # NOTE(review): `cr` is computed but never
                                    # used; the raw replacement_entity is
                                    # substituted instead.
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                        ##########################
                        #    POS REPLACMENT      #
                        ##########################
                        token_tuples = nltk.word_tokenize(poem_replaced)
                        tt = nltk.pos_tag(token_tuples)

                        #################
                        #  ADJECTIVES   #
                        #################
                        # Swap adjectives (JJ) and adverbs (RB) for random
                        # picks from the JJ / RB pools, preserving title-case
                        # and surrounding punctuation. Only the first
                        # occurrence is replaced (count=1).
                        for i in tt:
                            if "/i" not in i[0] and len(i[0]) > 3 and i[0] != "died":
                                origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                                origw = import_utilities.strip_punctuation(origw)
                                if i[1] == 'JJ':
                                    JJr = random.choice(JJ)
                                    JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], JJr.lstrip().lstrip())
                                    if i[0].istitle():
                                        JJr = JJr.title()
                                    poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced, 1)
                                    replaced_ls.append(JJr)
                                if i[1] == 'RB':
                                    RBr = random.choice(RB)
                                    RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], RBr.lstrip().lstrip())
                                    if i[0].istitle():
                                        RBr = RBr.title()
                                    poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced, 1)
                                    replaced_ls.append(RBr)

                        ########################
                        #    IS IT ENGLISH?    #
                        ########################
                        # Vote per line: quit_language ends up positive for
                        # mostly-non-English poems, negative for English.
                        for line in poem_replaced.split('\n\r'):
                            if len(line) > 0:
                                if "english" not in import_utilities.get_language(line):
                                    quit_language += 1
                                else:
                                    quit_language -= 1

                        #########################
                        #    SYNSET REPLACE     #
                        #########################
                        # Word-by-word pass over the poem, replacing each
                        # non-stopword with a WordNet-ish relative or a word
                        # from the RESERVOIR.
                        for idx, word in enumerate(poem_replaced.split(' ')):
                            if "<br>" not in word and "	" not in word and len(word) > 0 and "~~~~!~~~" not in word:
                                words_total += 1
                                #########################
                                #   PRONOUN ' VERB      #
                                #########################
                                # Contractions like "she'll": swap the pronoun.
                                if len(word.split("'")) > 1:
                                    if word.split("'")[0] in personal_pronouns:
                                        replacement_word = random.choice(personal_pronouns) + "'" + word.split("'")[1] + ' '
                                        # NOTE(review): replace() result is
                                        # discarded -- this swap never lands.
                                        poem_replaced.replace(word, replacement_word)
                                ####################################################
                                #   Replacement of OTHERs                          #
                                ####################################################
                                elif not word.lower().strip(" \n\t\r") in stopwords.words('english'):
                                    # take off leading brackets, commas etc...
                                    word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                    word_nopunct = word_punct_nopunct['word'].strip(" \n\t\r")
                                    word_punct = word_punct_nopunct['punct']
                                    punct_bool = word_punct_nopunct['punct_bool']
                                    #######################################################
                                    # MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET   #
                                    #######################################################
                                    if word_nopunct[-4:].lower() == "here":
                                        similarterm = random.choice(import_utilities.heres)
                                    else:
                                        if len(word_nopunct) > 2:
                                            similarterm = import_utilities.find_synset_word(word_nopunct)
                                    # NOTE(review): for short words neither
                                    # branch assigns, so `similarterm` may be
                                    # unbound or stale from a previous word.
                                    ############################################
                                    # manually get rid of some terrible choices
                                    ############################################
                                    if similarterm == "ilk":
                                        similarterm = "like"
                                    if similarterm == "ope":
                                        similarterm = "does"
                                    if similarterm == "information technology":
                                        similarterm = "it"
                                    #######################################
                                    # abbreviations for f*****g states!   #
                                    #######################################
                                    if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                        tmp = similarterm
                                        if word_nopunct == "oh":
                                            similarterm = random.choice(import_utilities.exclaims)
                                        else:
                                            similarterm = random.choice(RESERVOIR)
                                    ##############
                                    # hyphenated #
                                    ##############
                                    # Rebuild hyphenated words part by part.
                                    hyp = word.split("-")
                                    if len(hyp) > 1:
                                        similarterm = ""
                                        for w in hyp:
                                            if len(w) > 2:
                                                similarterm += import_utilities.find_synset_word(w) + "-"
                                        similarterm = import_utilities.strip_underscore(similarterm[:-1])
                                    #########################################################
                                    # is it a TRUNCATED VERB slang as in singin or wishin   #
                                    #########################################################
                                    if similarterm == word_nopunct and len(word) > 2 and 'in' in word_nopunct[-2:]:
                                        similarterm = import_utilities.find_synset_word(word_nopunct + 'g')
                                        interim = import_utilities.lemma(similarterm)
                                        # Re-conjugate as participle, then drop
                                        # the final 'g' to restore the slang.
                                        similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]
                                    #################
                                    #  SWEAR WORD   #
                                    #################
                                    # Curses only ever map to other curses.
                                    if word_nopunct in import_utilities.curses:
                                        similarterm = random.choice(import_utilities.curses)
                                    if len(hyp) > 1:
                                        replacement_word = similarterm
                                    else:
                                        # Keep the original word's punctuation.
                                        replacement_word = word.replace(word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(replacement_word)
                                    ##########################
                                    # RESERVOIR_OF_WEIRDNESS #
                                    ##########################
                                    # Closed word classes are swapped within
                                    # their own class; everything else may fall
                                    # back to the RESERVOIR word-salad pool.
                                    if word_nopunct.lower() in import_utilities.impera:
                                        replacement_word = random.choice(import_utilities.impera)
                                    elif word_nopunct.lower() in import_utilities.conjuncts:
                                        replacement_word = random.choice(import_utilities.conjuncts)
                                    elif word_nopunct.lower() in import_utilities.indef_prono:
                                        replacement_word = random.choice(import_utilities.indef_prono)
                                    elif word_nopunct.lower() in import_utilities.prepo:
                                        replacement_word = random.choice(import_utilities.prepo)
                                    elif word_nopunct.lower() in import_utilities.rel_prono:
                                        # Relative pronouns are left alone.
                                        replacement_word = word
                                    elif word_nopunct.lower()[-2:] == "ly":
                                        replacement_word = import_utilities.strip_underscore(import_utilities.find_synset_word(word))
                                    else:
                                        # Synset lookup returned (essentially)
                                        # the same word: harvest it into the
                                        # RESERVOIR and pick a replacement
                                        # from there instead.
                                        if len(hyp) < 2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                            if word_nopunct not in RESERVOIR and quit_language < 0 and import_utilities.countPunctuation(word) < 1 and len(word_nopunct) > 3 and not word_nopunct.istitle():
                                                RESERVOIR.append(word)
                                            replacement_word = random.choice(RESERVOIR)
                                    if quit_language > 1 and not word_nopunct.istitle():
                                        # Probably a foreign language: make a
                                        # word salad in English instead.
                                        replacement_word = random.choice(RESERVOIR)
                                    # REPLACEMENT: splice the new word back in
                                    # at the first occurrence of `word`.
                                    poem_ls = poem_replaced.split(' ')
                                    idx = poem_ls.index(word)
                                    if poem_ls[idx] == word:
                                        poem_ls[idx] = replacement_word
                                    poem_replaced = " ".join(poem_ls)

                        # CORRECT the "A" to "An" (and vice versa) where the
                        # following word was replaced.
                        for idx, word in enumerate(poem_replaced.split(" ")):
                            if len(word) > 0 and word[0].lower() in the_vowels and poem_replaced.split(" ")[idx - 1].lower() == "a":
                                if poem_replaced.split(" ")[idx - 1] == "a":
                                    old_str = "a " + poem_replaced.split(" ")[idx]
                                    new_str = "an " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "A " + poem_replaced.split(" ")[idx]
                                    new_str = "An " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(old_str, new_str)
                            if len(word) > 0 and word[0].lower() not in the_vowels and poem_replaced.split(" ")[idx - 1].lower() == "an":
                                if poem_replaced.split(" ")[idx - 1] == "an":
                                    old_str = "an " + poem_replaced.split(" ")[idx]
                                    new_str = "a " + poem_replaced.split(" ")[idx]
                                else:
                                    old_str = "An " + poem_replaced.split(" ")[idx]
                                    new_str = "A " + poem_replaced.split(" ")[idx]
                                poem_replaced = poem_replaced.replace(old_str, new_str)

                        #########################
                        #   WRITE SINGLE POEM   #
                        #########################
                        tmp_poem = ""
                        HTML_poem = ""
                        for line in poem_replaced.split("\n"):
                            lines_total += 1
                            HTML_poem += line + "<br>"

                        if len(response) > 0 and len(id.split("_")) > 1:
                            # Append this poem to the accumulated HTML.
                            ALL_poems += "<br><br>~~~~~~~~~~~~~~~~~~~~~~~~~~<br>[ A poem generated from template : <b>" + author + "</b>, <i>" + title + "</i> ]<br><br><b>" + new_title + "<br><br></b>" + HTML_poem
                            tmp_poem = "[A poem generated from template: " + author + ", '" + title + "'']\n\n'" + new_title + "'\nby\n" + new_author + "\n\n" + poem_replaced
                            print "\n******\n" + tmp_poem
                            txt_fn = id.split("_")[1] + "_POEMs.txt"
                            txt_fn_path = GENERATED_DIR + txt_fn
                            # NOTE(review): no with-block / encoding handling;
                            # tmp_poem is written as raw bytes.
                            f_txt = open(txt_fn_path, 'w')
                            f_txt.write(tmp_poem)
                            f_txt.close();
                        else:
                            # NOTE(review): bare expression -- nothing is
                            # printed; a `print` statement was likely intended.
                            "~~~~~~~~~~~~~~~~!!!!!!!!!! EMPTY response:", author
def extractFeaturesAndWriteBio(READ_PATH, file_type):
    """Walk READ_PATH and generate one mutated 'poem' per matching file.

    Pipeline per file: load the author/title/bio text, fake the author and
    book title, nudge 4-digit years, replace AlchemyAPI entities with
    similar entities from other JSON files, swap adjectives/adverbs via
    NLTK POS tags, run word-by-word synset replacement, then repair
    conjugation and articles with pattern.en; finally write the single
    poem and the accumulated anthology HTML to disk.

    NOTE(review): Python 2 code. Formatting below is reconstructed from a
    whitespace-mangled source; the two trailing `else:` pairings are a
    best reconstruction -- confirm against the original file. Relies on
    many module globals (DATA_DIR, READ_TXT_PATH, READ_JSON_PATH,
    filenames, authors, JJ, RB, rap_mouth, science_mouth, RESERVOIR,
    personal_pronouns, english_dict, start_time, type_of_run,
    ALL_poems_intro, ...) -- verify they exist at module level.
    """
    global ALL_poems, bio, cnt
    for subdir, dirs, files in os.walk(READ_PATH):
        for file in files:
            num_of_files = len(files) - 1  # deduct the DS_store
            if file_type in file and 'readme' not in file:

                # ID is the filename stem, e.g. "poetryFoundation_12345"
                id = file.split(".")[0]
                print "\nID:", id.split("_")[1]
                filenames.append(id)
                cnt += 1

                # per-poem working state
                poem_replaced = ""
                replacement_word = ""
                previous_replacement_word = ""
                author = ""
                titles = ""
                title = ""
                new_title = ""
                replaced_ls = []
                new_titles_ls = []
                quit_language = 0  # language-detector vote: > 0 means probably not English
                oscillator = 0

                # if EXCEPTION is raised... do not add to html
                SKIP_bool = False

                ##########################
                #  Load POEM TEXT FILE   #
                ##########################
                txt_fn_path = DATA_DIR + READ_TXT_PATH + id.split("_")[1] + ".txt"
                if os.path.isfile(txt_fn_path) and cnt > 0:
                    # NOTE(review): file handle is never closed; bytes are
                    # read without an explicit decode (Python 2 str).
                    txt_data = open(txt_fn_path).read()

                    # source file layout: author ****!**** title ****!**** bio
                    author = txt_data.split("****!****")[0].strip(' \t\n\r')
                    title = txt_data.split("****!****")[1].strip(' \t\n\r')
                    bio = txt_data.split("****!****")[2]  #.strip(' \t\n\r')

                    ###### CLEAN BIO
                    # NOTE(review): str.replace returns a new string; all
                    # three results are discarded, so bio is NOT cleaned here.
                    bio.replace("\t", "	")
                    bio.replace("\n", " <br>")
                    bio.replace("\r", " <br>")
                    poem_replaced = bio

                    ###############################
                    # REPLACE AUTHOR NAME in poem
                    ##############################
                    author_ln = author.split(" ")[-1].lstrip()
                    author_fn = author.split(" ")[:-1]
                    # NOTE(review): no separator before the last name, so
                    # "John Smith" becomes "JohnSmith" -- confirm intended.
                    author = " ".join(n for n in author_fn) + author_ln
                    #poem_replaced = poem_replaced.replace(author_ln,"Jhave")

                    #######################
                    # replace BOOK TITLES
                    #######################
                    # NOTE(review): literal "title" argument -- presumably a
                    # mode flag for getNewTitle, not the poem title; verify.
                    new_title = getNewTitle("title").encode('utf-8')

                    #######################
                    # fake AUTHOR
                    #######################
                    # Build a plausible fake name from two random entries of
                    # the module-level `authors` list.
                    new_author = " ".join(random.choice(authors).split(" ")[1:-2]) + " " + random.choice(authors).split(" ")[-2]

                    ############################
                    # replace years with another
                    ############################
                    # Any all-digit token found between parentheses is
                    # shifted by a random offset in [-5, +5].
                    for w1 in poem_replaced.split("("):
                        for w2 in w1.split(")"):
                            if w2 is not None and w2.isdigit():
                                new_num = random.randint(int(w2) - 5, int(w2) + 5)
                                poem_replaced = poem_replaced.replace(w2, str(new_num))
                                replaced_ls.append(new_num)

                    #################
                    #   Load JSON   #
                    #################
                    response = loadJSONfile(READ_JSON_PATH + "poetryFoundation_" + id.split("_")[1] + "_Alchemy_JSON.txt")

                    if response != "failed":
                        if response.get('entities') is not None:
                            ################################################
                            #  Replace similar entities from other JSON    #
                            ################################################
                            for idx, entity in enumerate(response['entities']):
                                ce = entity['text'].replace("0xc2", " ")
                                ce = ce.replace("0xe2", "'")
                                ce = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, ce)
                                ce = ce.encode('utf-8')
                                try:
                                    content = ce.decode('utf-8').encode('ascii', 'xmlcharrefreplace')
                                except UnicodeDecodeError:
                                    # NOTE(review): no-op expression; on failure
                                    # `content` keeps the previous iteration's
                                    # value (or is unbound on first iteration).
                                    "AAAARGGGGHHH!!!!"
                                if content in poem_replaced:
                                    replacement_entity = findSimilarEntityinRandomJSON(content, entity['type'])
                                    # `cr` computed but unused -- left as-is.
                                    cr = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, replacement_entity)
                                    poem_replaced = poem_replaced.replace(content, replacement_entity)
                                    replaced_ls.append(replacement_entity)

                            ##########################
                            #    POS REPLACMENT      #
                            ##########################
                            token_tuples = nltk.word_tokenize(poem_replaced)
                            tt = nltk.pos_tag(token_tuples)

                            #################
                            #  ADJECTIVES   #
                            #################
                            # Swap each adjective (JJ) / adverb (RB) for a
                            # random entry from the module-level JJ / RB pools,
                            # preserving title-case and edge punctuation.
                            for i in tt:
                                if "/i" not in i[0] and len(i[0]) > 2 and i[0] != "died":
                                    origw = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, i[0])
                                    origw = import_utilities.strip_punctuation(origw)
                                    if i[1] == 'JJ':
                                        JJr = random.choice(JJ)
                                        JJr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], JJr.lstrip().lstrip())
                                        if i[0].istitle():
                                            JJr = JJr.title()
                                        # replace first whole-word occurrence only
                                        poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', JJr, poem_replaced, 1)
                                        replaced_ls.append(JJr)
                                    if i[1] == 'RB':
                                        RBr = random.choice(RB)
                                        RBr = import_utilities.moveBeginAndEndPunctuationFromStrToString(i[0], RBr.lstrip().lstrip())
                                        if i[0].istitle():
                                            RBr = RBr.title()
                                        poem_replaced = re.sub(r'\b' + import_utilities.strip_punctuation(i[0]) + r'\b', RBr, poem_replaced, 1)
                                        replaced_ls.append(RBr)

                            ########################
                            #   IS IT ENGLISH?     #
                            ########################
                            # Vote per line: non-English lines increment
                            # quit_language, English lines decrement it.
                            for line in poem_replaced.split('\n\r'):
                                if len(line) > 0:
                                    if "english" not in import_utilities.get_language(line):
                                        quit_language += 1
                                    else:
                                        quit_language -= 1

                            #########################
                            #    SYNSET REPLACE     #
                            #########################
                            for idx, word in enumerate(poem_replaced.split(' ')):
                                if "<br>" not in word and "	" not in word and len(word) > 0:

                                    #########################
                                    #    PRONOUN ' VERB     #
                                    #########################
                                    # contractions like "she's": swap the pronoun
                                    if len(word.split("'")) > 1:
                                        if word.split("'")[0] in personal_pronouns:
                                            replacement_word = random.choice(personal_pronouns) + "'" + word.split("'")[1] + ' '
                                            # NOTE(review): result discarded --
                                            # this replace has no effect.
                                            poem_replaced.replace(word, replacement_word)

                                    ####################################################
                                    #   Replacement of OTHERs                          #
                                    ####################################################
                                    else:
                                        # take off leading brackets, commas etc...
                                        word_punct_nopunct = import_utilities.strip_punctuation_bool(word)
                                        word_nopunct = word_punct_nopunct['word'].strip(" .\n\t\r")
                                        word_punct = word_punct_nopunct['punct']
                                        punct_bool = word_punct_nopunct['punct_bool']

                                        #######################################################
                                        # MAIN EXCHANGE PROCESS CALL >>>>>>> GET THE SYNSET   #
                                        #######################################################
                                        similarterm = ""
                                        if word_nopunct[-4:].lower() == "here":
                                            similarterm = random.choice(import_utilities.heres)
                                        else:
                                            if len(word_nopunct) > 3:
                                                # oscillator counted for a (currently
                                                # disabled) style-switch; only the
                                                # synset lookup is active.
                                                oscillator = oscillator + 1
                                                similarterm = import_utilities.find_synset_word(word_nopunct)  # synset

                                        ############################################
                                        # manually get rid of some terrible choices
                                        ############################################
                                        if similarterm == "ilk":
                                            similarterm = "like"
                                        if similarterm == "ope":
                                            similarterm = "does"
                                        if similarterm == "information technology":
                                            similarterm = "it"
                                        if similarterm == "velleity":
                                            similarterm = "want"
                                        if similarterm == "Crataegus laevigata":
                                            similarterm = "may"
                                        if similarterm == "brunet" or similarterm == "ot":
                                            similarterm = random.choice(import_utilities.curses)
                                        if similarterm == "ge":
                                            similarterm = random.choice(science_mouth)
                                        if similarterm.lower() == "nox":
                                            similarterm = random.choice(science_mouth)
                                        if similarterm.lower() == "paunited":
                                            print "################### paUnited ###################"
                                            similarterm = word

                                        #######################################
                                        #  abbreviations for f*****g states!  #
                                        #######################################
                                        if word_nopunct.upper() in import_utilities.state_abbrev and word_nopunct.lower() not in stopwords.words('english') and "me," not in word:
                                            tmp = similarterm
                                            if word_nopunct == "oh":
                                                similarterm = random.choice(import_utilities.exclaims)
                                            else:
                                                similarterm = random.choice(rap_mouth)  # RESERVOIR)RESERVOIR)

                                        ##############
                                        # hyphenated #
                                        ##############
                                        # rebuild hyphenated words part by part
                                        hyp = word.split("-")
                                        if len(hyp) > 1:
                                            similarterm = ""
                                            for w in hyp:
                                                if len(w) > 2:
                                                    similarterm += import_utilities.find_synset_word(w) + "-"
                                            similarterm = import_utilities.strip_underscore(similarterm[:-1])

                                        #########################################################
                                        #  is it a TRUNCATED VERB slang as in singin or wishin  #
                                        #########################################################
                                        if similarterm == word_nopunct and len(word) > 2 and 'in' in word_nopunct[-2:]:
                                            # try again with the final g restored,
                                            # then re-conjugate to a participle
                                            similarterm = import_utilities.find_synset_word(word_nopunct + 'g')
                                            interim = import_utilities.lemma(similarterm)
                                            similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1]

                                        #################
                                        #  SWEAR WORD   #
                                        #################
                                        if word_nopunct in import_utilities.curses:
                                            similarterm = random.choice(import_utilities.curses)

                                        # re-attach the original punctuation
                                        if len(hyp) > 1:
                                            replacement_word = similarterm
                                        else:
                                            replacement_word = word.replace(word_nopunct, similarterm)
                                        replacement_word = import_utilities.strip_underscore(replacement_word)
                                        replacement_word = import_utilities.replaceNumbers(replacement_word)

                                        ##########################
                                        # RESERVOIR_OF_WEIRDNESS #
                                        ##########################
                                        # function words get a same-class random
                                        # swap; everything else may draw from the
                                        # harvested RESERVOIR.
                                        if word_nopunct.lower() in import_utilities.impera:
                                            replacement_word = random.choice(import_utilities.impera)
                                        elif word_nopunct.lower() in import_utilities.conjuncts:
                                            replacement_word = random.choice(import_utilities.conjuncts)
                                        elif word_nopunct.lower() in import_utilities.indef_prono:
                                            replacement_word = random.choice(import_utilities.indef_prono)
                                        elif word_nopunct.lower() in import_utilities.prepo:
                                            replacement_word = random.choice(import_utilities.prepo)
                                        elif word_nopunct.lower() in import_utilities.rel_prono:
                                            replacement_word = word
                                        elif word_nopunct.lower()[-2:] == "ly" or word_nopunct.lower()[-3:] == "ly.":
                                            replacement_word = import_utilities.strip_underscore(import_utilities.find_synset_word(word))
                                        else:
                                            # synset lookup returned the same word:
                                            # harvest it into RESERVOIR, then pick a
                                            # random reservoir word instead
                                            if len(hyp) < 2 and "like" not in word_nopunct and import_utilities.singularize(word_nopunct) == import_utilities.singularize(replacement_word) and word_nopunct.lower() not in import_utilities.stopwords_ls:
                                                if word not in RESERVOIR and import_utilities.countPunctuation(word) < 1 and len(word_nopunct) > 3 and not word_nopunct.istitle():
                                                    if len(word) > 4 and english_dict.check(word):
                                                        RESERVOIR.append(word)
                                                replacement_word = random.choice(RESERVOIR)
                                            # probably foreign language: word salad
                                            if quit_language > 1 and not word_nopunct.istitle():
                                                replacement_word = random.choice(rap_mouth)

                                        ###################################################
                                        #      MOST REPLACEMENT occurs here...            #
                                        ###################################################
                                        poem_ls = poem_replaced.split(' ')
                                        # NOTE(review): .index() raises ValueError if
                                        # `word` is gone (earlier edits) -- and it is
                                        # OUTSIDE the try below, so that error escapes.
                                        idx = poem_ls.index(word)
                                        try:
                                            if poem_ls[idx] == word and "****" not in word and "." != word and "\n" not in word:
                                                poem_ls[idx] = replacement_word
                                                # NOTE(review): no-op tuple expression
                                                # (a `print` is evidently missing).
                                                "REPLACE", word, "with", replacement_word
                                                poem_replaced = " ".join(poem_ls)
                                                # store this word so that conjugation can be checked
                                                previous_replacement_word = replacement_word
                                        except Exception, e:
                                            print "PENULTIMATE SKIP_bool replace FAIL", e
                                            SKIP_bool = True
                                            continue

                            ###########################################################################
                            # testing Pattern.en as parser for conjugation and article replacement    #
                            # much more robust than my hand-coded hacks                               #
                            ###########################################################################
                            # correct CONJUGATion of participle verbs with pattern.en:
                            # after 'm / 's / 're, force the next verb to participle.
                            parsed = parse(poem_replaced, tags=True)
                            pre_verbal = ["'m", "'s", "'re"]
                            for idx, p in enumerate(parsed.split(" ")):
                                tok = p.split("/")[0]
                                typ = p.split("/")[1]
                                if tok in pre_verbal:
                                    # NOTE(review): idx + 1 may run past the end on a
                                    # trailing pre_verbal token (IndexError).
                                    next_word = parsed.split(" ")[idx + 1].split("/")
                                    for ix, n in enumerate(next_word):
                                        next_word[ix] = re.sub('(' + '|'.join(import_utilities.chars.keys()) + ')', import_utilities.replace_chars, n).encode('utf-8')
                                    try:
                                        # if it's a verb that follows
                                        if next_word[1][:2] == "VB":
                                            before_verb = " ".join(w for w in poem_replaced.split(" ")[:idx])
                                            after_verb = " ".join(w for w in poem_replaced.split(" ")[idx + 1:])
                                            new_verb = conjugate(next_word[0], tense=PARTICIPLE, parse=True).encode('utf-8')
                                            poem_replaced = before_verb + " " + new_verb + " " + after_verb
                                    except Exception, e:
                                        continue

                            # correct ARTICLES: pattern.en `referenced` picks a/an
                            for idx, word in enumerate(poem_replaced.split(" ")):
                                if len(word) > 0 and idx != 0 and " " not in word:
                                    # A or AN
                                    if poem_replaced.split(" ")[idx - 1].lower() == "a" or poem_replaced.split(" ")[idx - 1].lower() == "an":
                                        before_article = " ".join(w for w in poem_replaced.split(" ")[:idx - 1])
                                        after_article = " ".join(w for w in poem_replaced.split(" ")[idx + 1:])
                                        new_conj = referenced(word)
                                        # capitalize
                                        if poem_replaced.split(" ")[idx - 1].istitle():
                                            new_conj = new_conj.split(" ")[0].title() + " " + new_conj.split(" ")[1]
                                        poem_replaced = before_article + " " + new_conj + " " + after_article

                            #########################
                            #   WRITE SINGLE POEM   #
                            #########################
                            if not SKIP_bool:
                                tmp_poem = ""
                                HTML_poem = ""
                                for line in poem_replaced.split("\n"):
                                    HTML_poem += line + "<br>"
                                if len(response) > 0 and len(id.split("_")) > 1:
                                    try:
                                        # prepend the newest poem to the anthology body
                                        # (kept after the intro's closing </h2>)
                                        ALL_poems = "<br>[ A generated-poem based upon: <i>" + title + "</i> by <b>" + author + "</b>]<br><br><i>" + new_title + "</i><br> by <b>" + new_author + "</b><br>" + HTML_poem + ALL_poems.split("</h2>")[1].replace(" ", " ")
                                        tmp_poem = "[A generated-poem based upon: '" + title + "' by " + author + "]\n\n" + new_title + "\nby " + new_author + "\n" + poem_replaced
                                        print "\n~~~\n\n" + tmp_poem
                                        txt_fn = id.split("_")[1] + "_POEMs.txt"
                                        WRITE_BIO_PATH = DATA_DIR + "generated/POEMS/POEMS_" + datetime.datetime.now().strftime('%Y-%m-%d_%H') + "/"
                                        if not os.path.exists(WRITE_BIO_PATH):
                                            os.makedirs(WRITE_BIO_PATH)
                                        txt_fn_path = WRITE_BIO_PATH + txt_fn
                                        f_txt = open(txt_fn_path, 'w')
                                        f_txt.write(tmp_poem)
                                        f_txt.close()

                                        # write them all.... wasteful... but useful if run is interrupted....
                                        ALL_poems = ALL_poems_intro + ALL_poems.replace(" ", " ")
                                        ALL_poems = ALL_poems.replace("$$datetime$$", datetime.datetime.now().strftime('%Y-%m-%d at %H:%M'))
                                        ALL_poems = ALL_poems.replace("$$cnt$$", str(cnt))
                                        ALL_poems = ALL_poems.replace("$$gentime$$", str(time.time() - start_time))

                                        # ALL POEMS
                                        txt_fn = datetime.datetime.now().strftime('%Y-%m-%d_%H') + "_poetryFoundation_generatedPOEMS_" + type_of_run + ".html"
                                        txt_fn_path = DATA_DIR + "generated/POEMS/" + txt_fn
                                        f_txt = open(txt_fn_path, 'w')
                                        # NOTE(review): "</hmtl>" is a typo in the
                                        # runtime string (should be "</html>") --
                                        # left unchanged in this doc-only pass.
                                        f_txt.write(ALL_poems + "</hmtl>")
                                        f_txt.close()
                                    except Exception, e:
                                        print "At the final LOOP", e
                                        continue
                                else:
                                    print "~! EMPTY response:", author
                else:
                    # file missing (or cnt not positive): un-count this file
                    cnt = cnt - 1
similarterm += import_utilities.find_synset_word(w)+"-" similarterm = import_utilities.strip_underscore(similarterm[:-1]) #print "hyphenated:",word,"replaced by: "+similarterm ######################################################### # is it a TRUNCATED VERB slang as in singin or wishin # ######################################################### if similarterm == word_nopunct and len(word)>2 and 'in' in word_nopunct[-2:]: similarterm = import_utilities.find_synset_word(word_nopunct+'g') ## #print "TRUNCATED SLANG word: '"+word+"'",similarterm interim = import_utilities.lemma(similarterm) ## #print interim similarterm = import_utilities.conjugate(interim, tense=import_utilities.PARTICIPLE, parse=True)[:-1] # # # #print word,"widx:",widx," line_pos_tags[widx][0]:",line_pos_tags[widx][0]," line_pos_tags[widx][1]:",line_pos_tags[widx][1] ################# # SWEAR WORD # ################# ##print "at the garden of if:", word if word_nopunct in import_utilities.curses: similarterm = random.choice(import_utilities.curses) ##print "SWEAR WORD word: '"+word+"'",similarterm if len(hyp) >1: replacement_word = similarterm else: