Python Parser.getKeywords 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: myParser

클래스/타입: Parser

메소드/함수: getKeywords

hotexamples.com에서의 예제들: 2

Python Parser.getKeywords - 2개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한, myParser.Parser.getKeywords의 실제 사용 사례 중 평가가 가장 높은 Python 예제들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Parser(10)

get_titles(3)

getKeywords(2)

parse(2)

get_time(2)

get_paragraphs(2)

get_tags(2)

getConstant(1)

getAddress(1)

dest(1)

advance(1)

comp(1)

commandType(1)

hasMoreCommands(1)

isConstant(1)

jump(1)

closeFile(1)

parseCoreData(1)

parse_known_args(1)

resetFile(1)

symbol(1)

예제 #1

파일 보기

파일: summarizer.py 프로젝트: JeffreyHoa/legull

class Summarizer:
    """Collect per-fragment keyword statistics via ``Parser.getKeywords``."""

    def __init__(self):
        self.parser = Parser()

    def _keyword_entry(self, fragment):
        """Return the ``(keywords, wordCount)`` pair getKeywords gives for *fragment*."""
        keywords, word_count = self.parser.getKeywords(fragment)
        return (keywords, word_count)

    def summarize(self, text, title):
        """Extract keywords for the catchphrase and for every body sentence.

        Args:
            text: iterable of body sentences; each element is handed to
                ``getKeywords`` on its own.
            title: the catchphrase, handed to ``getKeywords`` as one item.

        Returns:
            A list of ``(keywords, wordCount)`` tuples: the entry for
            ``title`` first, followed by one entry per element of ``text``
            in order.
        """
        # Step 1: term list of the catchphrase.
        result = [self._keyword_entry(title)]

        # Step 2: term list of each sentence, in input order.
        result.extend(self._keyword_entry(sentence) for sentence in text)

        return result

예제 #2

파일 보기

def _first_positions(words):
    """Map each word to the index of its first occurrence in *words*."""
    positions = {}
    for pos, word in enumerate(words):
        # setdefault keeps the earliest index, matching list.index().
        positions.setdefault(word, pos)
    return positions


def predictBayesianModel(sentenceList=(
        'hello world occupation lease',
        'machine learning board',
        'machine learning lease occupation',
),
                         input_path="./model/train_model.npz",
                         word_index_file="./model/word_index.npz"):
    """Score each body sentence against the catchphrase using a saved model.

    Args:
        sentenceList: sequence of sentences. Element 0 is the catchphrase;
            elements 1..end are the body sentences to score.
        input_path: ``.npz`` file whose ``arr_1`` entry is the score matrix
            (``theta_bar``).
        word_index_file: ``.npz`` file whose ``arr_0`` entry lists the
            catchwords and whose ``arr_1`` entry lists the body words.

    Returns:
        A list with one score per body sentence (sentences 1..end, in
        order). Empty when ``sentenceList`` has fewer than two elements.
    """
    # NpzFile keeps the archive open until closed; arrays are read on
    # access, so pull out what is needed while the handle is still open.
    with np.load(input_path) as model:
        theta_bar = model['arr_1']

    with np.load(word_index_file) as word_index:
        catchword_list = word_index['arr_0'].tolist()
        bodyword_list = word_index['arr_1'].tolist()

    # No catchphrase and no body sentences: nothing to score.
    if len(sentenceList) == 0:
        return []

    parser = Parser()

    # Build each word -> index lookup once instead of scanning the
    # vocabulary lists for every keyword.
    catchword_pos = _first_positions(catchword_list)
    bodyword_pos = _first_positions(bodyword_list)

    # --------------------------------------------------
    # Positions of the catchphrase's keywords in the catchword vocabulary.
    # Keywords not present in the vocabulary are skipped.
    # --------------------------------------------------
    keywords, _ = parser.getKeywords(sentenceList[0])
    catchword_positionList = [
        catchword_pos[elem['word']]
        for elem in keywords
        if elem['word'] in catchword_pos
    ]

    # --------------------------------------------------
    # Score every body sentence (the first sentence is the catchphrase).
    # For each known word, sum its theta_bar entries over all catchwords
    # and weight the sum by the word's count in the sentence.
    # --------------------------------------------------
    scoreRecord = []
    for sentence in sentenceList[1:]:
        keywords, _ = parser.getKeywords(sentence)

        sentence_score = 0
        for elem in keywords:
            word = elem['word']
            count = elem['count']

            row = bodyword_pos.get(word)
            if row is None:
                # Word unknown to the model contributes nothing.
                continue

            # The row must be the body word's own index, not the sentence
            # number; otherwise every word in a sentence scores the same.
            # NOTE(review): assumes theta_bar rows follow the body-word
            # order and columns the catchword order -- confirm against the
            # training code.
            word_score = sum(
                theta_bar[row][col] for col in catchword_positionList)
            sentence_score += word_score * count

        scoreRecord.append(sentence_score)

    return scoreRecord