Example #1
0
    def search_book_by_title(self, title, n=10):
        """Rank index entries by how often the tokens of *title* point at them.

        The title is tagged with ``nlp_module.pos_Kkma``; every tagged item is
        read as ``item[0]`` (token) and ``item[1]`` (tag).  Tokens whose tag
        starts with ``'N'`` or equals ``'OL'`` (presumably the Kkma noun tags
        and the foreign-language tag -- confirm against the tagger's tag set)
        are looked up in ``self.wordtobookindex`` and their posting lists are
        pooled and counted.

        Args:
            title: Text to search for.
            n: Upper slice bound applied to the ranked list (default 10).

        Returns:
            A list of ``(entry, count)`` tuples, where ``entry`` is a value
            stored in the posting lists of ``self.wordtobookindex`` and
            ``count`` is how many times it occurs in the posting lists of the
            matched tokens.  Ordered by ``count`` descending; ties are ordered
            by ``entry`` ascending so the result does not depend on set
            iteration order.
        """
        words = set(nlp_module.pos_Kkma(title))

        seen_tokens = set()
        appearance_list = []
        for word in words:
            token, tag = word[0], word[1]
            # startswith() also copes with an empty tag, where tag[0] would raise.
            if not (tag.startswith('N') or tag == 'OL'):
                continue
            # A token can be returned under more than one qualifying tag; count
            # its posting list only once.  The membership test (instead of
            # indexing) keeps a defaultdict-based index from growing on misses.
            if token in seen_tokens or token not in self.wordtobookindex:
                continue
            seen_tokens.add(token)
            print(word)
            appearance_list.extend(self.wordtobookindex[token])

        counted = Counter(appearance_list)

        # Secondary key on the entry itself makes tie order deterministic;
        # this requires the entries to be mutually orderable.
        ranked = sorted(counted.items(), key=lambda item: (-item[1], item[0]))

        return ranked[:n]
Example #2
0
    def init_word_index(self, book_set):
        """Build ``self.wordtobookindex``: token -> positions in *book_set*.

        Each book's ``title`` is tagged with ``nlp_module.pos_Kkma``; every
        tagged item is read as ``item[0]`` (token) and ``item[1]`` (tag).
        Tokens whose tag starts with ``'N'`` or equals ``'OL'`` (presumably
        the Kkma noun tags and the foreign-language tag -- confirm against the
        tagger's tag set) are indexed.  Each book position is stored at most
        once per token.

        After building, the whole index is printed (one line per token with
        the ``(position, title)`` pairs it maps to), followed by a completion
        message.

        Args:
            book_set: Sequence of objects exposing a ``title`` attribute; it
                is iterated once and later indexed by position for the dump.
        """
        self.wordtobookindex = defaultdict(list)

        for i, book in enumerate(book_set):
            tagged = nlp_module.pos_Kkma(book.title)

            # Deduplicate on the token alone: the same token may be returned
            # under several qualifying tags, and must not add the book twice.
            # startswith() also copes with an empty tag, where tag[0] would raise.
            tokens = {
                word[0]
                for word in tagged
                if word[1].startswith('N') or word[1] == 'OL'
            }
            # Sorted so key insertion order (and thus the dump below) is stable.
            for token in sorted(tokens):
                self.wordtobookindex[token].append(i)

        for key, positions in self.wordtobookindex.items():
            print("'{}' : {}".format(key, [(idx, book_set[idx].title) for idx in positions]))
        print("index init finished")
def tokenize(sentences):
    """Return the set of lower-cased NNG/NNP tokens found in *sentences*.

    The text is tagged with ``nlp_module.pos_Kkma``; every tagged item is read
    as ``item[0]`` (token) and ``item[1]`` (tag).  Only items tagged ``'NNG'``
    or ``'NNP'`` are kept, lower-cased, and then filtered against a fixed
    stop-word collection.

    Args:
        sentences: Text handed to the tagger.

    Returns:
        A ``set`` of the surviving lower-cased tokens.
    """
    stopwords = frozenset((
        'extreme', 'novel', 'j', 'l', 'nt', '노', '라이트', '시리즈', '제', '노', '일본',
        '의', '벨', '회', '노와', '리', '장', '마', '라', '히', '을', '토', '팬', '전', '탄',
        '오', '말', '메', '레', '로', '타', '바', '가', '서', '루', '디', '데', '요', '하',
        '들', '카', '키', '미', '코', '유', '속', '우', '사', '르', '출간', '사이트', '독자',
        '세', '작품', '회', '편', '완결', '신작', '호평', '누', '연재', '저자', '원작', '주',
        '단편', '단편집', '국내', '장려상', '발행', '최신작', '모', '사토', '와의', '중', '책갈피',
        '초판', '한국', '투', '황', '모토', '니스', '마코토', '나미', '통합', '루가', '이지만', '마사',
        '맥', '토모', '이슈', '전개',
    ))
    kept_tags = ('NNG', 'NNP')

    # Lower-case first, then filter: the stop-word check is on the lowered form.
    lowered = (
        tagged[0].lower()
        for tagged in nlp_module.pos_Kkma(sentences)
        if tagged[1] in kept_tags
    )
    return {token for token in lowered if token not in stopwords}