def test_extract_word():
    """Check ``mecab.extract_word`` for nouns, noun phrases and verb root forms.

    Each expectation is compared with ``==``. The two-operand form
    ``assert got, [...]`` only checks that ``got`` is truthy and treats the
    list as the failure message, so it cannot catch a wrong result.
    """
    # Plain noun extraction.
    got = mecab.extract_word('環境音楽だ', '名詞')
    assert got == [u'環境', u'音楽']

    # With phrase=True the compound is expected in addition to its parts.
    got = mecab.extract_word('環境音楽だ', '名詞', phrase=True)
    assert got == [u'環境', u'音楽', u'環境音楽']

    # With rootform=True the verb is expected in its dictionary form.
    got = mecab.extract_word('寝ろ', '動詞', rootform=True)
    assert got == [u'寝る']
def test_extract_word():
    """Run ``mecab.extract_word`` over a fixed table of inputs and expectations.

    Each row holds the positional arguments, the keyword arguments and the
    expected result; rows are checked in order with ``assert_equals``.
    """
    cases = (
        (('環境音楽だ', '名詞'), {}, [u'環境', u'音楽']),
        (('環境音楽だ', '名詞'), {'phrase': True},
         [u'環境', u'音楽', u'環境音楽']),
        (('寝ろ', '動詞'), {'rootform': True}, [u'寝る']),
    )
    for positional, keywords, expected in cases:
        got = mecab.extract_word(*positional, **keywords)
        assert_equals(got, expected)
def respond(text):
    """Yield past posts that responded to a post similar to ``text``.

    Three searches are run in order, and every truthy result of
    ``_extract_response_by_search`` is yielded:

    1. the raw ``text`` itself (second argument ``False``);
    2. the space-joined words ``mecab.extract_word`` returns for
       ``'content_word'`` (second argument ``True``);
    3. the space-joined words it returns for ``('名詞,固有名詞',)``
       (second argument ``True``).

    Word extraction for a pass happens only once the previous pass has been
    fully consumed by the caller.
    """
    # Pass 1: search with the text as given.
    for hit in _extract_response_by_search([text], False):
        if hit:
            yield hit

    # Passes 2 and 3: search with extracted keywords, one word class per pass.
    for word_class in ('content_word', ('名詞,固有名詞',)):
        keywords = mecab.extract_word(text, word_class)
        for hit in _extract_response_by_search([' '.join(keywords)], True):
            if hit:
                yield hit
def respond(text, *args):
    """Yield past posts that responded to a post similar to ``text``.

    The search is attempted first with the raw text, then with the words
    extracted for ``'content_word'``, and finally with the words extracted
    for ``('名詞,固有名詞',)``. Falsy search results are dropped.

    Args:
        text: The post to find responses for.
        *args: Accepted but not used by this function.
    """
    # First attempt: the text exactly as received.
    for candidate in _extract_response_by_search([text], False):
        if not candidate:
            continue
        yield candidate

    # Later attempts: keyword queries built from the tokenizer output.
    for pos_filter in ('content_word', ('名詞,固有名詞',)):
        joined_keywords = ' '.join(mecab.extract_word(text, pos_filter))
        for candidate in _extract_response_by_search([joined_keywords], True):
            if not candidate:
                continue
            yield candidate
def count(self, log):
    """Accumulate word statistics from the posts in ``log``.

    For every post the ``'text'``, ``'q1'`` and ``'q2'`` fields are read
    (missing or empty ones are skipped), split into lines, passed through
    ``self.prepare_for_counting`` and tokenized with ``mecab.extract_word``.
    Every word is appended to ``self.words`` the first time it is seen. For
    each line, the ``self.word_ids`` values of its words are tallied in a
    ``Counter``; that tally is added to ``self.word_counts[<id>]`` for every
    id occurring in the line and to ``self.unique_wordcounts``.

    Note: a field that holds a list is replaced in the post itself by its
    newline-joined string.

    Args:
        log: Iterable of dict-like posts.
    """
    for post in log:
        for idx in ('text', 'q1', 'q2'):
            if not post.get(idx):
                continue
            if isinstance(post[idx], list):
                post[idx] = '\n'.join(post[idx])
            for line in post[idx].splitlines():
                line = self.prepare_for_counting(line)
                # Collect the ids once per line. Resetting the list for
                # every word would leave at most one id to count.
                ws = []
                for w in mecab.extract_word(line):
                    ws.append(self.word_ids[w])
                    if w not in self.words:
                        self.words.append(w)
                if not ws:
                    # Nothing extracted from this line; nothing to tally.
                    continue
                cntr = Counter(ws)
                for word in cntr:
                    self.word_counts[word] += cntr
                self.unique_wordcounts += cntr
def count(self, log):
    """Accumulate word statistics from the posts in ``log``, skipping NG words.

    For every post the ``'text'``, ``'q1'`` and ``'q2'`` fields are read
    (missing or empty ones are skipped), split into lines, passed through
    ``self.prepare_for_counting`` and tokenized with ``mecab.extract_word``.
    Words contained in ``NG_WORDS`` are ignored. Every remaining word is
    appended to ``self.words`` the first time it is seen. For each line, the
    ``self.word_ids`` values of its words are tallied in a ``Counter``; that
    tally is added to ``self.word_counts[<id>]`` for every id occurring in
    the line and to ``self.unique_wordcounts``.

    Note: a field that holds a list is replaced in the post itself by its
    newline-joined string.

    Args:
        log: Iterable of dict-like posts.
    """
    for post in log:
        for idx in ('text', 'q1', 'q2'):
            if not post.get(idx):
                continue
            if isinstance(post[idx], list):
                post[idx] = '\n'.join(post[idx])
            for line in post[idx].splitlines():
                line = self.prepare_for_counting(line)
                # Collect the ids once per line. Resetting the list for
                # every word would leave at most one id to count.
                ws = []
                for w in mecab.extract_word(line):
                    if w in NG_WORDS:
                        continue
                    ws.append(self.word_ids[w])
                    if w not in self.words:
                        self.words.append(w)
                if not ws:
                    # Nothing countable on this line; nothing to tally.
                    continue
                cntr = Counter(ws)
                for word in cntr:
                    self.word_counts[word] += cntr
                self.unique_wordcounts += cntr
def respond(text, *args):
    """Yield messages built from past posts found for the words in ``text``.

    The words extracted from ``text`` are passed to ``kuzuha.search``. The
    ``'text'`` field of every hit (empty string when absent) is run through
    ``cleansing``, and the results are concatenated. Each non-empty item
    produced by ``get_longest_common_substring`` for that concatenation is
    yielded with a fixed emoticon suffix appended.

    Args:
        text: The post to respond to.
        *args: Accepted but not used by this function.
    """
    keywords = mecab.extract_word(text)
    hits = kuzuha.search(keywords)
    cleaned = [cleansing(hit.get('text', '')) for hit in hits]
    corpus = ''.join(cleaned)
    for fragment in get_longest_common_substring(corpus):
        if not fragment:
            continue
        yield fragment + '(;´Д`)'