def concrete_terms(self, results_dic):
    """Collect concrete object terms for every pair found in ``results_dic``.

    Each value of ``results_dic`` is iterated as a collection of
    ``'<object_term>_<predicate_term>'`` strings. For every distinct pair,
    the object term is looked up through ``ToiuSearchable.result_pages`` and
    each returned page's snippet is tokenized. Whenever the object term is
    directly preceded by the particle "という" and the token before that
    particle is a noun not listed in ``STOP_WORDS``, the string
    ``'<noun>_<predicate_term>'`` is recorded.

    Args:
        results_dic: Mapping whose values are iterables of pair strings.

    Returns:
        A dict mapping each distinct pair string to the set of
        ``'<noun>_<predicate_term>'`` strings found for it (possibly empty).
    """
    toiu_word_info = 'という\t助詞,格助詞,連語,*,*,*,という,トイウ,トユウ'

    # De-duplicate the pairs across all keys of the input mapping.
    results_set = set()
    for result_broader in results_dic:
        results_set.update(results_dic[result_broader])

    ts = ToiuSearchable()
    terms_by_pair = {}
    for simple_pair in results_set:
        # A single pair can yield several concrete object terms.
        concrete_object_terms = set()
        object_term, predicate_term = simple_pair.split('_')
        for page in ts.result_pages(object_term, constants.FINAL_QUERY):
            # Strip the brackets from the snippet, tokenize it, and accept
            # the noun that sits right before "という<object_term>".
            snippet = page.snippet_without_parenthesis()
            m_words = MWordsFactory().build_from(snippet)
            for i, m_word in enumerate(m_words):
                # Two preceding tokens are required. Without this guard,
                # i - 1 / i - 2 become negative and silently wrap around to
                # the end of the token sequence.
                if i < 2 or m_word.name != object_term:
                    continue
                if m_words[i - 1].word_info != toiu_word_info:
                    continue
                candidate = m_words[i - 2]
                if candidate.type != '名詞' or candidate.name in STOP_WORDS:
                    continue
                concrete_object_terms.add(
                    candidate.name + '_' + predicate_term)
        terms_by_pair[simple_pair] = concrete_object_terms
    return terms_by_pair
def __init__(self, text):
    """Initialize ``body`` and ``m_body_words`` from the raw ``text``.

    The body is produced by ``TextCombiner.remove_parenthesis``, then passed
    through ``set_blank_to_body_if_not_includes_hiragana_or_karakana`` and
    ``TextCombiner.remove_inside_round_parenthesis``, in that order. The
    resulting body is tokenized with ``MWordsFactory.build_from``.

    Args:
        text: Raw text to clean and tokenize.
    """
    combiner = TextCombiner()
    self.body = combiner.remove_parenthesis(text)
    # Runs on the partially cleaned body; presumably blanks it when it holds
    # no hiragana/katakana (judging by the method name -- confirm).
    self.set_blank_to_body_if_not_includes_hiragana_or_karakana()
    self.body = combiner.remove_inside_round_parenthesis(self.body)
    self.m_body_words = MWordsFactory().build_from(self.body)
def concrete_terms(self, results_dic):
    """Collect concrete object terms for every pair found in ``results_dic``.

    Each value of ``results_dic`` is iterated as a collection of
    ``'<object_term>_<predicate_term>'`` strings. For every distinct pair,
    the object term is looked up through ``ToiuSearchable.result_pages`` and
    each returned page's snippet is tokenized. Whenever the object term is
    directly preceded by the particle "という" and the token before that
    particle is a noun not listed in ``STOP_WORDS``, the string
    ``'<noun>_<predicate_term>'`` is recorded.

    Args:
        results_dic: Mapping whose values are iterables of pair strings.

    Returns:
        A dict mapping each distinct pair string to the set of
        ``'<noun>_<predicate_term>'`` strings found for it (possibly empty).
    """
    toiu_word_info = 'という\t助詞,格助詞,連語,*,*,*,という,トイウ,トユウ'

    # De-duplicate the pairs across all keys of the input mapping.
    results_set = set()
    for result_broader in results_dic:
        results_set.update(results_dic[result_broader])

    ts = ToiuSearchable()
    terms_by_pair = {}
    for simple_pair in results_set:
        # A single pair can yield several concrete object terms.
        concrete_object_terms = set()
        object_term, predicate_term = simple_pair.split('_')
        for page in ts.result_pages(object_term, constants.FINAL_QUERY):
            # Strip the brackets from the snippet, tokenize it, and accept
            # the noun that sits right before "という<object_term>".
            snippet = page.snippet_without_parenthesis()
            m_words = MWordsFactory().build_from(snippet)
            for i, m_word in enumerate(m_words):
                # Two preceding tokens are required. Without this guard,
                # i - 1 / i - 2 become negative and silently wrap around to
                # the end of the token sequence.
                if i < 2 or m_word.name != object_term:
                    continue
                if m_words[i - 1].word_info != toiu_word_info:
                    continue
                candidate = m_words[i - 2]
                if candidate.type != '名詞' or candidate.name in STOP_WORDS:
                    continue
                concrete_object_terms.add(
                    candidate.name + '_' + predicate_term)
        terms_by_pair[simple_pair] = concrete_object_terms
    return terms_by_pair
def prepare_m_words(self, page):
    """Tokenize a page's snippet and merge its noun and verb tokens.

    ``page.snippet`` is cleaned with ``TextCombiner.remove_all_parenthesis``,
    tokenized with ``MWordsFactory.build_from``, and the tokens are passed
    through ``combine_nouns`` and then ``combine_verbs``.

    Args:
        page: Object exposing a ``snippet`` attribute.

    Returns:
        Whatever ``TextCombiner.combine_verbs`` returns for the tokens.
    """
    combiner = TextCombiner()
    cleaned_snippet = combiner.remove_all_parenthesis(page.snippet)
    tokens = MWordsFactory().build_from(cleaned_snippet)
    # Nouns are combined first; the verb pass runs on that result.
    return combiner.combine_verbs(combiner.combine_nouns(tokens))
def pick_sahens(self):
    """Return the names of tokens in ``self.body`` whose subtype is 'サ変接続'.

    Returns:
        list of ``name`` values, in token order, duplicates included.
    """
    tokens = MWordsFactory().build_from(self.body)
    return [token.name for token in tokens if token.subtype == 'サ変接続']
def pick_words_by_types(self, types):
    """Return the names of tokens in ``self.body`` whose type is in ``types``.

    Args:
        types: Iterable of type labels compared against each token's
            ``type`` attribute.

    Returns:
        list of ``name`` values in token order. A name is appended once per
        matching entry of ``types``, so a label repeated in ``types`` yields
        repeated names.
    """
    tokens = MWordsFactory().build_from(self.body)
    return [
        token.name
        for token in tokens
        for wanted_type in types
        if token.type == wanted_type
    ]
def verbs(self, text):
    """Return the stems of all tokens in ``text`` whose type is '動詞'.

    Args:
        text: Text handed to ``MWordsFactory.build_from``.

    Returns:
        list of ``stem`` values, in token order, duplicates included.
    """
    tokens = MWordsFactory().build_from(text)
    return [token.stem for token in tokens if token.type == '動詞']
def sahens(self, text):
    """Return the names of all tokens in ``text`` whose subtype is 'サ変接続'.

    Args:
        text: Text handed to ``MWordsFactory.build_from``.

    Returns:
        list of ``name`` values, in token order, duplicates included.
    """
    tokens = MWordsFactory().build_from(text)
    return [token.name for token in tokens if token.subtype == 'サ変接続']
def score_of_sentence(self, sentence):
    """Count how many positive-experience texts occur in the stemmed sentence.

    The sentence is tokenized, the token stems are concatenated without a
    separator, and every entry of ``self.positive_experiences`` found as a
    substring of that concatenation adds one to the score.

    Args:
        sentence: Text handed to ``MWordsFactory.build_from``.

    Returns:
        The number of matching entries, as a float.
    """
    tokens = MWordsFactory().build_from(sentence)
    joined_stems = ''.join([token.stem for token in tokens])
    hits = sum(
        1 for phrase in self.positive_experiences if phrase in joined_stems
    )
    return float(hits)
def set_core_noun_from_name(self):
    """Set ``self.core_noun`` to the part of ``self.name`` after the first "の".

    ``self.name`` is tokenized; at the first token whose ``word_info`` equals
    the adnominal-particle "の" entry, the names of all following tokens are
    concatenated and stored in ``self.core_noun``. When no such token exists,
    ``self.core_noun`` is set to ``self.name`` unchanged.
    """
    no_particle_info = 'の\t助詞,連体化,*,*,*,*,の,ノ,ノ'
    tokens = MWordsFactory().build_from(self.name)

    # Position of the first "の" particle, or None when the name has none.
    split_at = next(
        (pos for pos, token in enumerate(tokens)
         if token.word_info == no_particle_info),
        None,
    )
    if split_at is None:
        self.core_noun = self.name
        return
    self.core_noun = ''.join(
        [token.name for token in tokens[split_at + 1:]])
def set_core_noun_from_name(self):
    """Set ``self.core_noun`` to the part of ``self.name`` after the first "の".

    ``self.name`` is tokenized; at the first token whose ``word_info`` equals
    the adnominal-particle "の" entry, the names of all following tokens are
    concatenated and stored in ``self.core_noun``. When no such token exists,
    ``self.core_noun`` is set to ``self.name`` unchanged.
    """
    no_particle_info = 'の\t助詞,連体化,*,*,*,*,の,ノ,ノ'
    tokens = MWordsFactory().build_from(self.name)

    # Position of the first "の" particle, or None when the name has none.
    split_at = next(
        (pos for pos, token in enumerate(tokens)
         if token.word_info == no_particle_info),
        None,
    )
    if split_at is None:
        self.core_noun = self.name
        return
    self.core_noun = ''.join(
        [token.name for token in tokens[split_at + 1:]])