def concrete_terms(self, results_dic):
     """Map every pair found in ``results_dic`` to a set of more concrete pairs.

     The values of ``results_dic`` are flattened into a set of
     ``'<object_term>_<predicate_term>'`` strings. For each pair, pages are
     fetched with ``ToiuSearchable.result_pages(object_term,
     constants.FINAL_QUERY)`` and every page snippet (taken from
     ``page.snippet_without_parenthesis()``) is turned into words with
     ``MWordsFactory.build_from``. Whenever the word sequence
     ``<noun> という <object_term>`` occurs and the noun is not listed in
     ``STOP_WORDS``, ``'<noun>_<predicate_term>'`` is collected.

     Args:
         results_dic: Mapping whose values are iterables of pair strings.

     Returns:
         dict mapping each pair string to the set of concrete pair strings
         found for it (the set may be empty).

     Raises:
         ValueError: If a pair does not split into exactly two parts on '_'.
     """
     # word_info the word directly before object_term must have (particle "という").
     toiu_word_info = 'という\t助詞,格助詞,連語,*,*,*,という,トイウ,トユウ'
     results_set = set()
     ts = ToiuSearchable()
     concrete_terms = {}
     for result_broader in results_dic:
         results_set.update(results_dic[result_broader])
     for simple_pair in results_set:
         # A single pair can yield several concrete object terms.
         concrete_object_terms = set()
         object_term, predicate_term = simple_pair.split('_')
         result_pages = ts.result_pages(object_term, constants.FINAL_QUERY)
         for page in result_pages:
             snippet = page.snippet_without_parenthesis()
             m_words_factory = MWordsFactory()
             m_words_of_snippet = m_words_factory.build_from(snippet)
             for i, m_word in enumerate(m_words_of_snippet):
                 # Two preceding words are required. Without this guard,
                 # i - 1 / i - 2 go negative and wrap around to the END of
                 # the snippet, producing matches that are not adjacent.
                 if i < 2 or m_word.name != object_term:
                     continue
                 if m_words_of_snippet[i - 1].word_info != toiu_word_info:
                     continue
                 # The word right before "という <object_term>" is the answer
                 # candidate, but only when it is a noun ('名詞').
                 candidate = m_words_of_snippet[i - 2]
                 if candidate.type != '名詞':
                     continue
                 if candidate.name in STOP_WORDS:
                     continue
                 concrete_object_terms.add(candidate.name + '_' + predicate_term)
         concrete_terms[simple_pair] = concrete_object_terms
     return concrete_terms
Beispiel #2
0
 def __init__(self, text):
     """Derive ``self.body`` from ``text`` and build ``self.m_body_words``.

     Steps, in order: ``TextCombiner.remove_parenthesis(text)`` is stored as
     the body; ``set_blank_to_body_if_not_includes_hiragana_or_karakana`` is
     called on the instance; ``TextCombiner.remove_inside_round_parenthesis``
     is applied to the body; the resulting body is passed to
     ``MWordsFactory.build_from``.

     Args:
         text: Raw text to clean up and split into words.
     """
     combiner = TextCombiner()
     self.body = combiner.remove_parenthesis(text)
     # Runs between the two clean-up passes; judging by its name it may
     # blank out self.body, so the call order is kept as-is.
     self.set_blank_to_body_if_not_includes_hiragana_or_karakana()
     self.body = combiner.remove_inside_round_parenthesis(self.body)
     self.m_body_words = MWordsFactory().build_from(self.body)
Beispiel #3
0
 def __init__(self, text):
     """Derive ``self.body`` from ``text`` and build ``self.m_body_words``.

     Steps, in order: ``TextCombiner.remove_parenthesis(text)`` is stored as
     the body; ``set_blank_to_body_if_not_includes_hiragana_or_karakana`` is
     called on the instance; ``TextCombiner.remove_inside_round_parenthesis``
     is applied to the body; the resulting body is passed to
     ``MWordsFactory.build_from``.

     Args:
         text: Raw text to clean up and split into words.
     """
     combiner = TextCombiner()
     self.body = combiner.remove_parenthesis(text)
     # Runs between the two clean-up passes; judging by its name it may
     # blank out self.body, so the call order is kept as-is.
     self.set_blank_to_body_if_not_includes_hiragana_or_karakana()
     self.body = combiner.remove_inside_round_parenthesis(self.body)
     self.m_body_words = MWordsFactory().build_from(self.body)
Beispiel #4
0
 def concrete_terms(self, results_dic):
     """Map every pair found in ``results_dic`` to a set of more concrete pairs.

     The values of ``results_dic`` are flattened into a set of
     ``'<object_term>_<predicate_term>'`` strings. For each pair, pages are
     fetched with ``ToiuSearchable.result_pages(object_term,
     constants.FINAL_QUERY)`` and every page snippet (taken from
     ``page.snippet_without_parenthesis()``) is turned into words with
     ``MWordsFactory.build_from``. Whenever the word sequence
     ``<noun> という <object_term>`` occurs and the noun is not listed in
     ``STOP_WORDS``, ``'<noun>_<predicate_term>'`` is collected.

     Args:
         results_dic: Mapping whose values are iterables of pair strings.

     Returns:
         dict mapping each pair string to the set of concrete pair strings
         found for it (the set may be empty).

     Raises:
         ValueError: If a pair does not split into exactly two parts on '_'.
     """
     # word_info the word directly before object_term must have (particle "という").
     toiu_word_info = 'という\t助詞,格助詞,連語,*,*,*,という,トイウ,トユウ'
     results_set = set()
     ts = ToiuSearchable()
     concrete_terms = {}
     for result_broader in results_dic:
         results_set.update(results_dic[result_broader])
     for simple_pair in results_set:
         # A single pair can yield several concrete object terms.
         concrete_object_terms = set()
         object_term, predicate_term = simple_pair.split('_')
         result_pages = ts.result_pages(object_term, constants.FINAL_QUERY)
         for page in result_pages:
             snippet = page.snippet_without_parenthesis()
             m_words_factory = MWordsFactory()
             m_words_of_snippet = m_words_factory.build_from(snippet)
             for i, m_word in enumerate(m_words_of_snippet):
                 # Two preceding words are required. Without this guard,
                 # i - 1 / i - 2 go negative and wrap around to the END of
                 # the snippet, producing matches that are not adjacent.
                 if i < 2 or m_word.name != object_term:
                     continue
                 if m_words_of_snippet[i - 1].word_info != toiu_word_info:
                     continue
                 # The word right before "という <object_term>" is the answer
                 # candidate, but only when it is a noun ('名詞').
                 candidate = m_words_of_snippet[i - 2]
                 if candidate.type != '名詞':
                     continue
                 if candidate.name in STOP_WORDS:
                     continue
                 concrete_object_terms.add(candidate.name + '_' + predicate_term)
         concrete_terms[simple_pair] = concrete_object_terms
     return concrete_terms
Beispiel #5
0
 def prepare_m_words(self, page):
     """Return the words of ``page.snippet`` after noun and verb combining.

     The snippet is first passed through
     ``TextCombiner.remove_all_parenthesis``, then split into words by
     ``MWordsFactory.build_from``; the word list is then run through
     ``TextCombiner.combine_nouns`` followed by ``combine_verbs``.

     Args:
         page: Object exposing a ``snippet`` attribute.

     Returns:
         Whatever ``TextCombiner.combine_verbs`` returns for the words.
     """
     combiner = TextCombiner()
     cleaned_snippet = combiner.remove_all_parenthesis(page.snippet)
     words = MWordsFactory().build_from(cleaned_snippet)
     # Nouns are combined before verbs, same order as always.
     return combiner.combine_verbs(combiner.combine_nouns(words))
Beispiel #6
0
 def prepare_m_words(self, page):
     """Return the words of ``page.snippet`` after noun and verb combining.

     The snippet is first passed through
     ``TextCombiner.remove_all_parenthesis``, then split into words by
     ``MWordsFactory.build_from``; the word list is then run through
     ``TextCombiner.combine_nouns`` followed by ``combine_verbs``.

     Args:
         page: Object exposing a ``snippet`` attribute.

     Returns:
         Whatever ``TextCombiner.combine_verbs`` returns for the words.
     """
     combiner = TextCombiner()
     cleaned_snippet = combiner.remove_all_parenthesis(page.snippet)
     words = MWordsFactory().build_from(cleaned_snippet)
     # Nouns are combined before verbs, same order as always.
     return combiner.combine_verbs(combiner.combine_nouns(words))
Beispiel #7
0
 def pick_sahens(self):
     """Return the names of words in ``self.body`` whose subtype is 'サ変接続'.

     ``self.body`` is split into words with ``MWordsFactory.build_from``.

     Returns:
         list of ``name`` values, in word order; empty when nothing matches.
     """
     body_words = MWordsFactory().build_from(self.body)
     return [word.name for word in body_words if word.subtype == 'サ変接続']
Beispiel #8
0
 def pick_words_by_types(self, types):
     """Return the names of words in ``self.body`` whose type is in ``types``.

     ``self.body`` is split into words with ``MWordsFactory.build_from``.
     Each word is compared against every entry of ``types``; its name is
     appended once per equal entry, so a type listed twice in ``types``
     yields the name twice.

     Args:
         types: Iterable of type values to compare with each word's ``type``.

     Returns:
         list of ``name`` values, in word order.
     """
     body_words = MWordsFactory().build_from(self.body)
     return [
         word.name
         for word in body_words
         for wanted_type in types
         if word.type == wanted_type
     ]
 def verbs(self, text):
     """Return the stems of all words in ``text`` whose type is '動詞'.

     Args:
         text: Text handed to ``MWordsFactory.build_from``.

     Returns:
         list of ``stem`` values, in word order; empty when nothing matches.
     """
     words = MWordsFactory().build_from(text)
     return [word.stem for word in words if word.type == '動詞']
 def sahens(self, text):
     """Return the names of all words in ``text`` whose subtype is 'サ変接続'.

     Args:
         text: Text handed to ``MWordsFactory.build_from``.

     Returns:
         list of ``name`` values, in word order; empty when nothing matches.
     """
     words = MWordsFactory().build_from(text)
     return [word.name for word in words if word.subtype == 'サ変接続']
Beispiel #11
0
 def verbs(self, text):
     """Return the stems of all words in ``text`` whose type is '動詞'.

     Args:
         text: Text handed to ``MWordsFactory.build_from``.

     Returns:
         list of ``stem`` values, in word order; empty when nothing matches.
     """
     words = MWordsFactory().build_from(text)
     return [word.stem for word in words if word.type == '動詞']
Beispiel #12
0
 def sahens(self, text):
     """Return the names of all words in ``text`` whose subtype is 'サ変接続'.

     Args:
         text: Text handed to ``MWordsFactory.build_from``.

     Returns:
         list of ``name`` values, in word order; empty when nothing matches.
     """
     words = MWordsFactory().build_from(text)
     return [word.name for word in words if word.subtype == 'サ変接続']
Beispiel #13
0
 def score_of_sentence(self, sentence):
     """Count how many entries of ``self.positive_experiences`` occur in ``sentence``.

     The sentence is split into words with ``MWordsFactory.build_from``; the
     words' ``stem`` values are concatenated without a separator, and each
     entry of ``self.positive_experiences`` that is a substring of that
     concatenation adds one to the score.

     Args:
         sentence: Text to score.

     Returns:
         The number of matching entries, as a float.
     """
     words = MWordsFactory().build_from(sentence)
     joined_stems = ''.join([word.stem for word in words])
     hits = [
         phrase
         for phrase in self.positive_experiences
         if phrase in joined_stems
     ]
     # The score has always been a float (it started from 0.0), so keep it one.
     return float(len(hits))
 def score_of_sentence(self, sentence):
     """Count how many entries of ``self.positive_experiences`` occur in ``sentence``.

     The sentence is split into words with ``MWordsFactory.build_from``; the
     words' ``stem`` values are concatenated without a separator, and each
     entry of ``self.positive_experiences`` that is a substring of that
     concatenation adds one to the score.

     Args:
         sentence: Text to score.

     Returns:
         The number of matching entries, as a float.
     """
     words = MWordsFactory().build_from(sentence)
     joined_stems = ''.join([word.stem for word in words])
     hits = [
         phrase
         for phrase in self.positive_experiences
         if phrase in joined_stems
     ]
     # The score has always been a float (it started from 0.0), so keep it one.
     return float(len(hits))
Beispiel #15
0
 def set_core_noun_from_name(self):
     """Set ``self.core_noun`` to the part of ``self.name`` after the first 'の'.

     ``self.name`` is split into words with ``MWordsFactory.build_from``. The
     first word whose ``word_info`` equals the 'の' particle entry below marks
     the split point: the ``name`` values of all following words are joined
     into ``self.core_noun`` (an empty string when nothing follows). When no
     such word exists, ``self.core_noun`` is set to ``self.name`` unchanged.
     """
     words = MWordsFactory().build_from(self.name)
     no_word_info = 'の\t助詞,連体化,*,*,*,*,の,ノ,ノ'
     # Position of the first matching 'の' word, or None when there is none.
     first_no = next(
         (idx for idx, word in enumerate(words)
          if word.word_info == no_word_info),
         None,
     )
     if first_no is None:
         self.core_noun = self.name
     else:
         self.core_noun = ''.join([w.name for w in words[first_no + 1:]])
Beispiel #16
0
 def set_core_noun_from_name(self):
     """Set ``self.core_noun`` to the part of ``self.name`` after the first 'の'.

     ``self.name`` is split into words with ``MWordsFactory.build_from``. The
     first word whose ``word_info`` equals the 'の' particle entry below marks
     the split point: the ``name`` values of all following words are joined
     into ``self.core_noun`` (an empty string when nothing follows). When no
     such word exists, ``self.core_noun`` is set to ``self.name`` unchanged.
     """
     words = MWordsFactory().build_from(self.name)
     no_word_info = 'の\t助詞,連体化,*,*,*,*,の,ノ,ノ'
     # Position of the first matching 'の' word, or None when there is none.
     first_no = next(
         (idx for idx, word in enumerate(words)
          if word.word_info == no_word_info),
         None,
     )
     if first_no is None:
         self.core_noun = self.name
     else:
         self.core_noun = ''.join([w.name for w in words[first_no + 1:]])