class PosinegaGraphMapper(AbstractTaskGraphManager):
    """Adds page-level edges (query, subtypes, url, tasks) to ``self.graph``."""

    def __init__(self, graph=None):
        """Hand ``graph`` to the parent initializer and create the DB loader."""
        super().__init__(graph)
        self.loader = HypoHypeDBDataLoader()

    def add_edges_with_page(self, page):
        """
        Connect a page's query, subtypes, url and tasks in the graph.

        With subtypes present, each one is placed between the query and the
        url (query -> subtype -> url); otherwise the query points straight at
        the url. Afterwards the url is linked to one node per task, named
        ``<core_noun>_<cmp>_<predicate_term>``.
        """
        query, url = page.query, page.url

        if not page.subtypes:
            self.graph.add_edge(query, url)
        else:
            for subtype in page.subtypes:
                self.graph.add_edge(query, subtype)
                self.graph.add_edge(subtype, url)

        # NOTE: filtering with task.is_noise() is currently disabled.
        for task in page.tasks:
            task_node = "%s_%s_%s" % (
                task.object_term.core_noun,
                task.cmp,
                task.predicate_term,
            )
            self.graph.add_edge(url, task_node)

    def in_degree(self):
        """Return whatever ``self.graph.in_degree()`` yields."""
        return self.graph.in_degree()

    def _hypes(self, task):
        """Ask the loader for the hypes of the task's core noun, minus block words."""
        core_noun = task.object_term.core_noun
        return self.loader.hypes_except_for_blockwords(core_noun)

    def _broader_preds(self, task):
        """Look up the predicates related to the task's predicate term (a dict)."""
        lookup = EntailmentLibrarian().genaral_from_all_except_for_nonent_ntriv_with_special
        return lookup(task.predicate_term)
Example #2
0
class TestTaskStep(unittest.TestCase):
    """Checks the hypes that HypoHypeDBDataLoader returns for a sample noun."""

    def setUp(self):
        """Give every test its own loader instance."""
        self.loader = HypoHypeDBDataLoader()

    def test_set_headings_from_text(self):
        """hypes_except_for_blockwords('家') must equal the fixed expected list."""
        expected = ['名所', '建造物', '施設', '歴史的建造物', '町の施設']
        actual = self.loader.hypes_except_for_blockwords('家')
        self.assertEqual(actual, expected)
class TestTaskStep(unittest.TestCase):
    """Checks the hypes that HypoHypeDBDataLoader returns for a sample noun."""

    def setUp(self):
        """Give every test its own loader instance."""
        self.loader = HypoHypeDBDataLoader()

    def test_set_headings_from_text(self):
        """hypes_except_for_blockwords('家') must equal the fixed expected list."""
        expected = ['名所', '建造物', '施設', '歴史的建造物', '町の施設']
        actual = self.loader.hypes_except_for_blockwords('家')
        self.assertEqual(actual, expected)
Example #4
0
 def _subtypes(self):
     """
     Collect the hyponyms of the query noun that occur in ``self.title``.

     When ``self.query_noun`` does not exist yet,
     ``_convert_query_from_nv_to_ncv`` is called first (presumably it sets
     that attribute -- confirm against its definition). The hyponym
     candidates come from ``HypoHypeDBDataLoader.select_hypos_with_hype``;
     the literal 'の部屋' is always excluded.

     Returns:
         set: the candidate nouns contained in ``self.title``.
     """
     if not hasattr(self, 'query_noun'):
         self._convert_query_from_nv_to_ncv()

     with HypoHypeDBDataLoader() as loader:
         candidates = loader.select_hypos_with_hype(self.query_noun)

     return {
         candidate
         for candidate in candidates
         if candidate != 'の部屋' and candidate in self.title
     }
Example #5
0
class PosinegaGraphMapper(AbstractTaskGraphManager):
    """Adds page-level edges (query, subtypes, url, tasks) to ``self.graph``."""

    def __init__(self, graph=None):
        """Hand ``graph`` to the parent initializer and create the DB loader."""
        super().__init__(graph)
        self.loader = HypoHypeDBDataLoader()

    def add_edges_with_page(self, page):
        """
        Connect a page's query, subtypes, url and tasks in the graph.

        With subtypes present, each one is placed between the query and the
        url (query -> subtype -> url); otherwise the query points straight at
        the url. Afterwards the url is linked to one node per task, named
        ``<core_noun>_<cmp>_<predicate_term>``.
        """
        query, url = page.query, page.url

        if not page.subtypes:
            self.graph.add_edge(query, url)
        else:
            for subtype in page.subtypes:
                self.graph.add_edge(query, subtype)
                self.graph.add_edge(subtype, url)

        # NOTE: filtering with task.is_noise() is currently disabled.
        for task in page.tasks:
            task_node = '%s_%s_%s' % (
                task.object_term.core_noun,
                task.cmp,
                task.predicate_term,
            )
            self.graph.add_edge(url, task_node)

    def in_degree(self):
        """Return whatever ``self.graph.in_degree()`` yields."""
        return self.graph.in_degree()

    def _hypes(self, task):
        """Ask the loader for the hypes of the task's core noun, minus block words."""
        core_noun = task.object_term.core_noun
        return self.loader.hypes_except_for_blockwords(core_noun)

    def _broader_preds(self, task):
        """Look up the predicates related to the task's predicate term (a dict)."""
        lookup = EntailmentLibrarian().genaral_from_all_except_for_nonent_ntriv_with_special
        return lookup(task.predicate_term)
Example #6
0
 def setUp(self) -> None:
     """Create a new HypoHypeDBDataLoader and store it on ``self.loader``."""
     self.loader = HypoHypeDBDataLoader()
Example #7
0
        if self.cur.fetchall():
            print('もうdbにあります')
            return True
        return False

    def insert(self, body, supertype_noun):
        """
        Insert one ``(body, supertype_noun)`` row into ``subtype_nouns``.

        The values are bound as SQL parameters instead of being interpolated
        into the statement, so text containing quotes is stored verbatim and
        cannot alter the query.

        Args:
            body: value for the ``body`` column.
            supertype_noun: value for the ``supertype_noun`` column.

        Returns:
            False when sqlite3 rejects the insert (nothing is committed);
            None after the row has been committed.
        """
        sql = 'insert into subtype_nouns(body, supertype_noun) values(?, ?);'
        try:
            self.cur.execute(sql, (body, supertype_noun))
        except (sqlite3.OperationalError, sqlite3.IntegrityError):
            # Report the failure instead of dropping into the debugger and
            # then announcing success, as the previous version did.
            print('%sの文のinsert失敗' % body)
            return False
        self.conn.commit()
        print('%sの入力完了!' % body)


if __name__ == '__main__':
    # For every query in QUERIES_4, take the text before the first space as
    # the hype, fetch its hypos from the loader and insert each
    # (hypo, hype) pair through the inserter.
    queries = constants.QUERIES_4
    with SubtypeDataInserter() as inserter, HypoHypeDBDataLoader() as loader:
        for query in queries:
            hype, _, _ = query.partition(' ')
            for hypo in loader.select_hypos_with_hype(hype):
                inserter.insert(body=hypo, supertype_noun=hype)
Example #8
0
 def __init__(self, graph=None) -> None:
     """Pass ``graph`` to the parent initializer and create ``self.loader``."""
     super().__init__(graph)
     self.loader = HypoHypeDBDataLoader()
 def setUp(self) -> None:
     """Create a new HypoHypeDBDataLoader and store it on ``self.loader``."""
     self.loader = HypoHypeDBDataLoader()
 def __init__(self, graph=None) -> None:
     """Pass ``graph`` to the parent initializer and create ``self.loader``."""
     super().__init__(graph)
     self.loader = HypoHypeDBDataLoader()
Example #11
0
class GraphTaskMapper(AbstractTaskGraphManager):
    """
    Expand tasks into graph nodes and edges.

    The object noun of a task is combined with its hypernyms (from
    ``HypoHypeDBDataLoader``) and its predicate with the predicates returned
    by ``EntailmentLibrarian``. Every noun/verb combination becomes a node
    named ``<noun>_<cmp>_<verb>``, and the original task node gets an edge
    pointing to it.
    """

    # Object terms for which neither a node nor an edge is created.
    _STOP_OBJECT_TERMS = ('こと', 'もの', 'など')

    def __init__(self, graph=None):
        """Pass ``graph`` to the parent initializer and create the DB loader."""
        super().__init__(graph)
        self.loader = HypoHypeDBDataLoader()

    def add_node_and_edge_with_task(self, task):
        """
        Add a node and an edge for every noun/verb variation of ``task``.

        Nouns are the task's hypernyms plus its original core noun; verbs are
        the groups returned by ``_broader_preds`` plus the original predicate
        under the key ``'original'``. If there is an order within a page, the
        ``order`` values are expected to start from 1.
        """
        # NOTE: filtering with task.is_noise() is currently disabled.
        original_noun = task.object_term.core_noun
        original_verb = task.predicate_term

        # Open question from the original author: should hypernym edges carry
        # a dedicated "hype" edge so that subtype-of relations can be found?
        nouns = {
            'hypes': self._hypes(task),
            'original': [original_noun],
        }

        # Copy before adding our own key so the dict handed out by the
        # librarian is never mutated.
        verbs = dict(self._broader_preds(task))
        verbs['original'] = (original_verb,)

        # All hypernym/original nouns and verb groups are now collected.
        for hype_type, noun_group in nouns.items():
            is_hype = hype_type == 'hypes'
            for noun in noun_group:
                for entailment_type, verb_group in verbs.items():
                    for verb in verb_group:
                        is_original = (noun == original_noun
                                       and verb == original_verb)
                        self._add_new_node(
                            object_term=noun,
                            predicate_term=verb,
                            cmp=task.cmp,
                            order=task.order,
                            url=task.url,
                            distance_between_subtypes=task.distance_between_subtypes,
                            is_original=is_original,
                            is_shopping=task.is_shopping,
                            is_official=task.is_official,
                            sentence=task.sentence)
                        self._add_new_edge(
                            task=task,
                            noun=noun,
                            verb=verb,
                            entailment_type=entailment_type,
                            is_hype=is_hype)

    def _add_new_node(self,
                      object_term,
                      predicate_term,
                      cmp,
                      order,
                      url,
                      distance_between_subtypes,
                      is_original=False,
                      is_shopping=False,
                      is_official=False,
                      sentence=''):
        """
        Add the node ``<object_term>_<cmp>_<predicate_term>`` or extend it.

        The remaining arguments are stored together as one "aspect" dict in
        the node's ``aspects`` list. Nothing happens when ``object_term`` is a
        stop word or when an equal aspect is already stored on the node.
        """
        if self._has_stop_object_term(object_term):
            return

        task_name = '%s_%s_%s' % (object_term, cmp, predicate_term)
        new_aspect = {
            'order': order,
            'url': url,
            'distance_between_subtypes': distance_between_subtypes,
            'is_original': is_original,
            'is_shopping': is_shopping,
            'is_official': is_official,
            'sentence': sentence,
        }

        new_aspects = [new_aspect]
        # Membership is tested on the graph itself: the ``Graph.node``
        # attribute was removed in networkx 2.4, while ``in graph`` works on
        # every networkx version.
        if task_name in self.graph:
            try:
                old_aspects = self._aspects_with_task_name(task_name)
                if new_aspect in old_aspects:
                    print('このaspectはもうgraphにあります')
                    return
                old_aspects.append(new_aspect)
                new_aspects = old_aspects
            except (KeyError, AttributeError):
                # Reached when the node was created implicitly by add_edge
                # and is only now being added as a node with aspects.
                new_aspects = [new_aspect]
        self.graph.add_node(task_name, aspects=new_aspects)

    def _add_new_edge(self, task, noun, verb, entailment_type, is_hype):
        """
        Add an edge from the task's own node to ``<noun>_<task.cmp>_<verb>``.

        Returns:
            False when ``noun`` is a stop word (no edge is added); None
            otherwise.

        Raises:
            TypeError: if ``noun`` or the task's core noun is a list, which
                would produce a meaningless node name.
        """
        if self._has_stop_object_term(noun):
            return False

        core_noun = task.object_term.core_noun
        if isinstance(noun, list) or isinstance(core_noun, list):
            # Fail loudly instead of stopping in an interactive debugger.
            raise TypeError(
                'noun and core_noun must not be lists: %r, %r'
                % (noun, core_noun))

        source = '%s_%s_%s' % (core_noun, task.cmp, task.predicate_term)
        target = '%s_%s_%s' % (noun, task.cmp, verb)
        self.graph.add_edge(source,
                            target,
                            entailment_type=entailment_type,
                            is_hype=is_hype)

    def _has_stop_object_term(self, object_term):
        """Return True when ``object_term`` is one of the stop words."""
        return object_term in self._STOP_OBJECT_TERMS

    def in_degree(self):
        """Return whatever ``self.graph.in_degree()`` yields."""
        return self.graph.in_degree()

    def _hypes(self, task):
        """Ask the loader for the hypes of the task's core noun, minus block words."""
        return self.loader.hypes_except_for_blockwords(
            task.object_term.core_noun)

    def _broader_preds(self, task):
        """Look up the predicates related to the task's predicate term (a dict)."""
        librarian = EntailmentLibrarian()
        return librarian.genaral_from_all_except_for_nonent_ntriv_with_special(
            task.predicate_term)