Ejemplo n.º 1
0
    def insert_rest_pending_terms(self):
        """Flush every term still waiting in ``self.pending_terms``.

        For each pending label, ``insert_x_entity_description`` is called
        with ``self.conn`` and the tuple ``(text, label, None, id)`` built
        from the stored term. Afterwards all pending terms are merged into
        ``self.terms`` and the pending mapping is emptied.
        """
        leftovers = self.pending_terms
        for name in leftovers:
            info = leftovers[name]
            row = (info['text'], name, None, info['id'])
            insert_x_entity_description(self.conn, row)

        # Promote everything that was pending, then reset the queue.
        self.terms.update(leftovers)
        leftovers.clear()
Ejemplo n.º 2
0
    def insert_rest_pending_entities(self, dic, pending_dic):
        """Flush the entities left in *pending_dic* and move them to *dic*.

        For each pending label, ``insert_x_entity_description`` is called
        with ``self.conn`` and the tuple ``(text, label, None, id)`` built
        from the stored entity. Afterwards *dic* is updated with every
        pending entry and *pending_dic* is cleared in place.

        Args:
            dic: Mapping that receives the flushed entities.
            pending_dic: Mapping of label -> ``{'text': ..., 'id': ...}``
                entries that have not been written yet.
        """
        for name in pending_dic:
            info = pending_dic[name]
            row = (info['text'], name, None, info['id'])
            insert_x_entity_description(self.conn, row)

        # Promote everything that was pending, then reset the queue.
        dic.update(pending_dic)
        pending_dic.clear()
Ejemplo n.º 3
0
 def insert_wiki_intro(title, intro):
     """Move *title* from the pending terms to the known terms and store it.

     NOTE(review): ``self`` is not a parameter here; presumably this
     function is nested inside a method and captures ``self`` -- confirm.

     Raises:
         KeyError: If *title* is not in ``self.pending_terms``.
     """
     entity = self.pending_terms.pop(title)
     self.terms[title] = entity

     # An intro without any '.' is treated as a disambiguation page: the
     # entity's own text is stored instead of the intro.
     is_disambiguation = '.' not in intro
     row = ((entity['text'], title, None, entity['id'])
            if is_disambiguation
            else (intro, title, 1, entity['id']))
     insert_x_entity_description(self.conn, row)
Ejemplo n.º 4
0
    def insert_wiki_intro(self, is_people, title, intro):
        """Move *title* out of its pending mapping and store its description.

        Args:
            is_people: When truthy the entity is taken from
                ``self.pending_people`` and stored in ``self.people``;
                otherwise ``self.pending_terms`` / ``self.terms`` are used.
            title: Key of the pending entity.
            intro: Intro text to store as the description.

        Raises:
            KeyError: If *title* is not in the selected pending mapping.
        """
        if is_people:
            waiting, resolved = self.pending_people, self.people
        else:
            waiting, resolved = self.pending_terms, self.terms
        entity = waiting.pop(title)
        resolved[title] = entity

        # An intro containing neither '.' nor '。' is treated as a
        # disambiguation page: the entity's own text is stored instead.
        is_disambiguation = '.' not in intro and '。' not in intro
        if is_disambiguation:
            row = (entity['text'], title, None, entity['id'])
        else:
            row = (intro, title, 1, entity['id'])
        insert_x_entity_description(self.conn, row)
Ejemplo n.º 5
0
    def insert_entity(self, data, data_type, start, text):
        """Register a new entity under the current ``self.entity_id``.

        An occurrence is recorded first via ``self.insert_occurrence``.
        A ``'PERSON'`` entity is written immediately with
        ``insert_x_entity_description``; any other type is queued in
        ``self.pending_terms`` and ``self.search_wikipedia`` is called once
        20 terms are queued. ``self.entity_id`` is incremented at the end.

        Args:
            data: Entity string, used as the lookup key.
            data_type: Entity type; only ``'PERSON'`` is special-cased.
            start: Start position forwarded to ``insert_occurrence``.
            text: Text stored with the entity.
        """
        self.insert_occurrence(self.entity_id, data_type, start, len(data))
        if data_type == 'PERSON':
            if ' ' in data:  # full name
                for name in data.split(' '):
                    # split(' ') yields '' for leading, trailing or repeated
                    # spaces; an empty string is not a name, so skip it.
                    # The first entity to claim a name part keeps it.
                    if name and name not in self.names:
                        self.names[name] = self.entity_id
            self.names[data] = self.entity_id
            self.people[data] = self.entity_id
            insert_x_entity_description(
                self.conn, (text, data, None, self.entity_id))
            self.num_people += 1
        else:
            entity = {'text': text, 'id': self.entity_id}
            self.pending_terms[data] = entity
            if len(self.pending_terms) == 20:  # max exlimit
                self.search_wikipedia()
            self.num_terms += 1

        self.entity_id += 1
Ejemplo n.º 6
0
    def insert_entity(self, data, is_person, start, text, length):
        """Register a new entity under the current ``self.entity_id``.

        An occurrence is recorded first via ``self.insert_occurrence``.
        Terms are queued in ``self.pending_terms``. People are queued in
        ``self.pending_people`` when ``prefs['search_people']`` is truthy,
        otherwise they are written immediately with
        ``insert_x_entity_description``. When a queue reaches
        ``MAX_EXLIMIT`` entries, ``self.search_wikipedia`` is called for it.
        ``self.entity_id`` is incremented at the end.

        Args:
            data: Entity string, used as the lookup key.
            is_person: Truthy when the entity is a person.
            start: Start position forwarded to ``insert_occurrence``.
            text: Text stored with the entity.
            length: Length forwarded to ``insert_occurrence``.
        """
        self.insert_occurrence(self.entity_id, is_person, start, length)
        current_id = self.entity_id

        if not is_person:
            self.pending_terms[data] = {'text': text, 'id': current_id}
            if len(self.pending_terms) == MAX_EXLIMIT:
                self.search_wikipedia(False, self.pending_terms)
            self.num_terms += 1
        else:
            # Every name part, and the full string, maps to this entity.
            for part in self.split_name(data):
                self.names[part] = current_id
            self.names[data] = current_id
            self.num_people += 1

            if not prefs['search_people']:
                self.people[data] = current_id
                row = (text, data, None, current_id)
                insert_x_entity_description(self.conn, row)
            else:
                self.pending_people[data] = {'text': text,
                                             'id': current_id}
                if len(self.pending_people) == MAX_EXLIMIT:
                    self.search_wikipedia(True, self.pending_people)

        self.entity_id += 1