예제 #1
0
                words2 = self.words_around_symbol(candidate)
                self.words2 += len(words1)
                cm = CandidateMetrics()
                cm.score = self.words_similarity(words1, words2, exclude)
                cm.degree = ksyn.degree(self.hg, candidate)
                logging.info('%s %s' % (candidate, cm))
                if cm.better_than(best_cm):
                    best_cm = cm
                    best = candidate

        self.best_sense_t += time.time() - start
        return best, best_cm


if __name__ == '__main__':
    # Manual smoke test: open the LevelDB-backed 'wordnet_wikidata.hg'
    # hypergraph, build a Disambiguation object on top of it and print
    # what best_sense returns for one of the samples below.
    graph_params = {'backend': 'leveldb', 'hg': 'wordnet_wikidata.hg'}
    hgr = hyperg.HyperGraph(graph_params)
    p = par.Parser()
    d = Disambiguation(hgr, p)

    # Sample inputs; only the (r2, text2) pair is exercised right now, the
    # others are kept at hand for switching the experiment.
    r1 = ['stocks', 'stock']
    text1 = "Chinese stocks end year with double-digit losses"

    r2 = ['cambridge']
    text2 = "Cambridge near Boston in the United States."
    text3 = "Cambridge near London in England."

    result = d.best_sense(r2, text2)
    print(result)
예제 #2
0
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with GraphBrain.  If not, see <http://www.gnu.org/licenses/>.

import operator
import numpy as np
from sklearn.cluster import DBSCAN
import gb.tools.json as json_tools
import gb.hypergraph.edge as ed
import gb.nlp.parser as par
from gb.explore.similarity import edge_similarity

if __name__ == '__main__':
    print('creating parser...')
    # NOTE: this rebinds the module alias ``par`` to the parser instance.
    par = par.Parser()
    print('parser created.')

    edge_data = json_tools.read('edges_similar_concepts.json')

    # Count, across all items, how often each element of an edge (past its
    # first position) is absent from that item's matched edges.  Keys are
    # the string form of the unmatched element.
    extra_edges = {}
    for item in edge_data:
        edge = ed.str2edge(item['edge'])
        matched = [ed.str2edge(match[1]) for match in item['matches']]
        for part in edge[1:]:
            if part in matched:
                continue
            key = ed.edge2str(part)
            extra_edges[key] = extra_edges.get(key, 0) + 1
예제 #3
0
def headlines_inference(hg, predicates_file):
    """Run the ``Headlines`` processing over a hypergraph.

    A fresh ``par.Parser`` is created, handed to ``Headlines`` together with
    the given arguments, and ``process()`` is invoked on the result.

    :param hg: hypergraph passed through to ``Headlines``.
    :param predicates_file: predicates file passed through to ``Headlines``.
    :return: None.
    """
    nlp_parser = par.Parser()
    pipeline = Headlines(hg, nlp_parser, predicates_file)
    pipeline.process()
예제 #4
0
                self.entities[entity]['total_conflict_from'] = total

                h, total = herfindhal_and_total(self.entities[entity]['conflict_to']
                                                + self.entities[entity]['conflict_from'])
                self.entities[entity]['h_conflict'] = h
                self.entities[entity]['total_conflict'] = total

                h, total = herfindhal_and_total(self.entities[entity]['conflict_over'])
                self.entities[entity]['h_conflict_over'] = h
                self.entities[entity]['total_conflict_over'] = total

                h, total = herfindhal_and_total(self.entities[entity]['conflict_for'])
                self.entities[entity]['h_conflict_for'] = h
                self.entities[entity]['total_conflict_for'] = total

                self.write_metrics(entity)

                i += 1
                bar.update(i)

    def process(self):
        """Run the complete processing sequence on this instance.

        Calls, in order, ``find_actors``, ``infer`` and ``compute_metrics``;
        each step starts only after the previous one has returned.
        """
        self.find_actors()
        self.infer()
        self.compute_metrics()


if __name__ == '__main__':
    # Script entry point: run the Headlines processing against the
    # LevelDB-backed 'infer.hg' hypergraph using 'predicate_patterns.csv'.
    graph_params = {'backend': 'leveldb', 'hg': 'infer.hg'}
    hgr = HyperGraph(graph_params)
    parse = par.Parser()
    headlines = Headlines(hgr, parse, 'predicate_patterns.csv')
    headlines.process()
예제 #5
0
def generate(hg):
    """Build a meronomy from the belief edges of ``hg`` and write the
    resulting synonym sets back into the hypergraph.

    The hypergraph is scanned twice: the first pass feeds every belief edge
    to ``Meronomy.add_edge``, the second one to
    ``Meronomy.post_assignments``.  The meronomy graph is then generated and
    normalized, and synonym sets are derived from it.  For every synonym set
    a synonym symbol is built from a label and ``'syn<id>'``; each edge of
    the set is linked to it through ``cons.are_synonyms`` and the symbol
    itself receives a ``cons.has_label`` edge.

    NOTE(review): a synonym set that maps to no edges reaches
    ``hg.get_label`` with ``None`` -- confirm that this is handled there.

    :param hg: hypergraph that is read from and written to.
    :return: None.
    """
    def scan_edges(progress):
        """Yield each edge vertex of ``hg``; refresh ``progress`` once every
        1000 visited vertices, after the vertex has been handled."""
        for visited, vertex in enumerate(hg.all(), start=1):
            if sym.is_edge(vertex):
                yield vertex
            if visited % 1000 == 0:
                progress.update(visited)

    print('starting parser...')
    parser = par.Parser()

    mer = Meronomy(hg, parser)

    print('reading edges...')
    edge_total = 0
    belief_total = 0

    total_verts = hg.symbol_count() + hg.edge_count()
    with progressbar.ProgressBar(max_value=total_verts) as bar:
        for edge in scan_edges(bar):
            edge_total += 1
            if hg.is_belief(edge):
                mer.add_edge(edge)
                belief_total += 1

    print('edges: %s; beliefs: %s' % (edge_total, belief_total))

    print('post assignments...')
    with progressbar.ProgressBar(max_value=total_verts) as bar:
        for edge in scan_edges(bar):
            if hg.is_belief(edge):
                mer.post_assignments(edge)

    print('generating meronomy graph...')
    mer.generate()

    print('normalizing meronomy graph...')
    mer.normalize_graph()

    print('generating synonyms...')
    mer.generate_synonyms()

    print('writing synonyms...')
    # Pre-set so the final bar update still works when there are no sets.
    written = 0
    with progressbar.ProgressBar(max_value=len(mer.synonym_sets)) as bar:
        for written, syn_id in enumerate(mer.synonym_sets, start=1):
            # Gather every edge associated with any atom of this set.
            members = set()
            for atom in mer.synonym_sets[syn_id]:
                if atom in mer.edge_map:
                    members |= mer.edge_map[atom]

            # The set is labelled after its most frequent edge (the first
            # one encountered wins on ties).
            top_edge = None
            top_count = -1
            for candidate in members:
                count = mer.edge_counts[candidate]
                if count > top_count:
                    top_count = count
                    top_edge = candidate

            label = hg.get_label(top_edge)
            syn_symbol = sym.build(label, 'syn%s' % syn_id)
            for member in members:
                hg.add((cons.are_synonyms, member, syn_symbol))

            label_symbol = sym.build(label, cons.label_namespace)
            hg.add((cons.has_label, syn_symbol, label_symbol))

            if written % 1000 == 0:
                bar.update(written)
        bar.update(written)

    print('%s synonym sets created' % len(mer.synonym_sets))
    print('done.')