예제 #1
0
 def get_label(self, edge):
     """Return a display string for ``edge``.

     Looks up edges matching the pattern ``[const.has_label, edge, None]``.
     If at least one match exists, one of them is popped and its third
     element is used as the label, provided that element is not itself an
     edge. In every other case the string form of ``edge`` is returned.
     """
     matches = self.pattern2edges([const.has_label, edge, None])
     if len(matches) == 0:
         # No label relationship recorded: fall back to the edge itself.
         return sym.symbol2str(edge)

     candidate = matches.pop()[2]
     if sym.is_edge(candidate):
         # An edge cannot serve as a label; fall back to the edge itself.
         return sym.symbol2str(edge)
     return sym.symbol2str(candidate)
예제 #2
0
def down(hg, symbol, visited=None):
    """Recursively build a tree of synonyms and derived concepts for ``symbol``.

    Args:
        hg: Hypergraph object; must provide ``star(symbol)``.
        symbol: The symbol to expand.
        visited: Optional set of string forms of symbols already expanded.
            It is mutated in place so that the whole traversal (and the
            caller, if it supplied the set) shares one record of visited
            symbols.

    Returns:
        ``None`` if ``symbol`` was already visited; otherwise a dict with the
        keys ``'symbol'``, ``'synonyms'`` and ``'derived_symbols'``. The two
        lists hold the results of recursing into each synonym and into each
        edge from ``hg.star(symbol)`` accepted by ``is_concept``, so they may
        contain ``None`` entries for symbols that were already visited.
    """
    # Compare against None rather than truthiness: an empty set supplied by
    # the caller must be used (and filled), not silently replaced.
    if visited is None:
        visited = set()

    key = sym.symbol2str(symbol)
    if key in visited:
        return None
    visited.add(key)

    # Materialise both collections before recursing.
    synonyms = list(syn.synonyms(hg, symbol))
    edges = [edge for edge in hg.star(symbol) if is_concept(edge)]
    return {'symbol': symbol,
            'synonyms': [down(hg, synonym, visited) for synonym in synonyms],
            'derived_symbols': [down(hg, edge, visited) for edge in edges]}
예제 #3
0
    def best_sense(self, roots, aux_text, namespaces=None):
        """Pick the best candidate symbol for ``roots`` given context text.

        Candidates are all symbols returned by ``self.hg.symbols_with_root``
        for each root. Each candidate accepted by ``check_namespace`` is
        scored by comparing the words extracted from ``aux_text`` with the
        words found around the candidate, ignoring the tokens that make up
        the roots themselves.

        Args:
            roots: Iterable of root symbols to look up candidates for.
            aux_text: Context text used to score candidates.
            namespaces: Passed through to ``check_namespace`` for every
                candidate.

        Returns:
            A ``(best, best_cm)`` tuple: the winning candidate (``None`` if
            no candidate beat the default ``CandidateMetrics``) and its
            metrics.

        Side effects:
            Resets and updates the profiling counters ``self.candidates``,
            ``self.words1`` and ``self.words2``, and adds the elapsed time to
            ``self.best_sense_t``.
        """
        start = time.time()
        # reset profiling
        self.candidates = 0
        self.words1 = 0
        self.words2 = 0

        candidates = set()
        exclude = set()
        for root in roots:
            candidates.update(self.hg.symbols_with_root(root))
            # Tokens of the root itself are excluded from the similarity
            # computation.
            exclude.update(sym.symbol2str(root).split())
        self.candidates = len(candidates)

        words1 = self.words_from_text(aux_text)
        self.words1 = len(words1)

        best = None
        best_cm = CandidateMetrics()
        for candidate in candidates:
            if not check_namespace(candidate, namespaces):
                continue
            words2 = self.words_around_symbol(candidate)
            # Count the words gathered around the candidate (previously this
            # mistakenly accumulated len(words1)).
            self.words2 += len(words2)
            cm = CandidateMetrics()
            cm.score = self.words_similarity(words1, words2, exclude)
            cm.degree = ksyn.degree(self.hg, candidate)
            # Lazy %-args: the message is only formatted if INFO is enabled.
            logging.info('%s %s', candidate, cm)
            if cm.better_than(best_cm):
                best_cm = cm
                best = candidate

        self.best_sense_t += time.time() - start
        return best, best_cm
예제 #4
0
def enrich_edge(parser, edge):
    """Annotate an edge (or a single symbol) with word and probability data.

    Args:
        parser: Object providing ``make_word(token)``; each returned word
            must expose a ``prob`` attribute, which is passed through
            ``math.exp`` (so it is presumably a log-probability -- confirm
            against the parser).
        edge: Either an edge (as decided by ``sym.is_edge``) or a symbol.

    Returns:
        For a symbol, a dict with keys ``'symbol'``, ``'words'``, ``'prob'``
        (product of word probabilities), ``'word_count'`` and ``'mean_prob'``
        (arithmetic mean of word probabilities).

        For an edge, a dict with keys ``'edge'``, ``'eedge'`` (the enriched
        items), ``'words'`` (all item words concatenated), ``'prob'``
        (product of item probabilities), ``'word_count'`` and ``'mean_prob'``
        (word-count-weighted mean of item probabilities).

        In both cases ``'mean_prob'`` is ``1.`` when there are no words.
    """
    if sym.is_edge(edge):
        eedge = [enrich_edge(parser, item) for item in edge]
        prob = 1.
        total_prob = 0.
        word_count = 0
        words = []
        for item in eedge:
            word_count += item['word_count']
            prob *= item['prob']
            total_prob += item['prob'] * item['word_count']
            words += item['words']
        # Guard against edges that contain no words at all (empty edge, or
        # only empty symbols); mirrors the symbol branch below instead of
        # raising ZeroDivisionError.
        if word_count > 0:
            mean_prob = total_prob / word_count
        else:
            mean_prob = 1.
        return {'edge': edge, 'eedge': eedge, 'words': words, 'prob': prob, 'word_count': word_count,
                'mean_prob': mean_prob}

    ngram = sym.symbol2str(edge)
    # Strip a single leading '+' from each token, then drop empty tokens
    # (including tokens that consisted only of '+').
    tokens = [token[1:] if token.startswith('+') else token
              for token in ngram.split(' ')]
    tokens = [token for token in tokens if token]
    words = [parser.make_word(token) for token in tokens]
    prob = 1.
    total_prob = 0.
    for word in words:
        p = math.exp(word.prob)
        prob *= p
        total_prob += p
    word_count = len(words)
    if word_count > 0:
        mean_prob = total_prob / word_count
    else:
        mean_prob = 1.
    return {'symbol': edge, 'words': words, 'prob': prob, 'word_count': word_count, 'mean_prob': mean_prob}
예제 #5
0
def derived_symbols(hg, ont, symbols=None, depth=0):
    """Flatten an ontology tree into a dict of per-symbol information.

    Args:
        hg: Hypergraph object, forwarded to ``syn.degree``.
        ont: Tree node: a dict with a ``'symbol'`` entry and a
            ``'derived_symbols'`` list of child nodes.
        symbols: Optional dict to accumulate results into; it is mutated in
            place and returned. A new dict is created when omitted.
        depth: Depth assigned to ``ont``; children get ``depth + 1``.

    Returns:
        A dict mapping the string form of every symbol in the tree to
        ``{'degree': ..., 'depth': ...}``. If a symbol occurs more than once,
        the entry written last wins.
    """
    # Compare against None rather than truthiness so that an empty dict
    # supplied by the caller is filled instead of being replaced.
    if symbols is None:
        symbols = {}

    symbol = ont['symbol']
    symbols[sym.symbol2str(symbol)] = {'degree': syn.degree(hg, symbol),
                                       'depth': depth}
    for subont in ont['derived_symbols']:
        # Child lists may contain None placeholders (for example where the
        # tree builder stopped at an already-visited symbol); skip them
        # rather than failing on ``None['symbol']``.
        if subont is None:
            continue
        derived_symbols(hg, subont, symbols, depth + 1)
    return symbols
예제 #6
0
def symbol_html(symbol, rel):
    """Render ``symbol`` as an HTML fragment linking to its vertex page.

    Args:
        symbol: The symbol to render; it is interpolated into the link's
            ``id`` query parameter as-is.
        rel: If truthy, render the symbol as a relationship (a ``rel`` div
            followed by an ``arrow`` div). Otherwise render it as a button
            whose extra CSS class is chosen from ``SYMBOL_CLASSES`` by
            ``symbol_to_int(symbol) % 5``.

    Returns:
        The HTML fragment as a string. Neither the symbol nor its label is
        escaped here.
    """
    text = sym.symbol2str(symbol)

    if not rel:
        css_class = SYMBOL_CLASSES[symbol_to_int(symbol) % 5]
        button_tpl = ('<button type="button" class="btn %s symbol">'
                      '<a class="symbol" href="/vertex?id=%s">%s</a></button>')
        return button_tpl % (css_class, symbol, text)

    rel_tpl = ('<div class="rel"><a href="/vertex?id=%s">%s</a></div>'
               '<div class="arrow"></div>')
    return rel_tpl % (symbol, text)
예제 #7
0
def html(hg, eid):
    """Render the page body for the vertex identified by ``eid``.

    Args:
        hg: Hypergraph object, forwarded to ``edge_html`` and ``edges_html``.
        eid: String identifier of the vertex; parsed with ``ed.str2edge``.

    Returns:
        An HTML string containing a title (the rendered edge when the vertex
        is an edge, otherwise an ``<h1>`` with the symbol's string form), the
        raw ``eid`` in an ``<h4>``, and the output of ``edges_html`` for the
        vertex.
    """
    vertex = ed.str2edge(eid)

    vertex_is_edge = sym.sym_type(vertex) == sym.SymbolType.EDGE
    title = (edge_html(hg, vertex) if vertex_is_edge
             else '<h1>%s</h1>' % sym.symbol2str(eid))
    related = edges_html(hg, vertex)

    page = """
<div class="container" role="main">
    <div class="page-header">
        %s
        <h4>%s</h4>
    </div>
    %s
</div>
    """
    return page % (title, eid, related)
예제 #8
0
    def words_around_symbol(self, symbol):
        """Collect the words that occur in edges around ``symbol``.

        Walks the edges returned by ``self.hg.star(symbol, limit=STAR_LIMIT)``,
        and for every symbol found inside their entities splits its string
        form into tokens and turns each token into a word with
        ``self.parser.make_word``. A word is kept only if its ``prob`` is
        below ``MAX_PROB`` and its ``vector`` has at least one non-zero
        component.

        Returns:
            A set of the words kept.

        Side effects:
            Adds the elapsed time to ``self.words_around_symbol_t``.
        """
        t0 = time.time()
        collected = set()

        for edge in self.hg.star(symbol, limit=STAR_LIMIT):
            for entity in edge:
                for inner_symbol in ed.symbols(entity):
                    for token in sym.symbol2str(inner_symbol).split():
                        word = self.parser.make_word(token)
                        if not (word.prob < MAX_PROB):
                            continue
                        if np.count_nonzero(word.vector) > 0:
                            collected.add(word)

        self.words_around_symbol_t += time.time() - t0
        return collected