Beispiel #1
0
def recursivelyAddNodesAndEdges(word, idx, entity, db, level):
    """Recursively add ``word`` and its neighbouring concepts to the graph DB.

    The assertions returned by ``lookup(word)`` are walked; for every
    assertion whose start and end URIs are both under ``/c/en`` a
    relationship is created from the start node to the end node, and the
    function recurses into the concept at the other end of the edge.

    Nothing is added for a word that has an ``/r/IsA`` or ``/r/InstanceOf``
    assertion ending in ``/c/en/person``.

    Args:
        word: Concept URI, or a bare term that is prefixed with ``/c/en/``.
        idx: Passed through unchanged to ``getNodeFromDB``.
        entity: Not used here except to be handed to the recursive call.
        db: Passed through unchanged to ``getNodeFromDB``.
        level: Current recursion depth; the recursion stops at depth 3.

    Side effects:
        Creates relationships on the nodes returned by ``getNodeFromDB`` and
        records fully processed words in the module-level ``words_dict``.
    """
    if level == 3:
        return
    if not word.startswith('/c/'):
        word = '/c/en/' + word
    # Looked up before the visited check, exactly as in the original, in case
    # getNodeFromDB has side effects (presumably get-or-create -- TODO confirm).
    word_node = getNodeFromDB(word, idx, db)
    if word in words_dict:
        return
    try:
        # Fetch the assertions once; both passes below reuse the same list
        # instead of issuing a second lookup for the same word.
        assertions = list(lookup(word))

        # Pass 1: skip the word entirely if it is classified as a person.
        for assertion in assertions:
            relation = encode(assertion['rel'])
            if relation in ("/r/InstanceOf", "/r/IsA"):
                if encode(assertion['end']) == "/c/en/person":
                    return

        # Pass 2: create one relationship per usable assertion and recurse.
        for assertion in assertions:
            start_word = encode(assertion['start'])
            end_word = encode(assertion['end'])
            if start_word == "" or end_word == "":
                continue
            if not (start_word.startswith('/c/en')
                    and end_word.startswith('/c/en')):
                continue

            # Use per-iteration node variables.  The original reassigned the
            # node of ``word`` in the else branch, so every later iteration
            # attached its edge to a stale node from a previous assertion.
            if start_word == word or wordMatchesWithoutPosTag(start_word,
                                                              word):
                start_node = word_node
                end_node = getNodeFromDB(end_word, idx, db)
                other_word = end_word
            else:
                start_node = getNodeFromDB(start_word, idx, db)
                end_node = word_node
                other_word = start_word

            # Single-argument print: same output as the former print
            # statement, and valid under both Python 2 and Python 3.
            print("%s  added.., level= %s word= %s"
                  % (other_word, level, word))
            # TODO: Add only unique relationships
            start_node.relationships.create(encode(assertion['rel']),
                                            end_node,
                                            weight=assertion['weight'])
            recursivelyAddNodesAndEdges(other_word, idx, entity, db,
                                        level + 1)
        # Marked as visited only after all neighbours were processed
        # (unchanged from the original ordering).
        words_dict[word] = 1
    except UnicodeDecodeError:
        print("UnicodeDecodeError found. Ignoring...")
Beispiel #2
0
def _link_surface_text(text, url1, url2):
    """Replace the first two ``[[...]]`` spans of ``text`` with HTML links.

    The first span links to ``url1`` and the second to ``url2``; the text
    inside the brackets becomes the link text.
    """
    linked = text
    for url in (url1, url2):
        # A callable replacement inserts the URL verbatim.  Interpolating it
        # into a replacement *template* (as the original did) makes re.sub
        # interpret any backslash or group reference inside the URL.
        linked = re.sub(
            r'\[\[([^\]]+)\]\]',
            lambda match, url=url: '<a href="%s">%s</a>' % (url,
                                                            match.group(1)),
            linked, count=1)
    return linked


def edges_for_uri(uri):
    """
    This function replaces most functions in the old Web interface, as every
    query to the API now returns a list of edges.

    Looks up at most 100 edges for ``uri``, merges edges that share the same
    ``edge['uri']`` by summing their scores, decorates each kept edge with
    ``startName``/``relName``/``endName`` and a ``linked`` HTML rendering of
    its surface text, and renders ``edges.html`` (or ``not_found.html`` when
    the lookup returned nothing).
    """
    uri = '/' + uri.rstrip('/')
    edges = list(lookup(uri, limit=100))
    if not edges:
        return render_template('not_found.html', uri=uri, languages=LANGUAGES)

    seen_edges = {}
    out_edges = []
    caption = uri
    for edge in edges:
        # NOTE(review): surface text and URIs are inserted into HTML without
        # escaping; confirm how the template treats ``edge['linked']``.
        text = edge.get('surfaceText') or ''
        url1 = WEB_ROOT + edge['start']
        url2 = WEB_ROOT + edge['end']

        if edge['uri'] not in seen_edges:
            edge['startName'] = uri2name(edge['start'])
            edge['relName'] = uri2name(edge['rel'])
            edge['endName'] = uri2name(edge['end'])
            # The caption becomes the readable name of the queried node, taken
            # from the first edge that has it as start or end.
            if caption == uri and edge['start'] == uri:
                caption = edge['startName']
            if caption == uri and edge['end'] == uri:
                caption = edge['endName']

            edge['linked'] = _link_surface_text(text, url1, url2)
            out_edges.append(edge)
            seen_edges[edge['uri']] = edge
        else:
            # Duplicate edge: fold its score into the one already kept and
            # borrow its surface text if the kept edge had none.
            oldedge = seen_edges[edge['uri']]
            oldedge['score'] += edge['score']
            if not oldedge.get('linked'):
                oldedge['linked'] = _link_surface_text(text, url1, url2)

    return render_template('edges.html', uri=uri, caption=caption,
                           edges=out_edges, root=WEB_ROOT,
                           languages=LANGUAGES)
def edges_for_uri(uri):
    """
    This function replaces most functions in the old Web interface, as every
    query to the API now returns a list of edges.

    At most 100 edges are looked up for ``uri``. Edges sharing the same
    ``edge['uri']`` are merged by adding their scores. Every kept edge gains
    ``startName``, ``relName``, ``endName`` and ``linked`` (its surface text
    with the first two ``[[...]]`` spans turned into links to the start and
    end nodes). Renders ``not_found.html`` when the lookup is empty, otherwise
    ``edges.html``.
    """
    def render_links(edge):
        # Build the linked HTML for one edge: first [[...]] -> start node,
        # second [[...]] -> end node.
        html_text = edge.get('surfaceText') or ''
        hrefs = (WEB_ROOT + edge['start'], WEB_ROOT + edge['end'])
        for href in hrefs:
            # The replacement is a function so that backslashes or group
            # references inside the URL are inserted literally; a replacement
            # template string would have them interpreted by re.sub.
            html_text = re.sub(
                r'\[\[([^\]]+)\]\]',
                lambda m, href=href: '<a href="%s">%s</a>' % (href,
                                                              m.group(1)),
                html_text, count=1)
        return html_text

    uri = '/' + uri.rstrip('/')
    edges = list(lookup(uri, limit=100))
    if not edges:
        return render_template('not_found.html', uri=uri, languages=LANGUAGES)

    merged = {}      # edge uri -> first edge seen with that uri
    out_edges = []
    caption = uri
    for edge in edges:
        known = merged.get(edge['uri'])
        if known is not None:
            # Repeated edge: accumulate the score, and fill in the link text
            # only if the edge kept earlier has none.
            known['score'] += edge['score']
            if not known.get('linked'):
                known['linked'] = render_links(edge)
            continue

        edge['startName'] = uri2name(edge['start'])
        edge['relName'] = uri2name(edge['rel'])
        edge['endName'] = uri2name(edge['end'])
        # Replace the raw URI caption with a readable name the first time the
        # queried node shows up as an edge endpoint.
        if caption == uri and edge['start'] == uri:
            caption = edge['startName']
        if caption == uri and edge['end'] == uri:
            caption = edge['endName']

        # NOTE(review): nothing here is HTML-escaped; verify the template's
        # handling of ``linked`` before exposing untrusted surface text.
        edge['linked'] = render_links(edge)
        out_edges.append(edge)
        merged[edge['uri']] = edge

    return render_template('edges.html', uri=uri, caption=caption,
                           edges=out_edges, root=WEB_ROOT,
                           languages=LANGUAGES)
Beispiel #4
0
def associatedWords(pattern, relations):
    """Return the set of words associated with ``pattern``.

    Looks up at most 350 edges for ``/c/<default_language>/<pattern>``, keeps
    those whose ``rel`` is in ``relations``, turns each into a candidate with
    ``buildCandidate`` and scores/weights the usable ones.

    Args:
        pattern: Term appended to the concept URI.
        relations: Container of relation identifiers to keep (tested with
            ``in``).

    Returns:
        A set with the ``word`` attribute of every retained candidate.
    """
    uri = "/c/{0}/{1}".format(default_language, pattern)
    edges = list(lookup(uri, limit=350))
    CLOCK.time_step("lookup")

    res = []
    for e in edges:
        if e['rel'] in relations:
            cand = buildCandidate(pattern, e)
            # Identity test for None; a tag of -1 marks a candidate to drop.
            if cand is not None and cand.tag != -1:
                res.append(cand)
    CLOCK.time_step("buildCandidate")

    for cand in res:
        cand.computeScore()
    computeWeight(res)
    # NOTE(review): the ordering produced here is lost when the result is
    # returned as a set; kept so the observable behaviour is unchanged.
    res.sort(key=lambda x: x.score)
    CLOCK.time_step("weights")
    return {a.word for a in res}
def associatedWords(pattern, relations):
    """Collect the words related to ``pattern`` through the given relations.

    The concept URI ``/c/<default_language>/<pattern>`` is looked up (limit
    350 edges). Edges whose ``rel`` is not in ``relations`` are ignored; the
    rest are passed to ``buildCandidate``. Candidates that are ``None`` or
    whose ``tag`` is ``-1`` are discarded, the others are scored and weighted.

    Args:
        pattern: Term used to build the concept URI.
        relations: Container of accepted relation identifiers.

    Returns:
        The set of ``word`` attributes of the surviving candidates.
    """
    uri = "/c/{0}/{1}".format(default_language, pattern)
    found = list(lookup(uri, limit=350))
    CLOCK.time_step("lookup")

    candidates = []
    for edge in found:
        if edge['rel'] not in relations:
            continue
        candidate = buildCandidate(pattern, edge)
        # ``is not None`` instead of ``!= None``: an identity check cannot be
        # affected by a custom comparison method on the candidate type.
        if candidate is not None and candidate.tag != -1:
            candidates.append(candidate)
    CLOCK.time_step("buildCandidate")

    for candidate in candidates:
        candidate.computeScore()
    computeWeight(candidates)
    # NOTE(review): sorting has no effect on the returned set; retained to
    # keep behaviour identical until callers are checked.
    candidates.sort(key=lambda c: c.score)
    CLOCK.time_step("weights")
    return {candidate.word for candidate in candidates}