def recursivelyAddNodesAndEdges(word, idx, entity, db, level): if level == 3: return if not word.startswith('/c/'): word = str('/c/en/') + word u1 = getNodeFromDB(word, idx, db) if word in words_dict: return try: assertions = lookup(word) for assertion in assertions: relation = encode(assertion['rel']) if relation == "/r/InstanceOf" or relation == "/r/IsA": end = encode(assertion['end']) if end == "/c/en/person": return for assertion in lookup(word): startWord = encode(assertion['start']) endWord = encode(assertion['end']) if startWord == "" or endWord == "": continue if startWord.startswith('/c/en') and endWord.startswith('/c/en'): otherWord = None if startWord == word or wordMatchesWithoutPosTag( startWord, word): u2 = getNodeFromDB(endWord, idx, db) otherWord = endWord #entity.add(u2); else: u2 = u1 u1 = getNodeFromDB(startWord, idx, db) otherWord = startWord #entity.add(u1); print otherWord, " added.., level=", level, "word=", word # TODO: Add only unique relationships u1.relationships.create(encode(assertion['rel']), u2, weight=assertion['weight']) recursivelyAddNodesAndEdges(otherWord, idx, entity, db, level + 1) words_dict[word] = 1 except UnicodeDecodeError as ude: print "UnicodeDecodeError found. Ignoring..."
def edges_for_uri(uri):
    """
    This function replaces most functions in the old Web interface, as every
    query to the API now returns a list of edges.

    Looks up at most 100 edges for ``uri`` (normalised to a single leading
    slash and no trailing slash) and renders them with ``edges.html``. Edges
    sharing the same ``edge['uri']`` are merged: the first occurrence is kept
    and the scores of later duplicates are added to it. Each kept edge gains
    ``startName``, ``relName``, ``endName`` and ``linked`` (its surface text
    with the first two ``[[...]]`` spans turned into links to the start and
    end concepts). If the lookup yields no edges, ``not_found.html`` is
    rendered instead.

    NOTE(review): surface text and URLs are placed into the HTML without
    escaping -- confirm that the data source / template make this safe.
    """
    uri = '/' + uri.rstrip('/')
    edges = list(lookup(uri, limit=100))
    if not edges:
        return render_template('not_found.html', uri=uri,
                               languages=LANGUAGES)

    seen_edges = {}
    out_edges = []
    caption = uri
    for edge in edges:
        text = edge.get('surfaceText') or ''
        start_url = WEB_ROOT + edge['start']
        end_url = WEB_ROOT + edge['end']

        if edge['uri'] not in seen_edges:
            edge['startName'] = uri2name(edge['start'])
            edge['relName'] = uri2name(edge['rel'])
            edge['endName'] = uri2name(edge['end'])
            # The caption is the readable name of the queried concept, taken
            # from the first edge that has it as start or end.
            if caption == uri and edge['start'] == uri:
                caption = edge['startName']
            if caption == uri and edge['end'] == uri:
                caption = edge['endName']
            edge['linked'] = _link_surface_text(text, start_url, end_url)
            out_edges.append(edge)
            seen_edges[edge['uri']] = edge
        else:
            # Duplicate edge: fold its score into the first occurrence and,
            # if that one had no usable surface text, borrow this one's.
            oldedge = seen_edges[edge['uri']]
            oldedge['score'] += edge['score']
            if not oldedge.get('linked'):
                oldedge['linked'] = _link_surface_text(text, start_url,
                                                       end_url)

    return render_template('edges.html', uri=uri, caption=caption,
                           edges=out_edges, root=WEB_ROOT,
                           languages=LANGUAGES)


def _link_surface_text(text, start_url, end_url):
    """Link the first ``[[...]]`` span to start_url and the next to end_url.

    A function replacement is used instead of a replacement template so that
    backslashes in a URL are inserted literally rather than being interpreted
    by ``re.sub`` as escapes or group references.
    """
    pattern = r'\[\[([^\]]+)\]\]'
    for url in (start_url, end_url):
        # Each pass consumes the first remaining [[...]] span.
        text = re.sub(
            pattern,
            lambda match, url=url: '<a href="%s">%s</a>' % (url,
                                                            match.group(1)),
            text,
            count=1)
    return text
def associatedWords(pattern, relations):
    """Return the set of words associated with ``pattern``.

    Looks up at most 350 edges for ``/c/<default_language>/<pattern>``, keeps
    those whose ``rel`` is in ``relations``, and turns each into a candidate
    via ``buildCandidate``. Candidates that are ``None`` or have ``tag == -1``
    are dropped. Every remaining candidate has ``computeScore()`` called on
    it, and the list is passed to ``computeWeight``.

    ``CLOCK.time_step`` is called after each phase ("lookup",
    "buildCandidate", "weights").

    Parameters:
        pattern:   term appended to the concept URI.
        relations: container of relation URIs to keep (tested with ``in``).

    Returns:
        A set of the candidates' ``word`` attributes. The result is
        unordered, so the candidates are not sorted by score first.
    """
    uri = "/c/{0}/{1}".format(default_language, pattern)
    # Materialise the lookup so the "lookup" timing covers the whole fetch.
    edges = list(lookup(uri, limit=350))
    CLOCK.time_step("lookup")

    candidates = []
    for edge in edges:
        if edge['rel'] not in relations:
            continue
        cand = buildCandidate(pattern, edge)
        if cand is not None and cand.tag != -1:
            candidates.append(cand)
    CLOCK.time_step("buildCandidate")

    for cand in candidates:
        cand.computeScore()
    computeWeight(candidates)
    CLOCK.time_step("weights")

    return {cand.word for cand in candidates}
def associatedWords(pattern, relations):
    """Collect the words related to ``pattern`` through ``relations``.

    Steps, each followed by a ``CLOCK.time_step`` mark:
      1. "lookup": fetch up to 350 edges for
         ``/c/<default_language>/<pattern>``.
      2. "buildCandidate": for edges whose ``rel`` is in ``relations``, build
         a candidate with ``buildCandidate``; discard ``None`` results and
         candidates whose ``tag`` is -1.
      3. "weights": call ``computeScore()`` on each candidate, then
         ``computeWeight`` on the whole list.

    Parameters:
        pattern:   term used to build the concept URI.
        relations: container of accepted relation URIs.

    Returns:
        The set of ``word`` attributes of the surviving candidates. No
        ordering is applied, since a set would discard it anyway.
    """
    uri = "/c/{0}/{1}".format(default_language, pattern)
    # list() forces the whole fetch to happen before the timing mark.
    edges = list(lookup(uri, limit=350))
    CLOCK.time_step("lookup")

    built = (buildCandidate(pattern, edge)
             for edge in edges
             if edge['rel'] in relations)
    candidates = [cand for cand in built
                  if cand is not None and cand.tag != -1]
    CLOCK.time_step("buildCandidate")

    for cand in candidates:
        cand.computeScore()
    computeWeight(candidates)
    CLOCK.time_step("weights")

    return {cand.word for cand in candidates}