def t(word):
    """Translate ``word`` through the mapping ``d``.

    A word for which ``inverted(word)`` is true is first passed through
    ``invert``, looked up in ``d``, and the result is passed through
    ``invert`` again.  Any other word is looked up in ``d`` directly.

    Returns:
        The translated word, or ``word`` itself when a ``KeyError`` is
        raised anywhere during the translation.
    """
    try:
        if not inverted(word):
            return d[word]
        # Look up the un-inverted form, then restore the inversion on the
        # translated result.
        return invert(d[invert(word)])
    except KeyError:
        # No translation available: fall back to the input unchanged.
        return word
def summarize_neighborhood(graph, seed=None, max_depth=2, nbr_samples=20,
                           save_path=None):
    """Collect the edges found in a breadth-first walk around ``seed``.

    Starting from ``seed``, entities are expanded breadth-first.  For every
    relation of an expanded entity, its neighbors are passed through
    ``util.sample_if_large(nbrs, nbr_samples, replace=False)`` and one
    triple is recorded per sampled neighbor.  When the sample is smaller
    than the full neighbor list, one extra triple is recorded whose target
    is a placeholder named ``'<entity>_<relation>_<num_missed>'``.

    Each entity is enqueued, and therefore expanded, at most once.

    Args:
        graph: Object exposing ``neighbors``, indexed as
            ``graph.neighbors[entity][relation]`` to get a sized collection
            of neighboring entities.
        seed: Entity to start from.  When ``None``, one is drawn at random
            from the keys of ``graph.neighbors``.
        max_depth: Entities at this depth or deeper are not expanded.
        nbr_samples: Sample size handed to ``util.sample_if_large``.
        save_path: When not ``None``, the triples are written to this path,
            one per line, with tab-separated fields.

    Returns:
        A list of ``(source, relation, target)`` tuples.  For a relation
        where ``inverted(r)`` is true, the triple is stored as
        ``(target, invert(r), entity)`` instead of ``(entity, r, target)``.
    """
    # NOTE(review): this function is defined a second time right after this
    # definition; the later definition is the one bound at import time.
    if seed is None:
        # list() keeps random.choice working where dict.keys() returns a
        # view that does not support indexing.
        seed = random.choice(list(graph.neighbors))
    # Single-argument form prints "seed: <seed>" whether print is a
    # statement or a function.
    print('seed: {}'.format(seed))

    triples = set()
    # Entities that have already been put on the queue.
    explored = {seed}
    queue = deque([(seed, 0)])

    while queue:
        entity, depth = queue.popleft()
        if depth >= max_depth:
            continue

        relations = graph.neighbors[entity]
        # loop through each available relation
        for r in relations:
            # sample neighbors
            nbrs = relations[r]
            sampled_nbrs = util.sample_if_large(nbrs, nbr_samples,
                                                replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)

            # The direction of the stored triple depends only on the
            # relation, so work it out once per relation.
            if inverted(r):
                forward_r = invert(r)

                def edge_crossed(target, entity=entity, forward_r=forward_r):
                    return (target, forward_r, entity)
            else:
                def edge_crossed(target, entity=entity, r=r):
                    return (entity, r, target)

            # document edges crossed, and add nbrs to queue
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    # Mark on enqueue so an entity reachable along several
                    # paths is only expanded once.
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add "summary entity" for all entities we missed
            if num_missed > 0:
                triples.add(
                    edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)
def summarize_neighborhood(graph, seed=None, max_depth=2, nbr_samples=20,
                           save_path=None):
    """Collect the edges found in a breadth-first walk around ``seed``.

    Starting from ``seed``, entities are expanded breadth-first.  For every
    relation of an expanded entity, its neighbors are passed through
    ``util.sample_if_large(nbrs, nbr_samples, replace=False)`` and one
    triple is recorded per sampled neighbor.  When the sample is smaller
    than the full neighbor list, one extra triple is recorded whose target
    is a placeholder named ``'<entity>_<relation>_<num_missed>'``.

    Each entity is enqueued, and therefore expanded, at most once.

    Args:
        graph: Object exposing ``neighbors``, indexed as
            ``graph.neighbors[entity][relation]`` to get a sized collection
            of neighboring entities.
        seed: Entity to start from.  When ``None``, one is drawn at random
            from the keys of ``graph.neighbors``.
        max_depth: Entities at this depth or deeper are not expanded.
        nbr_samples: Sample size handed to ``util.sample_if_large``.
        save_path: When not ``None``, the triples are written to this path,
            one per line, with tab-separated fields.

    Returns:
        A list of ``(source, relation, target)`` tuples.  For a relation
        where ``inverted(r)`` is true, the triple is stored as
        ``(target, invert(r), entity)`` instead of ``(entity, r, target)``.
    """
    # NOTE(review): a definition with the same name and signature precedes
    # this one in the file; this later definition is the one bound at
    # import time.
    if seed is None:
        # list() keeps random.choice working where dict.keys() returns a
        # view that does not support indexing.
        seed = random.choice(list(graph.neighbors))
    # Single-argument form prints "seed: <seed>" whether print is a
    # statement or a function.
    print('seed: {}'.format(seed))

    triples = set()
    # Entities that have already been put on the queue.
    explored = {seed}
    queue = deque([(seed, 0)])

    while queue:
        entity, depth = queue.popleft()
        if depth >= max_depth:
            continue

        relations = graph.neighbors[entity]
        # loop through each available relation
        for r in relations:
            # sample neighbors
            nbrs = relations[r]
            sampled_nbrs = util.sample_if_large(nbrs, nbr_samples,
                                                replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)

            # The direction of the stored triple depends only on the
            # relation, so work it out once per relation.
            if inverted(r):
                forward_r = invert(r)

                def edge_crossed(target, entity=entity, forward_r=forward_r):
                    return (target, forward_r, entity)
            else:
                def edge_crossed(target, entity=entity, r=r):
                    return (entity, r, target)

            # document edges crossed, and add nbrs to queue
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    # Mark on enqueue so an entity reachable along several
                    # paths is only expanded once.
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add "summary entity" for all entities we missed
            if num_missed > 0:
                triples.add(
                    edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)