def single_relation(query):
    """Return the only relation of ``query`` when it is a single forward hop.

    ``query.r`` is read as the sequence of relations on the query.  The
    result is ``False`` when that sequence does not contain exactly one
    relation, or when ``inverted`` flags that relation; otherwise the
    relation itself is returned.
    """
    relations = query.r
    if len(relations) == 1:
        relation = relations[-1]
        if not inverted(relation):
            return relation
    return False
def t(word):
    """Translate ``word`` through the mapping ``d`` from the enclosing scope.

    A word that ``inverted`` flags is passed through ``invert``, looked up in
    ``d``, and the looked-up value is passed through ``invert`` again; any
    other word is looked up directly.  If a ``KeyError`` is raised anywhere
    along the way, ``word`` is returned unchanged.
    """
    try:
        if not inverted(word):
            return d[word]
        return invert(d[invert(word)])
    except KeyError:
        # no translation available: fall back to the word itself
        return word
def summarize_neighborhood(graph, seed=None, max_depth=2, nbr_samples=20,
                           save_path=None):
    """Collect the triples crossed by a breadth-first walk around ``seed``.

    NOTE(review): a function with this name appears to be defined again
    further down this file; the later definition replaces this one at import
    time.

    Args:
        graph: object whose ``neighbors`` attribute maps
            entity -> relation -> collection of neighbouring entities.
        seed: entity to start from.  When ``None`` a random key of
            ``graph.neighbors`` is chosen.
        max_depth: entities reached at this depth or deeper are not expanded.
        nbr_samples: sample size handed to ``util.sample_if_large`` for each
            (entity, relation) neighbour collection.
        save_path: when given, the triples are also written to this path,
            one tab-separated triple per line.

    Returns:
        A list of ``(source, relation, target)`` tuples in arbitrary order.
        Edges crossed along a relation flagged by ``inverted`` are reported
        with the relation passed through ``invert`` and the endpoints
        swapped.  For every neighbour collection that was only partially
        sampled, one extra triple is added whose far end is the placeholder
        ``'<entity>_<relation>_<number missed>'``.
    """
    if seed is None:
        # list() makes the keys indexable whether they are a list or a view.
        seed = random.choice(list(graph.neighbors))
    # Same text under the print statement and the print function.
    print('seed: %s' % (seed,))

    triples = set()
    # Entities already queued; without this bookkeeping every entity would
    # be re-queued (and re-sampled) each time it is reached.
    explored = {seed}
    queue = deque([(seed, 0)])

    while queue:
        entity, depth = queue.popleft()
        if depth >= max_depth:
            continue

        relations = graph.neighbors[entity]
        for r in relations:
            nbrs = relations[r]
            sampled_nbrs = util.sample_if_large(nbrs, nbr_samples,
                                                replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)

            # Work out the edge orientation once per relation.
            flipped = inverted(r)
            forward_r = invert(r) if flipped else r

            def edge_crossed(target, entity=entity, flipped=flipped,
                             forward_r=forward_r):
                if flipped:
                    return (target, forward_r, entity)
                return (entity, forward_r, target)

            # document edges crossed, and queue unseen neighbours
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add a "summary entity" standing in for all neighbours we missed
            if num_missed > 0:
                triples.add(
                    edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)
def find_inverses(graph):
    """Guess, for every relation, which relation acts as its inverse.

    NOTE(review): a function with this name appears to be defined again
    further down this file; the later definition replaces this one at import
    time.

    Heuristic: for the leading subjects of relation ``r`` (the loop stops
    after index 100), follow ``r`` to one target and count the relations
    available on that target.  The most frequent such relation that
    ``inverted`` does not flag is taken as the inverse of ``r``.

    Args:
        graph: object with ``relation_args`` (relation -> mapping with an
            ``'s'`` entry listing subjects) and ``neighbors``
            (entity -> relation -> indexable collection of targets).

    Returns:
        dict mapping each relation to its guessed inverse.  Relations for
        which no candidate was found are absent.  Each match is also printed
        as ``relation inverse count``.
    """
    inverse = {}

    for r in graph.relation_args:
        ct = Counter()  # co-occurring relation -> number of targets having it

        for k, s in enumerate(graph.relation_args[r]['s']):
            target = graph.neighbors[s][r][0]  # pick an arbitrary target
            # .keys() is deliberate: Counter.update() on a mapping would add
            # its values instead of counting its keys.
            ct.update(graph.neighbors[target].keys())
            if k >= 100:
                break

        # inverse = top co-occurring relation that is not itself inverted
        for r2, count in ct.most_common():
            if not inverted(r2):
                inverse[r] = r2
                # Same text under the print statement and the print function.
                print('%s %s %s' % (r, r2, count))
                break

    return inverse
def find_inverses(graph):
    """Guess, for every relation, which relation acts as its inverse.

    Heuristic: for the leading subjects of relation ``r`` (the loop stops
    after index 100), follow ``r`` to one target and count the relations
    available on that target.  The most frequent such relation that
    ``inverted`` does not flag is taken as the inverse of ``r``.

    Args:
        graph: object with ``relation_args`` (relation -> mapping with an
            ``'s'`` entry listing subjects) and ``neighbors``
            (entity -> relation -> indexable collection of targets).

    Returns:
        dict mapping each relation to its guessed inverse.  Relations for
        which no candidate was found are absent.  Each match is also printed
        as ``relation inverse count``.
    """
    inverse = {}

    for r in graph.relation_args:
        subjects = graph.relation_args[r]['s']
        ct = Counter()  # co-occurring relation -> number of targets having it

        for k, s in enumerate(subjects):
            target = graph.neighbors[s][r][0]  # pick an arbitrary target
            # .keys() is deliberate: Counter.update() on a mapping would add
            # its values instead of counting its keys.
            ct.update(graph.neighbors[target].keys())
            if k >= 100:
                break

        # inverse = top co-occurring relation that is not itself inverted
        for r2, count in ct.most_common():
            if not inverted(r2):
                inverse[r] = r2
                # Same text under the print statement and the print function.
                print('%s %s %s' % (r, r2, count))
                break

    return inverse
def summarize_neighborhood(graph, seed=None, max_depth=2, nbr_samples=20,
                           save_path=None):
    """Collect the triples crossed by a breadth-first walk around ``seed``.

    Args:
        graph: object whose ``neighbors`` attribute maps
            entity -> relation -> collection of neighbouring entities.
        seed: entity to start from.  When ``None`` a random key of
            ``graph.neighbors`` is chosen.
        max_depth: entities reached at this depth or deeper are not expanded.
        nbr_samples: sample size handed to ``util.sample_if_large`` for each
            (entity, relation) neighbour collection.
        save_path: when given, the triples are also written to this path,
            one tab-separated triple per line.

    Returns:
        A list of ``(source, relation, target)`` tuples in arbitrary order.
        Edges crossed along a relation flagged by ``inverted`` are reported
        with the relation passed through ``invert`` and the endpoints
        swapped.  For every neighbour collection that was only partially
        sampled, one extra triple is added whose far end is the placeholder
        ``'<entity>_<relation>_<number missed>'``.
    """
    if seed is None:
        # list() makes the keys indexable whether they are a list or a view.
        seed = random.choice(list(graph.neighbors))
    # Same text under the print statement and the print function.
    print('seed: %s' % (seed,))

    triples = set()
    # Entities already queued; without this bookkeeping every entity would
    # be re-queued (and re-sampled) each time it is reached.
    explored = {seed}
    queue = deque([(seed, 0)])

    while queue:
        entity, depth = queue.popleft()
        if depth >= max_depth:
            continue

        relations = graph.neighbors[entity]
        for r in relations:
            nbrs = relations[r]
            sampled_nbrs = util.sample_if_large(nbrs, nbr_samples,
                                                replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)

            # Work out the edge orientation once per relation.
            flipped = inverted(r)
            forward_r = invert(r) if flipped else r

            def edge_crossed(target, entity=entity, flipped=flipped,
                             forward_r=forward_r):
                if flipped:
                    return (target, forward_r, entity)
                return (entity, forward_r, target)

            # document edges crossed, and queue unseen neighbours
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add a "summary entity" standing in for all neighbours we missed
            if num_missed > 0:
                triples.add(
                    edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)