Beispiel #1
0
 def t(word):
     """Map ``word`` through the lookup table ``d``, honoring inverted words.

     When ``inverted(word)`` is truthy, the entry for ``invert(word)`` is
     looked up and ``invert`` is applied to it before returning; otherwise
     ``d[word]`` is returned directly. ``d``, ``inverted`` and ``invert``
     are defined outside this function.

     Returns ``word`` unchanged if a ``KeyError`` is raised anywhere during
     the lookup.
     """
     try:
         flipped = inverted(word)
         key = invert(word) if flipped else word
         found = d[key]
         result = invert(found) if flipped else found
     except KeyError:
         # No entry available: fall back to the input itself.
         result = word
     return result
 def t(word):
     """Map ``word`` through the lookup table ``d``, honoring inverted words.

     When ``inverted(word)`` is truthy, the entry for ``invert(word)`` is
     looked up and ``invert`` is applied to it before returning; otherwise
     ``d[word]`` is returned directly. ``d``, ``inverted`` and ``invert``
     are defined outside this function.

     Returns ``word`` unchanged if a ``KeyError`` is raised anywhere during
     the lookup.
     """
     try:
         flipped = inverted(word)
         key = invert(word) if flipped else word
         found = d[key]
         result = invert(found) if flipped else found
     except KeyError:
         # No entry available: fall back to the input itself.
         result = word
     return result
Beispiel #3
0
def summarize_neighborhood(graph,
                           seed=None,
                           max_depth=2,
                           nbr_samples=20,
                           save_path=None):
    """Collect a sampled set of edge triples around ``seed`` breadth-first.

    Starting from ``seed``, each entity at depth below ``max_depth`` is
    expanded once. For every relation ``r`` in ``graph.neighbors[entity]``
    the neighbors returned by
    ``util.sample_if_large(nbrs, nbr_samples, replace=False)`` are recorded
    as triples and queued for expansion one level deeper. Neighbors that
    were left out of the sample are represented by one placeholder triple
    whose target is the string ``'<entity>_<r>_<number left out>'``.

    A triple is ``(entity, r, target)`` when ``inverted(r)`` is falsy and
    ``(target, invert(r), entity)`` otherwise.

    Args:
        graph: object whose ``neighbors`` attribute maps
            entity -> relation -> sized collection of neighbor entities.
        seed: entity to start from. When None, a key of
            ``graph.neighbors`` is chosen with ``random.choice`` and
            printed.
        max_depth: entities at this depth or deeper are not expanded.
        nbr_samples: forwarded to ``util.sample_if_large`` (presumably the
            maximum number of neighbors kept per relation -- confirm
            against that helper).
        save_path: when not None, the triples are written to this path,
            one per line, fields separated by tabs.

    Returns:
        A list of the collected triples. It is built from a set, so the
        order is unspecified.
    """
    if seed is None:
        # Materialize the keys: random.choice needs an indexable sequence,
        # which a dict key view is not.
        seed = random.choice(list(graph.neighbors.keys()))
        # Single-argument form prints "seed: <seed>" whether print is a
        # statement or a function.
        print('seed: %s' % (seed,))

    triples = set()
    # Every entity that has ever been queued. The seed is included so that
    # an edge leading back to it does not trigger a second expansion.
    explored = {seed}
    queue = deque([(seed, 0)])
    while queue:
        entity, depth = queue.popleft()

        if depth >= max_depth:
            continue

        # loop through each available relation
        for r in graph.neighbors[entity]:
            # sample neighbors
            nbrs = graph.neighbors[entity][r]
            sampled_nbrs = util.sample_if_large(nbrs,
                                                nbr_samples,
                                                replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)

            def edge_crossed(target):
                # Store inverted relations in their forward orientation.
                if inverted(r):
                    return (target, invert(r), entity)
                return (entity, r, target)

            # document edges crossed, and add unseen nbrs to queue
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    # Mark at enqueue time: BFS reaches an entity first at
                    # its shallowest depth, so later sightings can be
                    # ignored.
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add "summary entity" for all entities we missed
            if num_missed > 0:
                triples.add(
                    edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)
def summarize_neighborhood(graph, seed=None, max_depth=2, nbr_samples=20, save_path=None):
    """Collect a sampled set of edge triples around ``seed`` breadth-first.

    Starting from ``seed``, each entity at depth below ``max_depth`` is
    expanded once. For every relation ``r`` in ``graph.neighbors[entity]``
    the neighbors returned by
    ``util.sample_if_large(nbrs, nbr_samples, replace=False)`` are recorded
    as triples and queued for expansion one level deeper. Neighbors that
    were left out of the sample are represented by one placeholder triple
    whose target is the string ``'<entity>_<r>_<number left out>'``.

    A triple is ``(entity, r, target)`` when ``inverted(r)`` is falsy and
    ``(target, invert(r), entity)`` otherwise.

    Args:
        graph: object whose ``neighbors`` attribute maps
            entity -> relation -> sized collection of neighbor entities.
        seed: entity to start from. When None, a key of
            ``graph.neighbors`` is chosen with ``random.choice`` and
            printed.
        max_depth: entities at this depth or deeper are not expanded.
        nbr_samples: forwarded to ``util.sample_if_large`` (presumably the
            maximum number of neighbors kept per relation -- confirm
            against that helper).
        save_path: when not None, the triples are written to this path,
            one per line, fields separated by tabs.

    Returns:
        A list of the collected triples. It is built from a set, so the
        order is unspecified.
    """
    if seed is None:
        # Materialize the keys: random.choice needs an indexable sequence,
        # which a dict key view is not.
        seed = random.choice(list(graph.neighbors.keys()))
        # Single-argument form prints "seed: <seed>" whether print is a
        # statement or a function.
        print('seed: %s' % (seed,))

    triples = set()
    # Every entity that has ever been queued. The seed is included so that
    # an edge leading back to it does not trigger a second expansion.
    explored = {seed}
    queue = deque([(seed, 0)])
    while queue:
        entity, depth = queue.popleft()

        if depth >= max_depth:
            continue

        # loop through each available relation
        for r in graph.neighbors[entity]:
            # sample neighbors
            nbrs = graph.neighbors[entity][r]
            sampled_nbrs = util.sample_if_large(nbrs, nbr_samples, replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)

            def edge_crossed(target):
                # Store inverted relations in their forward orientation.
                if inverted(r):
                    return (target, invert(r), entity)
                return (entity, r, target)

            # document edges crossed, and add unseen nbrs to queue
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    # Mark at enqueue time: BFS reaches an entity first at
                    # its shallowest depth, so later sightings can be
                    # ignored.
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add "summary entity" for all entities we missed
            if num_missed > 0:
                triples.add(edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)