def find_paths_to_node(node):
    """
    Collect the nodes reachable from `node` in the global `dg` di-graph (per
    the original author's note, by following the modules' requisites),
    together with every edge of `dg` whose two endpoints both belong to that
    reachable set.

    The graph is populated lazily: when `dg` has no nodes or no edges,
    `query_db()` is called before the traversal.

    NOTE(review): assuming `nxalg.descendants` is networkx's `descendants`,
    the start node itself is not part of the reachable set, so edges leaving
    `node` directly are not reported either -- confirm this is intended.

    :param node: The node to start at.
    :return: (reachable_nodes: set, edges_involved: set)
    """
    # FixMe: This function could potentially be changed/improved to resemble the
    # FixMe: one used in `static/js/column-vis.js`, the `returnNodes` method.
    graph_is_populated = len(dg.nodes) > 0 and len(dg.edges) > 0
    if not graph_is_populated:
        query_db()

    # requisites are stored in a direction that makes "descendants" (not
    # "ancestors") the right traversal here
    reachable_nodes = set(nxalg.descendants(dg, node))

    # keep only the edges that start AND end inside the reachable set
    edges_involved = set()
    for edge in dg.edges:
        if edge[0] in reachable_nodes and edge[1] in reachable_nodes:
            edges_involved.add(edge)
    return reachable_nodes, edges_involved
Exemple #2
0
def main(args, metrics):
    """
    Run the evaluation over the test nodes of a `MAGDataset` and print one
    line per requested metric.

    Steps, in order:
      1. load the dataset from `args.data_path` and resolve every name in
         `metrics` to an attribute of `module_metric`;
      2. build the core subgraph from the training nodes and connect every
         one of its nodes to a pseudo leaf node (id -1);
      3. enumerate all (node, descendant) pairs of that graph as candidate
         positions and record, per candidate, the stringified predecessors
         and successors of both endpoints (pseudo leaf excluded);
      4. build the train+test holdout subgraph and look up the reference
         positions of the test nodes via `find_insert_posistion`;
      5. L2-normalise the node features and load them into a `KeyedVectors`
         store keyed by the stringified row index;
      6. for every test node, compute `distances`, `rearrange` them against
         the reference positions and accumulate the ranks returned by
         `module_metric.obtain_ranks(mode=0)`.

    :param args: object providing `data_path`.
    :param metrics: iterable of attribute names to look up on `module_metric`.
    :return: None (results are printed).
    """
    graph_dataset = MAGDataset(name="", path=args.data_path, raw=False)
    metric_fns = [getattr(module_metric, metric_name)
                  for metric_name in metrics]
    rank_fn = partial(module_metric.obtain_ranks, mode=0)

    full_graph = graph_dataset.g_full.to_networkx()
    core_subgraph = get_holdout_subgraph(graph_dataset.train_node_ids,
                                         full_graph)

    pseudo_leaf_node = -1
    # iterate over a snapshot: the first add_edge inserts the pseudo leaf
    # into the graph, which would otherwise change the node view mid-loop
    for core_node in list(core_subgraph.nodes()):
        core_subgraph.add_edge(core_node, pseudo_leaf_node)

    node2descendants = {}
    for core_node in core_subgraph.nodes:
        node2descendants[core_node] = set(
            descendants(core_subgraph, core_node))

    candidate_positions = list({
        (upper, lower)
        for upper, below in node2descendants.items()
        for lower in below
    })

    edge2nbs = {}
    for upper, lower in candidate_positions:
        upper_preds = set(core_subgraph.predecessors(upper))
        upper_succs = set(core_subgraph.successors(upper))
        if lower == pseudo_leaf_node:
            # the pseudo leaf contributes no neighbours of its own
            lower_preds, lower_succs = set(), set()
        else:
            lower_preds = set(core_subgraph.predecessors(lower))
            lower_succs = set(core_subgraph.successors(lower))
        neighbours = upper_preds | lower_preds | upper_succs | lower_succs
        neighbours.discard(pseudo_leaf_node)
        edge2nbs[(upper, lower)] = [str(nb) for nb in neighbours]

    train_and_test_ids = (graph_dataset.train_node_ids +
                          graph_dataset.test_node_ids)
    holdout_subgraph = get_holdout_subgraph(train_and_test_ids, full_graph)
    node2pos = find_insert_posistion(graph_dataset.test_node_ids,
                                     core_subgraph, holdout_subgraph,
                                     pseudo_leaf_node)

    node_features = F.normalize(graph_dataset.g_full.ndata['x'], p=2, dim=1)
    kv = KeyedVectors(vector_size=node_features.shape[1])
    feature_keys = list(map(str, range(len(node_features))))
    kv.add(feature_keys, node_features.numpy())

    all_ranks = []
    for test_node in tqdm(graph_dataset.test_node_ids):
        dists = distances(str(test_node), candidate_positions, edge2nbs, kv,
                          pseudo_leaf_node)
        scores, labels = rearrange(torch.Tensor(dists), candidate_positions,
                                   node2pos[test_node])
        all_ranks.extend(rank_fn(scores, labels))

    # evaluate every metric before printing anything
    total_metrics = [metric_fn(all_ranks) for metric_fn in metric_fns]
    for metric_fn, metric_value in zip(metric_fns, total_metrics):
        print('    {:15s}: {}'.format(metric_fn.__name__, metric_value))
Exemple #3
0
def _nearest_retained(neighbours, start, retained):
    """Breadth-first search from ``start`` for the closest retained nodes.

    Starting with ``neighbours(start)``, the walk stops at every node that is
    in ``retained`` and keeps expanding through nodes that are not. Each node
    is expanded at most once, so shared ancestors/descendants are not
    re-explored and cycles among non-retained nodes cannot loop forever.

    :param neighbours: callable mapping a node to an iterable of adjacent
        nodes (e.g. ``graph.predecessors`` or ``graph.successors``).
    :param start: the node whose surroundings are searched.
    :param retained: container supporting ``in`` for the nodes to stop at.
    :return: set of retained nodes found.
    """
    found = set()
    visited = set()
    queue = deque(neighbours(start))
    while queue:
        current = queue.popleft()
        if current in visited:
            continue
        visited.add(current)
        if current in retained:
            found.add(current)
        else:
            queue.extend(neighbours(current))
    return found


def get_holdout_subgraph(node_ids, full_graph):
    """Build the subgraph of ``full_graph`` induced by ``node_ids``, patched
    so that connectivity through the left-out nodes is preserved.

    For every node of ``full_graph`` that is not kept, its nearest kept
    ancestors are connected directly to its nearest kept descendants.
    Afterwards "jump" edges are removed: an edge ``node -> s`` is dropped
    when ``s`` is also a descendant of one of ``node``'s direct successors.

    :param node_ids: iterable of node ids to keep.
    :param full_graph: directed graph exposing the networkx-style API used
        here (``subgraph``, ``predecessors``, ``successors``, ...).
    :return: a new graph (copy); ``full_graph`` is not modified.
    """
    subgraph = full_graph.subgraph(node_ids).copy()
    # Test membership against the subgraph (hash lookup) rather than against
    # ``node_ids``, which may be a long list.
    removed_nodes = [n for n in full_graph.nodes if n not in subgraph]

    for removed in removed_nodes:
        parents = _nearest_retained(full_graph.predecessors, removed,
                                    subgraph)
        children = _nearest_retained(full_graph.successors, removed, subgraph)
        for p in parents:
            for c in children:
                subgraph.add_edge(p, c)

    # remove jump edges
    # Descendant sets are computed once, before any edge is removed.
    # NOTE(review): this relies on shortcut removal not changing
    # reachability, which holds for acyclic graphs -- confirm inputs are DAGs.
    node2descendants = {
        n: set(descendants(subgraph, n))
        for n in subgraph.nodes
    }
    for node in subgraph.nodes():
        if subgraph.out_degree(node) <= 1:
            continue
        direct = set(subgraph.successors(node))
        indirect = set().union(*(node2descendants[s] for s in direct))
        for shortcut in direct & indirect:
            subgraph.remove_edge(node, shortcut)
    return subgraph
Exemple #4
0
 def get_descendants(self, host):
     """Return the descendants of the graph node associated with `host`.

     The node is looked up with `self.find_node_from_host(host)`; when that
     lookup yields a falsy value an empty list is returned, otherwise the
     result of `descendants(self.graph, node)` is returned as-is.
     """
     matched_node = self.find_node_from_host(host)
     if not matched_node:
         return []
     return descendants(self.graph, matched_node)