Ejemplo n.º 1
0
    def reindex_to_parrent_nodes(self, nodes):
        """Translate node ids of this subgraph into parent-graph node ids.

        The lookup is delegated to ``graph_kernel.map_nodes`` using the
        subgraph-to-parent table stored in ``self._to_reindex``.

        Args:
            nodes: A list of node ids that belong to this subgraph.

        Return:
            A list with the corresponding node ids in the parent graph.
        """
        to_parent_table = self._to_reindex
        parent_ids = graph_kernel.map_nodes(nodes, to_parent_table)
        return parent_ids
Ejemplo n.º 2
0
    def reindex_from_parrent_nodes(self, nodes):
        """Translate parent-graph node ids into node ids of this subgraph.

        The lookup is delegated to ``graph_kernel.map_nodes`` using the
        parent-to-subgraph table stored in ``self._from_reindex``.

        Args:
            nodes: A list of node ids taken from the parent graph.

        Return:
            A list with the corresponding node ids in this subgraph.
        """
        from_parent_table = self._from_reindex
        subgraph_ids = graph_kernel.map_nodes(nodes, from_parent_table)
        return subgraph_ids
Ejemplo n.º 3
0
    def batch_fn(self, batch_nodes):
        """Assemble one training batch around ``batch_nodes``.

        Samples a neighborhood with ``neighbor_sample`` and gathers the
        labels and year offsets that go with it.

        Args:
            batch_nodes: Node ids of the batch; used to index
                ``self.dataset.y`` and as seeds for ``neighbor_sample``.

        Return:
            A tuple ``(graph_list, neigh_nodes, y, sub_label_y,
            sub_label_index, pos)`` where

            * ``graph_list`` is the first value returned by
              ``neighbor_sample``;
            * ``neigh_nodes`` is ``graph_list[0][1]`` as an int32 array;
            * ``y`` is ``self.dataset.y[batch_nodes]``;
            * ``sub_label_y`` / ``sub_label_index`` are the labels and the
              re-indexed ids of sampled nodes that are in
              ``self.dataset.train_idx_label`` but not in the batch itself;
            * ``pos`` is ``2021 - self.dataset.year[neigh_nodes]``.
        """
        dataset = self.dataset
        graph_list, from_reindex = neighbor_sample(dataset.graph, batch_nodes,
                                                   self.samples)

        neigh_nodes = np.array(graph_list[0][1], dtype='int32')
        y = dataset.y[batch_nodes]

        # Sampled nodes outside the batch whose labels are listed in
        # train_idx_label (expected to be a set, since it is used with `&`).
        outside_batch = set(neigh_nodes) - set(batch_nodes)
        label_idx = list(outside_batch & dataset.train_idx_label)

        sub_label_index = graph_kernel.map_nodes(label_idx, from_reindex)
        sub_label_y = dataset.y[label_idx]

        # NOTE(review): 2021 looks like a reference year so that `pos` is an
        # age-like offset per node -- confirm against the dataset definition.
        pos = 2021 - dataset.year[neigh_nodes]
        return graph_list, neigh_nodes, y, sub_label_y, sub_label_index, pos
Ejemplo n.º 4
0
def neighbor_sample(graph, nodes, samples):
    """Sample a layered neighborhood over a sequence of per-type graphs.

    ``graph`` is iterated and indexed (``graph[0]``), so it is expected to
    be a sequence of graph objects, one per edge type. For every entry of
    ``samples`` one layer is built: predecessors of the current frontier are
    collected from each graph in turn, all resulting edges are merged into a
    single subgraph, and the merged node list becomes the next frontier.

    Args:
        graph: A sequence of graphs. Each one must provide
            ``sample_predecessor``/``predecessor`` and an
            ``edge_feat['edge_type']`` lookup indexed by edge id.
        nodes: The seed nodes of the first layer.
        samples: One entry per layer. ``-1`` means "take every predecessor";
            any other value selects sampling, where the actual fan-out per
            graph comes from the hard-coded ``samples_list`` table below
            (the value itself is not used as a degree).

    Return:
        A tuple ``(graph_list, from_reindex)``. ``graph_list`` holds one
        ``(subgraph, neigh_nodes, sub_node_index)`` tuple per layer, with
        the last-built layer first. ``from_reindex`` maps node id to
        position in ``neigh_nodes`` for the last-built layer; it is an
        empty dict when ``samples`` is empty.
    """
    # Fan-out table indexed as samples_list[layer][graph_position].
    # NOTE(review): only 2 layers x 5 graphs are covered; sampling with more
    # layers or graphs raises IndexError -- confirm this limit is intended.
    samples_list = [[25, 10, 10, 5, 5], [15, 10, 10, 5, 5]]

    graph_list = []
    # Defined up front so an empty `samples` returns cleanly instead of
    # failing on an unbound name at the return statement.
    from_reindex = {}
    for idi, max_deg in enumerate(samples):
        start_nodes = copy.deepcopy(nodes)
        edges = []
        edge_ids = []
        edge_feats = []
        neigh_nodes = [start_nodes]
        for idj, g_t in enumerate(graph):
            if max_deg == -1:
                # Full neighborhood: query each per-type graph. The previous
                # code queried the container itself and dropped the result,
                # so no edges were ever collected for this case.
                pred_nodes, pred_eids = g_t.predecessor(start_nodes,
                                                        return_eids=True)
            else:
                pred_nodes, pred_eids = g_t.sample_predecessor(
                    start_nodes,
                    max_degree=samples_list[idi][idj],
                    return_eids=True)
            neigh_nodes.append(pred_nodes)
            for dst_node, src_nodes, src_eids in zip(start_nodes, pred_nodes,
                                                     pred_eids):
                for src_node, src_eid in zip(src_nodes, src_eids):
                    edges.append((src_node, dst_node))
                    edge_ids.append(src_eid)
                    edge_feats.append(g_t.edge_feat['edge_type'][src_eid])
        neigh_nodes = flat_node_and_edge(neigh_nodes)

        # Position of every node inside this layer's node list.
        from_reindex = {x: i for i, x in enumerate(neigh_nodes)}
        sub_node_index = graph_kernel.map_nodes(nodes, from_reindex)

        # The subgraph is created from the first graph without features;
        # the merged per-edge types are attached manually afterwards.
        sg = subgraph(graph[0],
                      eid=edge_ids,
                      nodes=neigh_nodes,
                      edges=edges,
                      with_node_feat=False,
                      with_edge_feat=False)
        edge_feats = np.array(edge_feats, dtype='int32')
        sg._edge_feat['edge_type'] = edge_feats

        graph_list.append((sg, neigh_nodes, sub_node_index))
        # The nodes reached in this layer seed the next one.
        nodes = neigh_nodes

    graph_list = graph_list[::-1]
    return graph_list, from_reindex
Ejemplo n.º 5
0
def neighbor_sample(graph, nodes, samples):
    """Build one sampled subgraph per entry of ``samples``.

    Each layer starts from the current frontier, collects its predecessors
    (all of them when the entry is ``-1``, otherwise at most that many per
    node via ``sample_predecessor``), and turns the collected edges into a
    subgraph. The node list of a layer is the frontier of the next one.

    Args:
        graph: A graph in numpy mode (``graph.is_tensor()`` must be False).
        nodes: The seed nodes of the first layer.
        samples: Per-layer maximum degree; ``-1`` disables sampling.

    Return:
        A list of ``(subgraph, neigh_nodes, sub_node_index)`` tuples, with
        the last-built layer first.
    """
    assert not graph.is_tensor(), "You must call Graph.numpy() first."

    layers = []
    for fanout in samples:
        seeds = copy.deepcopy(nodes)
        if fanout != -1:
            pred_nodes, pred_eids = graph.sample_predecessor(
                seeds, max_degree=fanout, return_eids=True)
        else:
            pred_nodes, pred_eids = graph.predecessor(seeds,
                                                      return_eids=True)

        # One ((src, dst), eid) record per collected edge, in the order the
        # predecessors are listed for each seed.
        records = [((src, dst), eid)
                   for dst, srcs, eids in zip(seeds, pred_nodes, pred_eids)
                   for src, eid in zip(srcs, eids)]
        edges = [edge for edge, _ in records]
        edge_ids = [eid for _, eid in records]

        neigh_nodes = flat_node_and_edge([seeds, pred_nodes])

        # Position of every node inside this layer's node list.
        from_reindex = {node: pos for pos, node in enumerate(neigh_nodes)}
        sub_node_index = graph_kernel.map_nodes(nodes, from_reindex)

        # The add_self_loop(sg, sub_node_index) step is disabled here.
        sg = subgraph(graph,
                      eid=edge_ids,
                      nodes=neigh_nodes,
                      edges=edges,
                      with_node_feat=False,
                      with_edge_feat=True)

        layers.append((sg, neigh_nodes, sub_node_index))
        nodes = neigh_nodes

    return list(reversed(layers))
Ejemplo n.º 6
0
def graphsage_sample(graph, nodes, samples, ignore_edges=None):
    """Implement of graphsage sample.

    Reference paper: https://cs.stanford.edu/people/jure/pubs/graphsage-nips17.pdf.

    Layers are processed from the last entry of ``samples`` to the first.
    Each step samples predecessors of the current frontier, accumulates the
    new edges and nodes on top of what earlier steps collected, and uses the
    newly discovered nodes as the next frontier.

    Args:
        graph: A pgl graph instance in numpy mode
            (``graph.is_tensor()`` must be False).
        nodes: Sample starting from nodes.
        samples: A list, number of neighbors in each layer.
        ignore_edges: Optional list of ``(src, dst)`` edges that will be
            ignored. ``None`` (the default) ignores nothing.

    Return:
        A list with one ``(subgraph, sample_index, node_index)`` tuple per
        layer. Every subgraph uses the full accumulated node list;
        ``sample_index`` is that node list as an int64 array and
        ``node_index`` is the position of the input ``nodes`` inside it.
        An empty ``samples`` yields an empty list.
    """
    assert not graph.is_tensor(), "You must call Graph.numpy() first."
    num_layers = len(samples)
    if num_layers == 0:
        # No layers requested: nothing to sample (previously an IndexError).
        return []
    if ignore_edges is None:
        ignore_edges = []

    node_index = copy.deepcopy(nodes)
    start_nodes = nodes
    nodes = list(start_nodes)
    eids, edges = [], []
    nodes_set = set(nodes)
    layer_nodes, layer_eids, layer_edges = [], [], []
    ignore_edge_set = {edge_hash(src, dst) for src, dst in ignore_edges}

    for layer_idx in reversed(range(num_layers)):
        # len() rather than truthiness: start_nodes may be an array.
        if len(start_nodes) == 0:
            # Frontier exhausted: this layer reuses what was accumulated.
            layer_nodes = [nodes] + layer_nodes
            layer_eids = [eids] + layer_eids
            layer_edges = [edges] + layer_edges
            continue
        batch_pred_nodes, batch_pred_eids = graph.sample_predecessor(
            start_nodes, samples[layer_idx], return_eids=True)
        last_nodes_set = nodes_set

        # Work on copies so the lists already stored for other layers are
        # not extended by this layer's additions.
        nodes, eids = copy.copy(nodes), copy.copy(eids)
        edges = copy.copy(edges)
        nodes_set, eids_set = set(nodes), set(eids)
        for srcs, dst, pred_eids in zip(batch_pred_nodes, start_nodes,
                                        batch_pred_eids):
            for src, eid in zip(srcs, pred_eids):
                if edge_hash(src, dst) in ignore_edge_set:
                    continue
                if eid not in eids_set:
                    eids.append(eid)
                    edges.append([src, dst])
                    eids_set.add(eid)
                if src not in nodes_set:
                    nodes.append(src)
                    nodes_set.add(src)
        layer_edges = [edges] + layer_edges
        # Only nodes first seen in this layer are expanded next.
        start_nodes = list(nodes_set - last_nodes_set)
        layer_nodes = [nodes] + layer_nodes
        layer_eids = [eids] + layer_eids

    # layer_nodes[0] is the node list of the last processed layer, i.e. the
    # largest accumulated one; all subgraphs share it.
    from_reindex = {x: i for i, x in enumerate(layer_nodes[0])}
    node_index = graph_kernel.map_nodes(node_index, from_reindex)
    sample_index = np.array(layer_nodes[0], dtype="int64")

    graph_list = []
    for i in range(num_layers):
        sg = subgraph(graph,
                      nodes=layer_nodes[0],
                      eid=layer_eids[i],
                      edges=layer_edges[i])
        graph_list.append((sg, sample_index, node_index))

    return graph_list