Example #1
import json
import time
from pathlib import Path

import numpy as np
from networkx import laplacian_matrix, read_gpickle  # read_gpickle requires networkx < 3.0
from networkx.readwrite import json_graph
from scipy.io import mmread, mmwrite


def load_dataset(
    dataset,
    prefix='',
):
    # build every path relative to the optional prefix so the cached .mtx
    # file is found again on later calls
    prefix = Path(prefix, 'dataset', dataset)
    mtx_path = Path(prefix, f'{dataset}.mtx')
    feats_path = Path(prefix, f'{dataset}-feats.npy')
    feats = np.load(str(feats_path))
    if mtx_path.exists():
        print('loading previously cached .mtx file')
        laplacian = mmread(str(mtx_path))
    else:
        if dataset in [
                'citeseer',
                'cora',
                'pubmed',
        ]:
            with open(Path(prefix, f'{dataset}-G.json')) as f:
                G_data = json.load(f)
            G = json_graph.node_link_graph(G_data)
        elif dataset in ['Amazon2M', 'reddit', 'ppi']:
            start = time.time()
            G = read_gpickle(str(Path(prefix, f'{dataset}.gpickle')))
            print(f'gpickle load finished in {time.time() - start:.2f}s')
        else:
            raise ValueError(f'unknown dataset: {dataset}')
        print('calculating Laplacian')
        start = time.time()
        laplacian = laplacian_matrix(G)
        print(f'calculation finished in {time.time() - start:.2f}s')
        # mmwrite opens and closes the target file itself; no manual open() needed
        mmwrite(str(mtx_path), laplacian)
    return laplacian, feats
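A minimal usage sketch (hypothetical paths: it assumes dataset/cora/cora-feats.npy plus either a cached cora.mtx or a cora-G.json file on disk):

laplacian, feats = load_dataset('cora')
print(laplacian.shape, feats.shape)  # the Laplacian is cached to cora.mtx on the first call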
Example #2
    def draw(self):
        # assumes module-level imports: numpy as np, networkx as nx,
        # matplotlib.pyplot as plt, laplacian_matrix from networkx,
        # and DBSCAN from sklearn.cluster
        colors = ['c', 'm', 'g', 'y', 'r', 'k']

        plt.subplot(131)
        nx.draw_networkx_nodes(self.G, self.pos, node_size=500)
        edges = [(u, v) for (u, v, d) in self.G.edges(data=True)]
        edges_width = [10 * d['w'] for (u, v, d) in self.G.edges(data=True)]
        nx.draw_networkx_edges(self.G,
                               self.pos,
                               edgelist=edges,
                               width=edges_width)
        nx.draw_networkx_labels(self.G,
                                self.pos,
                                font_size=20,
                                font_family='sans-serif')
        plt.axis('off')

        plt.subplot(132)
        # dense Laplacian as an ndarray (todense() alone returns np.matrix,
        # whose rows index awkwardly)
        lap_matrix = np.asarray(laplacian_matrix(self.G, weight='w').todense())
        node_list = list(self.G.nodes())
        k = 3
        # the Laplacian is symmetric, so eigh returns real eigenvalues in
        # ascending order; the first k columns are the bottom-k eigenvectors
        eigenvalues, eigenvectors = np.linalg.eigh(lap_matrix)
        topk_evecs = eigenvectors[:, :k]
        topk_evecs = self.standardization(topk_evecs)
        x = topk_evecs[:, 0]
        y = topk_evecs[:, 1]
        plt.scatter(x, y, c='r', s=50)
        for i, txt in enumerate(node_list):
            plt.annotate(txt, (x[i], y[i]))

        plt.subplot(133)
        clustering = DBSCAN(eps=0.7, min_samples=2)
        c_res = clustering.fit_predict(topk_evecs)
        dict_res = {}
        for c, n in zip(c_res, node_list):
            if c in dict_res:
                dict_res[c].append(n)
            else:
                dict_res[c] = [n]
        for idx, label in enumerate(dict_res):
            nx.draw_networkx_nodes(self.G,
                                   self.pos,
                                   nodelist=dict_res[label],
                                   node_size=500,
                                   # wrap around if there are more clusters than colors
                                   node_color=colors[idx % len(colors)])

        nx.draw_networkx_edges(self.G,
                               self.pos,
                               edgelist=edges,
                               width=edges_width)
        nx.draw_networkx_labels(self.G,
                                self.pos,
                                font_size=20,
                                font_family='sans-serif')
        plt.axis('off')

        plt.show()
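The method assumes its host object provides G, pos, and a standardization helper, none of which appear in the snippet. A minimal scaffold (all names hypothetical) that draw() could be pasted into:

import networkx as nx
import numpy as np

class SpectralDemo:
    # hypothetical host class for the draw() method above
    def __init__(self, G):
        self.G = G
        self.pos = nx.spring_layout(G, seed=42)  # one fixed layout for all three panels

    def standardization(self, m):
        # assumed behavior: column-wise z-scoring of the eigenvector matrix
        return (m - m.mean(axis=0)) / (m.std(axis=0) + 1e-12)

Calling SpectralDemo(G).draw() on a small graph whose edges carry a 'w' weight attribute then shows the graph itself, its 2-D spectral embedding, and the DBSCAN clusters side by side.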
Example #3
import json

from networkx import laplacian_matrix
from networkx.readwrite import json_graph
from scipy.io import mmwrite


def json2mtx(dataset):
    with open("dataset/{}/{}-G.json".format(dataset, dataset)) as f:
        G_data = json.load(f)
    G = json_graph.node_link_graph(G_data)
    # nodelist assumes the node ids are exactly the integers 0 .. n-1
    laplacian = laplacian_matrix(G, nodelist=range(len(G.nodes)))
    # mmwrite opens and closes the target file itself; no manual open() needed
    mmwrite("dataset/{}/{}.mtx".format(dataset, dataset), laplacian)

    return laplacian
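A short usage sketch (it assumes dataset/cora/cora-G.json exists in node-link format):

laplacian = json2mtx('cora')  # also caches the Laplacian to dataset/cora/cora.mtx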
Example #4
def json2mtx(dataset):
    # same imports as Example #3; identical logic with a hard-coded base path
    base = "/home/xl289/CS6241_proj/dataset/{}/{}".format(dataset, dataset)
    with open(base + "-G.json") as f:
        G_data = json.load(f)
    G = json_graph.node_link_graph(G_data)
    laplacian = laplacian_matrix(G)
    # mmwrite opens and closes the target file itself; no manual open() needed
    mmwrite(base + ".mtx", laplacian)

    return laplacian
Example #5
import numpy as np
import networkx.linalg.laplacianmatrix as lpm


def laplacian_cluster(g, partitions=8):
    """Recursively bisect g on the sign of the Fiedler vector."""
    nodes = list(g.nodes())
    if partitions < 2 or len(nodes) <= 1:
        return [nodes]

    L = np.asarray(lpm.laplacian_matrix(g).todense())
    # L is symmetric positive semidefinite, so eigh returns real eigenvalues
    # in ascending order; column 1 is the Fiedler vector (second-smallest)
    eig_vals, eig_vecs = np.linalg.eigh(L)
    vec = eig_vecs[:, 1]

    # split the nodes by the sign of their Fiedler-vector entries
    n1 = [nodes[i] for i, v in enumerate(vec) if v < 0]
    n2 = [nodes[i] for i, v in enumerate(vec) if v >= 0]
    g1 = g.subgraph(n1)
    g2 = g.subgraph(n2)
    out = []
    out.extend(laplacian_cluster(g1, partitions=partitions // 2))
    out.extend(laplacian_cluster(g2, partitions=partitions // 2))
    return out
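A quick, illustrative sanity check: on a barbell graph the Fiedler vector changes sign at the bridge, so a single bisection should recover the two cliques.

import networkx as nx

G = nx.barbell_graph(5, 0)  # two 5-cliques joined by a single edge
print(laplacian_cluster(G, partitions=2))
# expected: one cluster per clique, e.g. [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]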
Example #6
import time
from argparse import ArgumentParser

import networkx as nx
import numpy as np
from networkx import laplacian_matrix
from scipy.sparse import identity
from sklearn.preprocessing import normalize

# helpers assumed from the surrounding project (not shown in this snippet):
# deepwalk, node2vec, mtx2graph, mtx2matrix, json2mtx


def main():
    parser = ArgumentParser(description="ne")
    parser.add_argument("-d", "--dataset", type=str, default="cora", \
                        help="input dataset")
    parser.add_argument("-o", "--coarse", type=str, default="simple", \
                        help="choose either simple_coarse or lamg_coarse, [simple, lamg]")
    parser.add_argument("-c", "--mcr_dir", type=str, default="/opt/matlab/R2018A/", \
                        help="directory of matlab compiler runtime (only required by lamg_coarsen)")
    parser.add_argument("-s", "--search_ratio", type=int, default=12, \
                        help="control the search space in graph fusion process (only required by lamg_coarsen)")
    parser.add_argument("-r", "--reduce_ratio", type=int, default=2, \
                        help="control graph coarsening levels (only required by lamg_coarsen)")
    parser.add_argument("-v", "--level", type=int, default=1, \
                        help="number of coarsening levels (only required by simple_coarsen)")
    parser.add_argument("-n", "--num_neighs", type=int, default=2, \
                        help="control k-nearest neighbors in graph fusion process")
    parser.add_argument("-l", "--lda", type=float, default=0.1, \
                        help="control self loop in adjacency matrix")
    parser.add_argument("-e", "--embed_path", type=str, default="embed_results/embeddings_palone_deepwalk.npy", \
                        help="path of embedding result")
    parser.add_argument("-m", "--embed_method", type=str, default="deepwalk", \
                        help="[deepwalk, node2vec, graphsage]")
    parser.add_argument("-f", "--fusion", default=True, action="store_false", \
                        help="whether use graph fusion")
    parser.add_argument("-p", "--power", default=False, action="store_true", \
                        help="Strong power of graph filter, set True to enhance filter power")
    parser.add_argument("-g", "--sage_model", type=str, default="mean", \
                        help="aggregation function in graphsage")
    parser.add_argument("-w", "--sage_weighted", default=True, action="store_false", \
                        help="whether consider weighted reduced graph")

    args = parser.parse_args()

    dataset = args.dataset
    feature_path = "dataset/{}/{}-feats.npy".format(dataset, dataset)
    fusion_input_path = "dataset/{}/{}.mtx".format(dataset, dataset)
    reduce_results = "reduction_results/"
    mapping_path = "{}Mapping.mtx".format(reduce_results)

    if args.fusion:
        coarsen_input_path = "dataset/{}/fused_{}.mtx".format(dataset, dataset)
    else:
        coarsen_input_path = "dataset/{}/{}.mtx".format(dataset, dataset)

    ###### Load Data ######
    print("%%%%%% Loading Graph Data %%%%%%")

    if args.dataset == "ogb":
        d_name = "ogbl-ppa"

        from ogb.linkproppred import LinkPropPredDataset

        dataset = LinkPropPredDataset(name=d_name)
        print(dataset)
        print(dataset[0])

        split_edge = dataset.get_edge_split()
        print(split_edge)
        # train_edge, valid_edge, test_edge = split_edge["train"], split_edge["valid"], split_edge["test"]
        graph = dataset[0]  # graph: library-agnostic graph object

        print(graph['edge_index'].shape)
        print(graph['edge_feat'])
        print(graph['node_feat'])
        # print((np.array(graph['node_feat']) == 0.0).all())
        graph['directed'] = False
        print(graph)
        graph_nodes = [i for i in range(0, graph['num_nodes'])]
        G = nx.Graph()
        G.add_nodes_from(graph_nodes)
        G.add_edges_from(graph['edge_index'].T)
        # nx.draw(G, with_labels=True)
        print(G.nodes)
        # plt.show()
        laplacian = laplacian_matrix(G)
        print(laplacian)
    else:
        path = "dataset/{}/{}.mtx".format(dataset, dataset)
        G = mtx2graph(path)
        # this json2mtx variant is assumed to also return the edge list,
        # which the (disabled) link-prediction step below expects
        laplacian, edges = json2mtx(dataset)

    ## node features are only needed for graph fusion or graphsage
    if args.fusion or args.embed_method == "graphsage":

        if args.dataset == 'ogb':
            feature = graph['node_feat']
        else:
            feature = np.load(feature_path)
        # print(feature[1][0])

    ###### Embed Reduced Graph ######

    print("%%%%%% Starting Graph Embedding %%%%%%")

    if args.embed_method == "deepwalk":
        embed_start = time.process_time()
        embeddings = deepwalk(G)

    elif args.embed_method == "node2vec":
        embed_start = time.process_time()
        embeddings = node2vec(G)

    elif args.embed_method == "graphsage":
        from embed_methods.graphsage.graphsage import graphsage
        nx.set_node_attributes(G, False, "test")
        nx.set_node_attributes(G, False, "val")

        ## obtain the mapping operator
        if args.coarse == "lamg":
            mapping = normalize(mtx2matrix(mapping_path), norm='l1', axis=1)
        else:
            # `projections` comes from the simple-coarsening step,
            # which is not shown in this snippet
            mapping = identity(feature.shape[0])
            for p in projections:
                mapping = mapping @ p
            mapping = normalize(mapping, norm='l1', axis=1).transpose()

        ## the coarsening ratio below scales the number of training iterations
        coarse_ratio = mapping.shape[1] / mapping.shape[0]

        ## map node feats to the coarse graph
        feats = mapping @ feature

        embed_start = time.process_time()
        embeddings = graphsage(G, feats, args.sage_model, args.sage_weighted,
                               int(1000 / coarse_ratio))

    else:
        raise ValueError("unknown embed method: " + args.embed_method)

    embed_time = time.process_time() - embed_start

    ###### Save Embeddings ######

    np.save(args.embed_path, embeddings)

    ###### Evaluation ######
    print("%%%%%% Starting Evaluation %%%%%%")

    # link prediction (kept disabled in the original source)
    embeds = np.load(args.embed_path)
    # if args.dataset == "ogb":
    #     acc, pre, sen, mcc, auc = linkprediction_ogb(split_edge, embeds)
    # else:
    #     acc, pre, sen, mcc, auc = linkprediction(edges, embeds, dataset)

    print("Running regression..")

    # node prediction
    # run_regression(np.array(train_embeds), np.array(train_labels), np.array(test_embeds), np.array(test_labels))
    # lr("dataset/{}/".format(dataset), args.embed_path, dataset)

    ###### Report timing information ######
    print("%%%%%% CPU time %%%%%%")
    total_time = embed_time
    if args.fusion:
        # fusion itself is not timed in this snippet
        print("Graph Fusion     Time: (not measured)")
    else:
        print("Graph Fusion     Time: 0")

    print(f"Graph Embedding  Time: {embed_time:.3f}")
    print(f"Total Time = Embedding_time = {total_time:.3f}")