def load_citation_graph(graph_name):
    """
    Loads one of the DGL-hosted citation graph datasets

    :param graph_name: name of the citation graph to load; one of
        ['cora', 'citeseer', 'pubmed']
    :return: namedtuple for the citation graph dataset; attributes:
        [graph, features, labels, mask]. ``mask`` is built from the
        dataset's ``train_mask``.
    :raises ValueError: if ``graph_name`` is not one of the supported names
        (this includes non-string values such as ``None``)
    """
    # retrieve the dataset
    if graph_name == 'cora':
        dataset = citation_graph.load_cora()
    elif graph_name == 'citeseer':
        dataset = citation_graph.load_citeseer()
    elif graph_name == 'pubmed':
        dataset = citation_graph.load_pubmed()
    else:
        # "{}" instead of "{:s}": the "s" format spec raises TypeError for
        # non-string arguments, which would hide the intended ValueError.
        raise ValueError(
            "Unknown citation graph name <{}>; "
            "Expected one of [cora, citeseer, pubmed]".format(graph_name))

    # return the dataset's components
    dataset_tuple = namedtuple("citation_graph",
                               ["graph", "features", "labels", "mask"])
    return dataset_tuple(DGLGraph(dataset.graph),
                         torch.FloatTensor(dataset.features),
                         torch.LongTensor(dataset.labels),
                         torch.BoolTensor(dataset.train_mask))
예제 #2
0
def load_citseer_data():
    """Load the CiteSeer citation dataset.

    :return: tuple ``(g, features, labels, train_mask, test_mask)`` where
        ``g`` is a ``DGLGraph`` built from the dataset's graph, ``features``
        is a float tensor, ``labels`` a long tensor and both masks are
        boolean tensors.
    """
    citeseer = citegrh.load_citeseer()
    node_features = th.FloatTensor(citeseer.features)
    node_labels = th.LongTensor(citeseer.labels)
    # Both masks get the same boolean conversion.
    train_mask, test_mask = (
        th.BoolTensor(raw_mask)
        for raw_mask in (citeseer.train_mask, citeseer.test_mask)
    )
    graph = DGLGraph(citeseer.graph)
    return graph, node_features, node_labels, train_mask, test_mask
예제 #3
0
def load_data(dataset):
    """Load a node-classification dataset and split it 70/20/10 by node index.

    :param dataset: dataset name. 'cora', 'pubmed' and 'citeseer' are loaded
        through ``citegrh``; 'amazon-computers', 'amazon-photo' and
        'coauthor-cs' through ``gnn_benckmark``. Any other value falls back
        to ``gnn_benckmark.Coauthor('physics')``.
    :return: tuple ``(g, features, labels, num_labels, train_mask, val_mask,
        test_mask)``. The masks are boolean tensors covering the first 70%,
        the next 20% and the remaining nodes, in node-index order.
    """
    if dataset in ('cora', 'pubmed', 'citeseer'):
        if dataset == 'cora':
            data = citegrh.load_cora()
        elif dataset == 'pubmed':
            data = citegrh.load_pubmed()
        else:
            data = citegrh.load_citeseer()
        features = th.FloatTensor(data.features)
        labels = th.LongTensor(data.labels)
        num_labels = data.num_labels
        g = DGLGraph(data.graph)
    else:
        if dataset == 'amazon-computers':
            benchmark = gnn_benckmark.AmazonCoBuy('computers')
        elif dataset == 'amazon-photo':
            benchmark = gnn_benckmark.AmazonCoBuy('photo')
        elif dataset == 'coauthor-cs':
            benchmark = gnn_benckmark.Coauthor('cs')
        else:
            # Unrecognised names keep the historical fallback so existing
            # callers that rely on it continue to work.
            benchmark = gnn_benckmark.Coauthor('physics')
        g = benchmark[0]
        features = th.FloatTensor(g.ndata['feat'].float())
        labels = th.LongTensor(g.ndata['label'])
        # Labels are assumed to be 0-based class ids, so max + 1 is the count.
        num_labels = int(th.max(labels) + 1)

    # dataset split points
    num_nodes = labels.shape[0]
    split1 = int(0.7 * num_nodes)
    split2 = int(0.9 * num_nodes)
    train_mask = th.BoolTensor(_sample_mask(range(split1), num_nodes))
    val_mask = th.BoolTensor(_sample_mask(range(split1, split2), num_nodes))
    # The test range runs to num_nodes (exclusive) so the last node is
    # included; stopping at num_nodes - 1 would drop it from every split.
    test_mask = th.BoolTensor(_sample_mask(range(split2, num_nodes), num_nodes))
    print(
        "Total size: {:}| Feature dims: {:}| Train size: {:}| Val size: {:}| Test size: {:}| Num of labels: {:}"
        .format(features.size(0), features.size(1), len(labels[train_mask]),
                len(labels[val_mask]), len(labels[test_mask]), num_labels))
    return g, features, labels, num_labels, train_mask, val_mask, test_mask
예제 #4
0
def load_citeseer_data():
    """Load CiteSeer and build a DGL graph with one self loop per node.

    :return: tuple ``(g, features, labels, mask, val_mask, test_mask)``;
        ``mask`` is the training mask. All three masks are boolean tensors.
    """
    data = citegrh.load_citeseer()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # Boolean mask, matching val_mask/test_mask below; indexing with uint8
    # (ByteTensor) masks is deprecated in PyTorch.
    mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = data.graph
    # add self loop, A^hat = A + I in the paper
    # Existing self loops are removed first (in place, on data.graph) so the
    # add_edges call below does not duplicate them.
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    # return graph, node features, labels, and the train/val/test masks
    return g, features, labels, mask, val_mask, test_mask
def load_data(dataset_name: str):
    """Load one of the citation datasets as tensors plus a DGL graph.

    :param dataset_name: one of "cora", "citeseer" or "pubmed"
    :return: tuple ``(g, features, labels, train_mask, test_mask)``
    :raises ValueError: if ``dataset_name`` is not a supported name
    """
    if dataset_name == "cora":
        data = citegrh.load_cora()
    elif dataset_name == "citeseer":
        data = citegrh.load_citeseer()
    elif dataset_name == "pubmed":
        data = citegrh.load_pubmed()
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # `data` a few lines further down.
        raise ValueError(
            "Unknown dataset: {!r}; expected one of "
            "['cora', 'citeseer', 'pubmed']".format(dataset_name))

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask
예제 #6
0
def load_data(dataset_name, self_loops):
    """Return the raw dataset object selected by ``dataset_name``.

    :param dataset_name: 'cora', 'citeseer', 'pubmed', "PPI", or any string
        starting with 'reddit'
    :param self_loops: forwarded as ``self_loop`` to ``RedditDataset``; not
        used for the other datasets
    :return: the object produced by the matching loader/constructor
    :raises ValueError: if ``dataset_name`` matches none of the above
        (including ``None``)
    """
    if dataset_name == 'cora':
        return citegrh.load_cora()
    if dataset_name == 'citeseer':
        return citegrh.load_citeseer()
    if dataset_name == 'pubmed':
        return citegrh.load_pubmed()
    if dataset_name == "PPI":
        # Only the 'test' portion of PPI is requested here.
        return PPIDataset('test')

    is_reddit = dataset_name is not None and dataset_name.startswith('reddit')
    if not is_reddit:
        raise ValueError('Unknown dataset: {}'.format(dataset_name))
    return RedditDataset(self_loop=self_loops)
예제 #7
0
def load_data(dataset="cora"):
    """Load a dataset and attach tensors, a self-looped graph and adjacency.

    The loaded dataset object is updated in place with:
    ``features`` (float tensor), ``labels`` (long tensor), ``size`` (number
    of labels), ``g`` (DGLGraph with self loops re-added), ``adj`` (dense
    adjacency of ``g``) and ``Prob`` (``adj`` normalised with p=1 along
    dim 1).

    :param dataset: one of "cora", "pubmed", "citeseer", "synthetic"
    :return: the dataset object with the attributes above set
    """
    supported = ["cora", "pubmed", "citeseer", "synthetic"]
    assert dataset in supported

    if dataset == "citeseer":
        data = citegrh.load_citeseer()
    elif dataset == "pubmed":
        data = citegrh.load_pubmed()
    elif dataset == "cora":
        data = citegrh.load_cora()
    else:
        data = synthetic_data()

    # Convert the raw arrays to tensors.
    data.features = th.FloatTensor(data.features)
    data.labels = th.LongTensor(data.labels)
    data.size = data.labels.shape[0]

    # Strip existing self loops, then add one per node on the DGL graph.
    nx_graph = data.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    dgl_graph = DGLGraph(nx_graph)
    dgl_graph.add_edges(dgl_graph.nodes(), dgl_graph.nodes())
    data.g = dgl_graph

    dense_adj = dgl_graph.adjacency_matrix(transpose=None).to_dense()
    data.adj = dense_adj
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)
    print("============Successfully Load %s===============" % dataset)
    return data
예제 #8
0
from itertools import product

import torch

from runtime.dgl.gcn import GCN
from runtime.dgl.gat import GAT
from runtime.dgl.train import train_runtime

from dgl.data import citation_graph
from dgl import DGLGraph

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the three citation datasets once, up front.
Cora = citation_graph.load_cora()
CiteSeer = citation_graph.load_citeseer()
PubMed = citation_graph.load_pubmed()

# Time every (dataset, model) combination.
for dataset, model_cls in product([Cora, CiteSeer, PubMed], [GCN, GAT]):
    graph = DGLGraph(dataset.graph)
    features = torch.tensor(dataset.features, dtype=torch.float, device=device)
    targets = torch.tensor(dataset.labels, dtype=torch.long, device=device)
    train_mask = torch.tensor(dataset.train_mask,
                              dtype=torch.uint8,
                              device=device)

    # Add one self loop per node before computing degrees.
    graph.add_edges(graph.nodes(), graph.nodes())

    # deg^(-1/2); zero-degree nodes give inf, which is reset to 0.
    inv_sqrt_deg = graph.in_degrees().float().pow(-0.5)
    inv_sqrt_deg[torch.isinf(inv_sqrt_deg)] = 0
    graph.ndata['norm'] = inv_sqrt_deg.unsqueeze(1).to(device)

    model = model_cls(graph, features.size(1), dataset.num_labels)
    elapsed = train_runtime(model,
                            features,
                            targets,
                            train_mask,
                            epochs=200,
                            device=device)
    print('{} - {}: {:.2f}s'.format(dataset.name, model_cls.__name__, elapsed))
예제 #9
0
import networkx as nx
import matplotlib.pyplot as plt
import torch
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import sys
import dgl
from dgl.data import citation_graph as citegrh

# Load the CiteSeer citation dataset; the commented-out line below is the
# Cora alternative.
#data = citegrh.load_cora()
data = citegrh.load_citeseer()
# Wrap the dataset's graph in a DGLGraph.
G = dgl.DGLGraph(data.graph)
# Node labels as a tensor.
labels = th.tensor(data.labels)


def visualize(labels, g, out_path='citeseer4.png'):
    """Draw ``g`` with nodes coloured by ``labels`` and save it as an image.

    :param labels: per-node values passed to networkx as ``node_color``
        (mapped through the 'coolwarm' colormap)
    :param g: graph handed to ``nx.random_layout`` and ``nx.draw_networkx``
        (presumably a networkx graph -- confirm against the caller)
    :param out_path: file the figure is written to; defaults to the
        previously hard-coded 'citeseer4.png'

    The layout is random and unseeded, so node positions differ between
    calls. The figure is left open after saving.
    """
    pos = nx.random_layout(g)  #, seed=1)
    plt.figure(figsize=(8, 8))
    plt.axis('off')
    nx.draw_networkx(g,
                     pos=pos,
                     node_size=50,
                     cmap=plt.get_cmap('coolwarm'),
                     node_color=labels,
                     edge_color='k',
                     arrows=False,
                     width=0.5,
                     style='dotted',
                     with_labels=False)
    plt.savefig(out_path)
예제 #10
0
 lrate = args.r
 bsize = args.b
 it = args.it
 path = args.p
 algo = args.algo
 if len(path) > 0:
     #G = mmread(path)
     #nxgraph = nx.Graph(G)
     #graph = dgl.from_networkx(nxgraph)
     edges = readmtxGraph(path)
     graph = dgl.graph(edges)
     #print(graph.edges())
 elif graph == "simple":
     graph = dgl.graph(([0, 0, 1, 1, 2, 3], [1, 2, 2, 4, 3, 4]))
 elif graph == "citeseer":
     data = load_citeseer(".")
     graph = data[0]
 elif graph == "pubmed":
     data = load_pubmed(".")
     graph = data[0]
 else:
     data = load_cora(".")
     graph = data[0]
 N = len(graph.nodes())
 print("#Nodes:", N, "#Edges:", len(graph.edges()[0]))
 embed = torch.rand(N, dim)
 #print(embed)
 #need to check batch processing ...
 print("Creating batch graphs...")
 if bsize == 256:
     bgraphs = batch_process(graph, 1024, 50)
# from dgl/data/citation_graph.py

if __name__ == '__main__':
    # Smoke check: load CiteSeer through DGL and print the dataset object.
    from dgl.data.citation_graph import load_citeseer

    citeseer = load_citeseer()
    print(citeseer)

예제 #12
0
    def __init__(self, name, seed, self_loop=False, split=None):
        """Load a small graph dataset and prepare tensors and train/test masks.

        :param name: one of 'cora', 'citeseer', 'pubmed', 'amazon', 'karate'
        :param seed: only used when ``split`` is truthy. ``0`` takes the
            first ``ceil(n_nodes * split)`` nodes as training nodes; any
            other value seeds ``random`` and samples the training nodes
            from outside the fixed test range (node ids 500-1499).
        :param self_loop: whether self loops are added to the graph
        :param split: fraction of nodes used for training
            (``0 < split < 1``). When falsy, the dataset's own train/test
            masks are used. Must not be ``None`` for 'amazon'.
        :raises ValueError: if ``name`` is not a supported dataset name

        For 'karate' the masks are fixed (nodes 0 and 33 are the training
        nodes) and ``seed``/``split`` are ignored.
        """
        super(SmallGraphDataset, self).__init__()
        if name in ('cora', 'citeseer', 'pubmed'):
            # The three citation datasets differ only in their loader.
            if name == 'cora':
                data = citegrh.load_cora()
            elif name == 'citeseer':
                data = citegrh.load_citeseer()
            else:
                data = citegrh.load_pubmed()
            graph = data.graph
            if self_loop:
                graph = self.add_selfloop(graph)
            graph = dgl.DGLGraph(graph)
            features = data.features
            labels = data.labels

        elif name == 'amazon':
            assert split is not None
            data = AmazonCoBuy(name='computers')
            graph = data.data[0]
            if self_loop:
                graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
                graph.add_edges(graph.nodes(), graph.nodes())
            # must create split
            features = graph.ndata['feat']
            labels = graph.ndata['label']

        elif name == 'karate':
            kG = nx.karate_club_graph()
            # Label 0: Mr. Hi's club, label 1: the other club.
            labels = np.array(
                [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]
            ).astype(np.int64)
            graph = dgl.DGLGraph(kG)
            if self_loop:
                graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
                graph.add_edges(graph.nodes(), graph.nodes())
            # One-hot node identity as features.
            features = torch.eye(n=graph.number_of_nodes())

            # Only the two club leaders are labelled for training.
            self.train_mask = torch.zeros(graph.number_of_nodes(), dtype=torch.bool)
            self.train_mask[0] = True  # Mr. Hi
            self.train_mask[33] = True  # John A
            self.test_mask = ~self.train_mask

        else:
            # Previously an unknown name surfaced as an UnboundLocalError on
            # `graph` below; fail early with a clear message instead.
            raise ValueError('Unknown dataset: {}'.format(name))

        graph = self.compute_norm(graph)

        self.graph = graph
        self.features = torch.FloatTensor(features)
        self.n_features = self.features.size(1)
        self.labels = torch.LongTensor(labels)
        self.n_label = torch.unique(self.labels).size(0)
        self.n_nodes = graph.number_of_nodes()
        if hasattr(self, 'train_mask'):
            # Masks were already fixed above (karate); nothing left to do.
            return

        if split:
            print('using {} for training data.'.format(split))
            assert split > 0.0
            assert split < 1.0
            sample_size = ceil(self.n_nodes * split)
            # Builtin bool: the np.bool alias was removed in NumPy 1.24.
            train_np = np.zeros(self.n_nodes, dtype=bool)
            test_np = np.zeros(self.n_nodes, dtype=bool)
            # Fixed test set: node ids 500..1499.
            test_np[range(500, 1500)] = 1

            if seed == 0:
                # use first few data points as seed
                # NOTE(review): for sample_size > 500 this overlaps the
                # fixed test range -- confirm this is intended.
                train_idx = range(sample_size)
                train_np[train_idx] = 1
            else:
                random.seed(seed)
                # Sample from the n_nodes - 1000 non-test positions, then
                # shift ids >= 500 past the test range.
                train_idx = random.sample(range(self.n_nodes - 1000), sample_size)
                mapped_train_idx = [idx if idx < 500 else idx + 1000
                                    for idx in train_idx]
                train_np[mapped_train_idx] = 1

            self.train_mask = torch.tensor(train_np, dtype=torch.bool)
            self.test_mask = torch.tensor(test_np, dtype=torch.bool)
        else:  # use original split
            self.train_mask = torch.BoolTensor(data.train_mask)
            self.test_mask = torch.BoolTensor(data.test_mask)
예제 #13
0
파일: utils.py 프로젝트: MH-0/RPGAE
def load_custom_dataset(dataset_name, with_attributes, with_labels, directed,
                        separator):
    """
    loads the dataset into memory

    The results are published through module-level globals: the folder paths
    (``data_path``, ``graph_path``, ``topo_features_path``,
    ``topo_features_labels_path``, ``embedding_path``), ``graph``,
    ``node_labels``, ``number_classes``, ``input``, ``input_size`` and
    ``is_directed``. The output folders are created if they do not exist.

    :param dataset_name: The name of the dataset (As named in the folder data)
    :param with_attributes: if it has attributes
    :param with_labels: if the dataset has labels (ground truth)
    :param directed: if the graph is directed
    :param separator: the separator character in the files (" " or "," or "\t")
    """
    # Local import so this block does not depend on a file-level `import os`.
    import os

    global data_path
    global graph_path
    global topo_features_path
    global topo_features_labels_path
    global embedding_path
    global graph
    global node_labels
    global number_classes
    global input
    global input_size
    global is_directed

    # Paths are kept as strings ending in a separator because file names are
    # appended with "+" (here and presumably elsewhere). os.sep replaces the
    # hard-coded backslash so the layout also works outside Windows; on
    # Windows the resulting strings are unchanged.
    sep = os.sep

    # data folder path
    data_path = "data" + sep + dataset_name + sep

    # graph folder path
    graph_path = data_path + "graph" + sep

    # features folder path
    topo_features_path = data_path + "top_features" + sep

    # features classes folder path
    topo_features_labels_path = data_path + "top_features_labels" + sep

    # pretreatment folder path
    embedding_path = data_path + "embedding" + sep

    # scores folder path
    # NOTE(review): unlike the other paths this one is not declared global,
    # so it is only visible inside this function -- confirm that is intended.
    scores_path = data_path + "scores" + sep

    # The graph is directed
    is_directed = directed

    # Load graphs
    if dataset_name in ("cora", "citeseer"):
        # Built-in citation datasets: same handling, different loader.
        if dataset_name == "cora":
            data = cg.load_cora()
        else:
            data = cg.load_citeseer()
        graph = nx.Graph(data.graph)
        node_labels = data.labels
        input = torch.tensor(data.features).float()
    else:
        # Custom dataset read from the files in the graph folder.
        graph = load_graph(graph_path + "edges.txt",
                           0,
                           separator,
                           print_details=True,
                           directed=directed)
        if with_labels:
            node_labels = load_groundtruth(graph_path + "groundtruth.txt", 0,
                                           separator)
        else:
            node_labels = []
        if with_attributes:
            input = load_attributes(graph_path + "attributes.txt", 0,
                                    separator)
        else:
            # No attributes: fall back to one-hot node identity features.
            input = torch.eye(len(graph.nodes))

    # input layer size
    input_size = len(input[0])

    # number of classes for the node labels
    number_classes = len(set(node_labels))

    # create directories if they do not exist
    # folder that holds the embeddings
    Path(embedding_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the topological features
    Path(topo_features_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the classes of the topological features
    Path(topo_features_labels_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the scores of the experiments
    Path(scores_path).mkdir(parents=True, exist_ok=True)

    print("graph details:", dataset_name)
    print("------------------")
    print("nodes", len(graph.nodes))
    print("edges", len(graph.edges))
    print("classes", number_classes)
    print("------------------")