def load_citation_graph(graph_name):
    """
    Loads one of the DGL-hosted citation graph datasets

    :param graph_name: name of the citation graph to load; one of
        ['cora', 'citeseer', 'pubmed']
    :return: namedtuple for the citation graph dataset; attributes:
        [graph, features, labels, mask], where ``mask`` is built from the
        dataset's ``train_mask``
    :raises ValueError: if ``graph_name`` is not one of the supported names
    """
    # Map each supported name to the loader attribute on ``citation_graph``.
    # The attribute is resolved lazily so that an invalid name is rejected
    # before any loader is touched.
    loader_names = {
        'cora': 'load_cora',
        'citeseer': 'load_citeseer',
        'pubmed': 'load_pubmed',
    }
    loader_name = (loader_names.get(graph_name)
                   if isinstance(graph_name, str) else None)
    if loader_name is None:
        # Plain "{}" rather than "{:s}": the latter fails for non-string
        # input (e.g. None), masking this ValueError with a formatting error.
        raise ValueError(
            "Unknown citation graph name <{}>; "
            "Expected one of [cora, citeseer, pubmed]".format(graph_name))

    # retrieve the dataset
    dataset = getattr(citation_graph, loader_name)()

    # return the dataset's components
    dataset_tuple = namedtuple("citation_graph",
                               ["graph", "features", "labels", "mask"])
    return dataset_tuple(DGLGraph(dataset.graph),
                         torch.FloatTensor(dataset.features),
                         torch.LongTensor(dataset.labels),
                         torch.BoolTensor(dataset.train_mask))
Beispiel #2
0
 def load_pubmed_data():
     """
     Load the PubMed citation dataset via ``citegrh``.

     :return: tuple ``(graph, features, labels, train_mask, test_mask)``
         where features are float tensors, labels are long tensors and
         both masks are boolean tensors
     """
     pubmed = citegrh.load_pubmed()
     node_features = torch.FloatTensor(pubmed.features)
     node_labels = torch.LongTensor(pubmed.labels)
     training_mask = torch.BoolTensor(pubmed.train_mask)
     testing_mask = torch.BoolTensor(pubmed.test_mask)
     graph = DGLGraph(pubmed.graph)
     return graph, node_features, node_labels, training_mask, testing_mask
Beispiel #3
0
def load_data(dataset):
    """
    Load a node-classification dataset and split its nodes 70/20/10.

    :param dataset: dataset name. 'cora', 'pubmed' and 'citeseer' are loaded
        through ``citegrh``; 'amazon-computers', 'amazon-photo' and
        'coauthor-cs' through ``gnn_benckmark``. Any other value falls back
        to the Coauthor 'physics' dataset (kept for backward compatibility).
    :return: tuple ``(g, features, labels, num_labels, train_mask, val_mask,
        test_mask)``; the masks are boolean tensors over the nodes, assigned
        by node index order (first 70% train, next 20% validation, rest test)
    """
    if dataset in ('cora', 'pubmed', 'citeseer'):
        if dataset == 'cora':
            data = citegrh.load_cora()
        elif dataset == 'pubmed':
            data = citegrh.load_pubmed()
        else:
            data = citegrh.load_citeseer()
        features = th.FloatTensor(data.features)
        labels = th.LongTensor(data.labels)
        num_labels = data.num_labels
        g = DGLGraph(data.graph)
    else:
        if dataset == 'amazon-computers':
            benchmark = gnn_benckmark.AmazonCoBuy('computers')
        elif dataset == 'amazon-photo':
            benchmark = gnn_benckmark.AmazonCoBuy('photo')
        elif dataset == 'coauthor-cs':
            benchmark = gnn_benckmark.Coauthor('cs')
        else:
            # NOTE(review): every unrecognised name silently loads
            # Coauthor 'physics'; confirm callers rely on this fallback.
            benchmark = gnn_benckmark.Coauthor('physics')
        g = benchmark[0]
        features = th.FloatTensor(g.ndata['feat'].float())
        labels = th.LongTensor(g.ndata['label'])
        # these datasets expose no class count here; derive it from the
        # largest label id
        num_labels = int(th.max(labels) + 1)

    # dataset split points
    num_nodes = labels.shape[0]
    split1 = int(0.7 * num_nodes)
    split2 = int(0.9 * num_nodes)
    # presumably _sample_mask(idx, n) returns a length-n 0/1 mask with the
    # given indices set -- verify against its definition
    train_mask = th.BoolTensor(_sample_mask(range(split1), num_nodes))
    val_mask = th.BoolTensor(_sample_mask(range(split1, split2), num_nodes))
    # the test range runs to num_nodes (exclusive) so the last node is
    # included; the previous upper bound of num_nodes - 1 left it in no mask
    test_mask = th.BoolTensor(
        _sample_mask(range(split2, num_nodes), num_nodes))
    print(
        "Total size: {:}| Feature dims: {:}| Train size: {:}| Val size: {:}| Test size: {:}| Num of labels: {:}"
        .format(features.size(0), features.size(1), len(labels[train_mask]),
                len(labels[val_mask]), len(labels[test_mask]), num_labels))
    return g, features, labels, num_labels, train_mask, val_mask, test_mask
def load_data(dataset_name: str):
    """
    Load one of the citation datasets exposed by ``citegrh``.

    :param dataset_name: one of "cora", "citeseer" or "pubmed"
    :return: tuple ``(g, features, labels, train_mask, test_mask)`` with
        float features, long labels and boolean masks
    :raises ValueError: if ``dataset_name`` is not a supported dataset
    """
    if dataset_name == "cora":
        data = citegrh.load_cora()
    elif dataset_name == "citeseer":
        data = citegrh.load_citeseer()
    elif dataset_name == "pubmed":
        data = citegrh.load_pubmed()
    else:
        # without this branch an unknown name left ``data`` unbound and
        # surfaced as an UnboundLocalError further down
        raise ValueError('Unknown dataset: {}'.format(dataset_name))

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask
Beispiel #5
0
def load_data(dataset_name, self_loops):
    """
    Return the raw dataset object selected by ``dataset_name``.

    :param dataset_name: 'cora', 'citeseer', 'pubmed', "PPI", or any string
        starting with 'reddit'
    :param self_loops: forwarded as ``self_loop`` to ``RedditDataset``; not
        used by the other datasets
    :return: the loaded dataset object, unprocessed
    :raises ValueError: if ``dataset_name`` matches none of the above
    """
    if dataset_name == 'cora':
        return citegrh.load_cora()
    if dataset_name == 'citeseer':
        return citegrh.load_citeseer()
    if dataset_name == 'pubmed':
        return citegrh.load_pubmed()
    if dataset_name == "PPI":
        # only the 'test' portion of PPI is loaded here
        return PPIDataset('test')

    wants_reddit = (dataset_name is not None
                    and dataset_name.startswith('reddit'))
    if wants_reddit:
        return RedditDataset(self_loop=self_loops)

    raise ValueError('Unknown dataset: {}'.format(dataset_name))
Beispiel #6
0
def load_pubmed_data():
    """
    Load PubMed via ``citegrh`` and add a self loop to every node.

    :return: tuple ``(g, features, labels, mask, val_mask, test_mask)``
        where ``mask`` is the training mask; all three masks are boolean
        tensors
    """
    data = citegrh.load_pubmed()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BoolTensor, matching val/test masks: indexing with a uint8
    # (ByteTensor) mask is deprecated in PyTorch
    mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = data.graph
    # add self loop, A^hat = A + I in the paper
    # existing self loops are dropped first so each node ends up with
    # exactly one after the add below
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    # return graph, node features, labels, and train/val/test masks
    return g, features, labels, mask, val_mask, test_mask
def load_data(dataset="cora"):
    """
    Load a dataset and attach tensors, a self-looped graph and a
    row-normalised adjacency matrix to it.

    :param dataset: one of "cora", "pubmed", "citeseer" or "synthetic"
    :return: the dataset object with these attributes set or replaced:
        ``features`` (float tensor), ``labels`` (long tensor), ``size``
        (number of labels), ``g`` (DGLGraph with self loops), ``adj`` (dense
        adjacency matrix) and ``Prob`` (``adj`` normalised with p=1 along
        dim 1)
    :raises ValueError: if ``dataset`` is not a supported name
    """
    supported = ("cora", "pubmed", "citeseer", "synthetic")
    # explicit check instead of ``assert``: asserts are stripped under -O,
    # which would let an unknown name silently load the synthetic data
    if dataset not in supported:
        raise ValueError(
            "Unknown dataset: {}; expected one of {}".format(
                dataset, list(supported)))
    if dataset == "cora":
        data = citegrh.load_cora()
    elif dataset == "pubmed":
        data = citegrh.load_pubmed()
    elif dataset == "citeseer":
        data = citegrh.load_citeseer()
    else:
        data = synthetic_data()
    data.features = th.FloatTensor(data.features)
    data.labels = th.LongTensor(data.labels)
    data.size = data.labels.shape[0]
    g = data.graph
    # drop existing self loops, then add exactly one per node
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    data.g = g
    data.adj = g.adjacency_matrix(transpose=None).to_dense()
    # presumably ``normalize`` is torch.nn.functional.normalize, making each
    # row of the adjacency sum to 1 -- verify against the file's imports
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)
    print("============Successfully Load %s===============" % dataset)
    return data
Beispiel #8
0
            x = F.log_softmax(self.layer2(g, x))
            return x

    from dgl.data import citation_graph as citegrh
    import networkx as nx

    def load_cora_data():
        """
        Load the Cora citation dataset via ``citegrh``.

        Returns a tuple ``(graph, features, labels, train_mask, test_mask)``
        with float features, long labels and boolean masks.
        """
        cora = citegrh.load_cora()
        node_features = torch.FloatTensor(cora.features)
        node_labels = torch.LongTensor(cora.labels)
        training_mask = torch.BoolTensor(cora.train_mask)
        testing_mask = torch.BoolTensor(cora.test_mask)
        graph = DGLGraph(cora.graph)
        return graph, node_features, node_labels, training_mask, testing_mask

    # Load PubMed directly; the load_cora_data helper defined above is not
    # used by the statements below.
    data = citegrh.load_pubmed()
    #features = torch.FloatTensor(data.features)
    #g = DGLGraph(data.graph).to(device)

    #dataset = da.CoraGraphDataset()

    # NOTE(review): the device is hard-coded to CUDA with no CPU fallback,
    # so this presumably fails on machines without a GPU -- confirm intent.
    device = torch.device('cuda')

    #model = Net()
    # Move the model to the selected device.
    model = Net().to(device)

    # Node features and the graph are placed on the same device as the model.
    features = torch.FloatTensor(data.features).to(device)
    g = DGLGraph(data.graph).to(device)

    #data = dataset[0].to(device)
Beispiel #9
0
from itertools import product

import torch

from runtime.dgl.gcn import GCN
from runtime.dgl.gat import GAT
from runtime.dgl.train import train_runtime

from dgl.data import citation_graph
from dgl import DGLGraph

# Benchmark script: times GCN and GAT training on the three citation
# datasets using the DGL runtime models.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Cora = citation_graph.load_cora()
CiteSeer = citation_graph.load_citeseer()
PubMed = citation_graph.load_pubmed()

# every (dataset, model) combination is trained once for 200 epochs
for d, Net in product([Cora, CiteSeer, PubMed], [GCN, GAT]):
    g = DGLGraph(d.graph)
    x = torch.tensor(d.features, dtype=torch.float, device=device)
    y = torch.tensor(d.labels, dtype=torch.long, device=device)
    # boolean mask: indexing tensors with a uint8 mask is deprecated
    mask = torch.tensor(d.train_mask, dtype=torch.bool, device=device)
    # add a self loop to every node
    g.add_edges(g.nodes(), g.nodes())
    # per-node normalisation term: in-degree ** -0.5, with the inf produced
    # by zero-degree nodes replaced by 0
    norm = torch.pow(g.in_degrees().float(), -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1).to(device)
    model = Net(g, x.size(1), d.num_labels)
    t = train_runtime(model, x, y, mask, epochs=200, device=device)
    print('{} - {}: {:.2f}s'.format(d.name, Net.__name__, t))
Beispiel #10
0
 # Read the input path and algorithm name from the parsed arguments.
 path = args.p
 algo = args.algo
 # A non-empty path takes priority: build the graph from the edges that
 # readmtxGraph reads from that file.
 if len(path) > 0:
     #G = mmread(path)
     #nxgraph = nx.Graph(G)
     #graph = dgl.from_networkx(nxgraph)
     edges = readmtxGraph(path)
     graph = dgl.graph(edges)
     #print(graph.edges())
 # NOTE(review): `graph` is compared here before any assignment visible in
 # this fragment; presumably it holds a dataset name set earlier -- confirm.
 elif graph == "simple":
     # small hard-coded graph given as (source nodes, destination nodes)
     graph = dgl.graph(([0, 0, 1, 1, 2, 3], [1, 2, 2, 4, 3, 4]))
 elif graph == "citeseer":
     data = load_citeseer(".")
     graph = data[0]
 elif graph == "pubmed":
     data = load_pubmed(".")
     graph = data[0]
 else:
     # any other value falls back to Cora
     data = load_cora(".")
     graph = data[0]
 N = len(graph.nodes())
 print("#Nodes:", N, "#Edges:", len(graph.edges()[0]))
 # Random initial embedding with one row per node; `dim` is defined outside
 # this fragment.
 embed = torch.rand(N, dim)
 #print(embed)
 #need to check batch processing ...
 print("Creating batch graphs...")
 if bsize == 256:
     bgraphs = batch_process(graph, 1024, 50)
 elif bsize == 1:
     bgraphs = [[graph, 0, N]]
 else:
Beispiel #11
0
    def __init__(self, name, seed, self_loop=False, split=None):
        """
        Load a small graph dataset and prepare its tensors and masks.

        Sets ``graph``, ``features``, ``n_features``, ``labels``,
        ``n_label``, ``n_nodes``, ``train_mask`` and ``test_mask``.

        :param name: one of 'cora', 'citeseer', 'pubmed', 'amazon', 'karate'
        :param seed: 0 uses the first nodes as the training set; any other
            value seeds ``random`` and samples the training nodes
        :param self_loop: whether to add a self loop to every node
        :param split: fraction of nodes, strictly between 0 and 1, used for
            training; when falsy the split shipped with the dataset is
            used. Must be given for 'amazon'. Ignored for 'karate', which
            has fixed masks.
        :raises ValueError: for an unknown ``name``, a missing ``split`` for
            'amazon', or a ``split`` outside (0, 1)
        """
        super(SmallGraphDataset, self).__init__()
        if name in ('cora', 'citeseer', 'pubmed'):
            if name == 'cora':
                data = citegrh.load_cora()
            elif name == 'citeseer':
                data = citegrh.load_citeseer()
            else:
                data = citegrh.load_pubmed()
            graph = data.graph
            if self_loop:
                graph = self.add_selfloop(graph)
            graph = dgl.DGLGraph(graph)
            features = data.features
            labels = data.labels

        elif name == 'amazon':
            # this dataset's own split is not used here, so one must be
            # created from ``split`` below
            if split is None:
                raise ValueError("split must be given for the 'amazon' dataset")
            data = AmazonCoBuy(name='computers')
            graph = data.data[0]
            if self_loop:
                # drop existing self loops, then add one per node
                graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
                graph.add_edges(graph.nodes(), graph.nodes())
            features = graph.ndata['feat']
            labels = graph.ndata['label']

        elif name == 'karate':
            kG = nx.karate_club_graph()
            # label 0: members of Mr. Hi's club, label 1: everyone else
            labels = np.array(
                [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]).astype(np.int64)
            graph = dgl.DGLGraph(kG)
            if self_loop:
                graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
                graph.add_edges(graph.nodes(), graph.nodes())
            # one-hot node identity features
            features = torch.eye(n=graph.number_of_nodes())

            # only the two club leaders are labelled for training; all
            # other nodes form the test set
            self.train_mask = torch.zeros(graph.number_of_nodes(), dtype=torch.bool)
            self.train_mask[0] = True  # Mr. Hi
            self.train_mask[33] = True  # John A
            self.test_mask = ~self.train_mask

        else:
            # previously an unknown name crashed later with an
            # UnboundLocalError on ``graph``
            raise ValueError('Unknown dataset: {}'.format(name))

        graph = self.compute_norm(graph)

        self.graph = graph
        self.features = torch.FloatTensor(features)
        self.n_features = self.features.size(1)
        self.labels = torch.LongTensor(labels)
        self.n_label = torch.unique(self.labels).size(0)
        self.n_nodes = graph.number_of_nodes()
        if hasattr(self, 'train_mask'):
            # masks were already fixed above (karate)
            return

        if split:
            print('using {} for training data.'.format(split))
            if not 0.0 < split < 1.0:
                raise ValueError(
                    'split must be strictly between 0 and 1, got {}'.format(split))
            sample_size = ceil(self.n_nodes * split)
            # builtin ``bool``: the ``np.bool`` alias was removed from NumPy
            train_np = np.zeros(self.n_nodes, dtype=bool)
            test_np = np.zeros(self.n_nodes, dtype=bool)
            # nodes 500..1499 are always the test set
            test_np[range(500, 1500)] = True

            if seed == 0:
                # use first few data points as seed
                # NOTE(review): for sample_size > 500 these indices overlap
                # the fixed test window -- confirm this is intended.
                train_idx = range(sample_size)
                train_np[train_idx] = True
            else:
                random.seed(seed)
                # sample from the n_nodes - 1000 non-test positions, then
                # shift indices >= 500 past the test window
                train_idx = random.sample(range(self.n_nodes - 1000), sample_size)
                mapped_train_idx = [idx if idx < 500 else idx + 1000 for idx in train_idx]
                train_np[mapped_train_idx] = True

            self.train_mask = torch.tensor(train_np, dtype=torch.bool)
            self.test_mask = torch.tensor(test_np, dtype=torch.bool)
        else:  # use original split
            self.train_mask = torch.BoolTensor(data.train_mask)
            self.test_mask = torch.BoolTensor(data.test_mask)
Beispiel #12
0
def load_ppi_data():
    """
    Load a dataset via ``citegrh`` and return its graph, labels and features.

    NOTE(review): despite the function name, the PubMed citation dataset is
    what gets loaded here -- confirm whether PPI was intended.

    :return: tuple ``(g, labels, features)`` -- note labels come before
        features
    """
    pubmed = citegrh.load_pubmed()
    node_labels = th.LongTensor(pubmed.labels)
    node_features = th.FloatTensor(pubmed.features)
    graph = DGLGraph(pubmed.graph)
    return graph, node_labels, node_features