def load_citation_graph(graph_name):
    """
    Loads one of the DGL-hosted citation graph datasets

    :param graph_name: name of the citation graph to load;
        one of ['cora', 'citeseer', 'pubmed']
    :return: namedtuple for the citation graph dataset;
        attributes: [graph, features, labels, mask], where ``mask`` is built
        from the dataset's ``train_mask``
    :raises ValueError: if ``graph_name`` is not one of the supported names
    """
    # Supported names mapped to the loader attribute on `citation_graph`.
    # The attribute is resolved lazily so that an unknown name is rejected
    # before anything is loaded.
    loader_names = {
        'cora': 'load_cora',
        'citeseer': 'load_citeseer',
        'pubmed': 'load_pubmed',
    }

    if not isinstance(graph_name, str) or graph_name not in loader_names:
        # `{!s}` instead of `{:s}`: the latter raises a formatting error for
        # non-string values (e.g. None), masking the intended ValueError.
        raise ValueError(
            "Unknown citation graph name <{!s}>; "
            "Expected one of [cora, citeseer, pubmed]".format(graph_name))

    # retrieve the dataset
    dataset = getattr(citation_graph, loader_names[graph_name])()

    # return the dataset's components
    dataset_tuple = namedtuple("citation_graph",
                               ["graph", "features", "labels", "mask"])
    return dataset_tuple(DGLGraph(dataset.graph),
                         torch.FloatTensor(dataset.features),
                         torch.LongTensor(dataset.labels),
                         torch.BoolTensor(dataset.train_mask))
def load_citseer_data():
    """Load the CiteSeer citation dataset.

    :return: tuple ``(g, features, labels, train_mask, test_mask)`` where
        ``g`` is a ``DGLGraph`` built from the dataset's graph, ``features``
        is a float tensor, ``labels`` a long tensor and both masks are
        boolean tensors.
    """
    citeseer = citegrh.load_citeseer()

    # Convert the raw arrays to tensors first; the graph is wrapped last.
    node_features = th.FloatTensor(citeseer.features)
    node_labels = th.LongTensor(citeseer.labels)
    masks = [th.BoolTensor(raw) for raw in (citeseer.train_mask,
                                            citeseer.test_mask)]

    return (DGLGraph(citeseer.graph), node_features, node_labels,
            masks[0], masks[1])
def load_data(dataset):
    """Load a node-classification dataset and split it 70/20/10 by node order.

    :param dataset: dataset name. 'cora', 'pubmed' and 'citeseer' use the
        citation-graph loaders; 'amazon-computers', 'amazon-photo' and
        'coauthor-cs' use the benchmark datasets. Any other value falls back
        to the Coauthor 'physics' dataset (kept for backward compatibility).
    :return: tuple ``(g, features, labels, num_labels, train_mask, val_mask,
        test_mask)``; the three masks are boolean tensors with one entry per
        node.
    """
    if dataset in ('cora', 'pubmed', 'citeseer'):
        # The citation loaders are named `load_<dataset>` on `citegrh`.
        data = getattr(citegrh, 'load_' + dataset)()
        features = th.FloatTensor(data.features)
        labels = th.LongTensor(data.labels)
        num_labels = data.num_labels
        g = DGLGraph(data.graph)
    else:
        if dataset == 'amazon-computers':
            benchmark = gnn_benckmark.AmazonCoBuy('computers')
        elif dataset == 'amazon-photo':
            benchmark = gnn_benckmark.AmazonCoBuy('photo')
        elif dataset == 'coauthor-cs':
            benchmark = gnn_benckmark.Coauthor('cs')
        else:
            # NOTE: every unrecognised name lands here, as in the original.
            benchmark = gnn_benckmark.Coauthor('physics')
        g = benchmark[0]
        features = th.FloatTensor(g.ndata['feat'].float())
        labels = th.LongTensor(g.ndata['label'])
        # Labels are taken to be 0..max, so the class count is max + 1.
        num_labels = int(th.max(labels) + 1)

    num_nodes = labels.shape[0]

    # Dataset split points: first 70% train, next 20% validation, rest test.
    split1 = int(0.7 * num_nodes)
    split2 = int(0.9 * num_nodes)

    # `_sample_mask(indices, size)` is defined elsewhere; presumably it
    # returns a length-`size` boolean mask set at `indices` - TODO confirm.
    train_mask = th.BoolTensor(_sample_mask(range(split1), num_nodes))
    val_mask = th.BoolTensor(_sample_mask(range(split1, split2), num_nodes))
    # The upper bound is `num_nodes` (range() already excludes it); the
    # previous `num_nodes - 1` left the last node out of every split.
    test_mask = th.BoolTensor(_sample_mask(range(split2, num_nodes), num_nodes))

    print(
        "Total size: {:}| Feature dims: {:}| Train size: {:}| Val size: {:}| Test size: {:}| Num of labels: {:}"
        .format(features.size(0), features.size(1), len(labels[train_mask]),
                len(labels[val_mask]), len(labels[test_mask]), num_labels))

    return g, features, labels, num_labels, train_mask, val_mask, test_mask
def load_citeseer_data():
    """Load CiteSeer as a DGL graph with exactly one self-loop per node.

    :return: tuple ``(g, features, labels, mask, val_mask, test_mask)`` where
        ``mask`` is the training mask. All three masks are boolean tensors.
    """
    data = citegrh.load_citeseer()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # BoolTensor rather than ByteTensor: it matches the val/test masks below,
    # and indexing with uint8 masks is deprecated in PyTorch.
    mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)

    g = data.graph
    # add self loop, A^hat = A + I in the paper; existing self-loops are
    # dropped first so that no node ends up with two of them
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())

    # return graph, node features, labels, and the train/val/test masks
    return g, features, labels, mask, val_mask, test_mask
def load_data(dataset_name: str):
    """Load one of the citation-graph datasets by name.

    :param dataset_name: one of "cora", "citeseer" or "pubmed"
    :return: tuple ``(g, features, labels, train_mask, test_mask)`` with a
        ``DGLGraph``, a float feature tensor, a long label tensor and two
        boolean mask tensors
    :raises ValueError: if ``dataset_name`` is not a supported dataset
    """
    supported = ("cora", "citeseer", "pubmed")
    if dataset_name not in supported:
        # Previously an unknown name fell through all three `if`s and
        # crashed later with an UnboundLocalError on `data`.
        raise ValueError(
            "Unknown dataset {!r}; expected one of {}".format(
                dataset_name, list(supported)))

    # The loaders are named `load_<dataset_name>` on `citegrh`.
    data = getattr(citegrh, "load_" + dataset_name)()

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask
def load_data(dataset_name, self_loops):
    """Return the dataset object registered under ``dataset_name``.

    :param dataset_name: 'cora', 'citeseer', 'pubmed', "PPI", or any string
        starting with 'reddit'
    :param self_loops: forwarded as ``self_loop`` to ``RedditDataset``; not
        used by the other datasets
    :return: the object produced by the matching loader or dataset class
    :raises ValueError: if ``dataset_name`` matches none of the above
        (including ``None``)
    """
    # Exact-name matches first, each returning as soon as it hits.
    if dataset_name == 'cora':
        return citegrh.load_cora()
    if dataset_name == 'citeseer':
        return citegrh.load_citeseer()
    if dataset_name == 'pubmed':
        return citegrh.load_pubmed()
    if dataset_name == "PPI":
        # Only the 'test' part of PPI is loaded here.
        return PPIDataset('test')

    # Anything left must be a reddit variant, matched by prefix.
    wants_reddit = (dataset_name is not None
                    and dataset_name.startswith('reddit'))
    if not wants_reddit:
        raise ValueError('Unknown dataset: {}'.format(dataset_name))
    return RedditDataset(self_loop=self_loops)
def load_data(dataset="cora"):
    """Load a dataset and attach tensors, a self-looped graph and matrices.

    The returned object is the one produced by the loader, with these
    attributes set or overwritten: ``features`` (float tensor), ``labels``
    (long tensor), ``size`` (number of labels), ``g`` (``DGLGraph`` with one
    self-loop per node), ``adj`` (dense adjacency matrix of ``g``) and
    ``Prob`` (``adj`` passed through ``normalize`` with ``p=1, dim=1``).

    :param dataset: one of "cora", "pubmed", "citeseer" or "synthetic"
    :return: the augmented dataset object
    """
    assert dataset in ["cora", "pubmed", "citeseer", "synthetic"]

    if dataset in ("cora", "pubmed", "citeseer"):
        # Citation loaders are named `load_<dataset>` on `citegrh`.
        data = getattr(citegrh, "load_" + dataset)()
    else:
        data = synthetic_data()

    data.features = th.FloatTensor(data.features)
    data.labels = th.LongTensor(data.labels)
    data.size = data.labels.shape[0]

    # Drop existing self-loops on the networkx graph, then add exactly one
    # per node once it has been wrapped as a DGLGraph.
    nx_graph = data.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    dgl_graph = DGLGraph(nx_graph)
    dgl_graph.add_edges(dgl_graph.nodes(), dgl_graph.nodes())
    data.g = dgl_graph

    dense_adj = dgl_graph.adjacency_matrix(transpose=None).to_dense()
    data.adj = dense_adj
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)

    print("============Successfully Load %s===============" % dataset)
    return data
from itertools import product

import torch
from runtime.dgl.gcn import GCN
from runtime.dgl.gat import GAT
from runtime.dgl.train import train_runtime
from dgl.data import citation_graph
from dgl import DGLGraph

# Run on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

Cora = citation_graph.load_cora()
CiteSeer = citation_graph.load_citeseer()
PubMed = citation_graph.load_pubmed()

# Time every (dataset, model) combination; a fresh graph is built per run.
for dataset, model_cls in product([Cora, CiteSeer, PubMed], [GCN, GAT]):
    graph = DGLGraph(dataset.graph)
    features = torch.tensor(dataset.features, dtype=torch.float,
                            device=device)
    targets = torch.tensor(dataset.labels, dtype=torch.long, device=device)
    train_mask = torch.tensor(dataset.train_mask, dtype=torch.uint8,
                              device=device)

    # Add a self-loop to every node.
    graph.add_edges(graph.nodes(), graph.nodes())

    # Per-node in-degree^(-1/2); zero-degree nodes give inf, reset to 0.
    inv_sqrt_degree = graph.in_degrees().float().pow(-0.5)
    inv_sqrt_degree[torch.isinf(inv_sqrt_degree)] = 0
    graph.ndata['norm'] = inv_sqrt_degree.unsqueeze(1).to(device)

    model = model_cls(graph, features.size(1), dataset.num_labels)
    elapsed = train_runtime(model, features, targets, train_mask,
                            epochs=200, device=device)
    print('{} - {}: {:.2f}s'.format(dataset.name, model_cls.__name__,
                                    elapsed))
import networkx as nx
import matplotlib.pyplot as plt
import torch
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import sys
import dgl
from dgl.data import citation_graph as citegrh

# Swap in `citegrh.load_cora()` here to use Cora instead of CiteSeer.
data = citegrh.load_citeseer()
G = dgl.DGLGraph(data.graph)
labels = th.tensor(data.labels)


def visualize(labels, g):
    """Draw ``g`` with nodes coloured by ``labels`` and save it as a PNG.

    Node positions come from ``nx.random_layout`` without a fixed seed. The
    figure is written to 'citeseer4.png'.

    :param labels: per-node values passed to networkx as ``node_color``
    :param g: graph handed to ``nx.random_layout`` and ``nx.draw_networkx``
    """
    layout = nx.random_layout(g)

    plt.figure(figsize=(8, 8))
    plt.axis('off')

    draw_options = dict(
        pos=layout,
        node_size=50,
        cmap=plt.get_cmap('coolwarm'),
        node_color=labels,
        edge_color='k',
        arrows=False,
        width=0.5,
        style='dotted',
        with_labels=False,
    )
    nx.draw_networkx(g, **draw_options)

    plt.savefig('citeseer4.png')
# Copy the run settings off the `args` object (defined outside this excerpt).
lrate = args.r
bsize = args.b
it = args.it
path = args.p
algo = args.algo

# Choose the input graph: a non-empty path is read via `readmtxGraph`;
# otherwise one of the named graphs is used, with Cora as the default.
# NOTE(review): `graph` is compared below before this excerpt assigns it;
# presumably it is set earlier (e.g. from `args`) - confirm.
if len(path) > 0:
    #G = mmread(path)
    #nxgraph = nx.Graph(G)
    #graph = dgl.from_networkx(nxgraph)
    edges = readmtxGraph(path)
    graph = dgl.graph(edges)
    #print(graph.edges())
elif graph == "simple":
    # Small hand-written graph given as (source ids, destination ids).
    graph = dgl.graph(([0, 0, 1, 1, 2, 3], [1, 2, 2, 4, 3, 4]))
elif graph == "citeseer":
    data = load_citeseer(".")
    graph = data[0]
elif graph == "pubmed":
    data = load_pubmed(".")
    graph = data[0]
else:
    data = load_cora(".")
    graph = data[0]

N = len(graph.nodes())
print("#Nodes:", N, "#Edges:", len(graph.edges()[0]))

# Random initial embedding with one row per node; `dim` is defined outside
# this excerpt.
embed = torch.rand(N, dim)
#print(embed)
#need to check batch processing ...
print("Creating batch graphs...")
# NOTE(review): a batch size of exactly 256 calls `batch_process` with the
# fixed arguments 1024 and 50; their meaning is not visible here - confirm.
if bsize == 256:
    bgraphs = batch_process(graph, 1024, 50)
# from dgl/data/citation_graph.py if __name__ == '__main__': from dgl.data.citation_graph import load_citeseer data = load_citeseer() print(data)
def __init__(self, name, seed, self_loop=False, split=None):
    """Load a small graph dataset and build its train/test masks.

    Sets ``graph``, ``features``, ``n_features``, ``labels``, ``n_label``,
    ``n_nodes``, ``train_mask`` and ``test_mask`` on the instance.

    :param name: one of 'cora', 'citeseer', 'pubmed', 'amazon' or 'karate'
    :param seed: with ``split`` set, 0 takes the first
        ``ceil(n_nodes * split)`` nodes for training; any other value seeds
        ``random`` and samples the training nodes outside the test window
    :param self_loop: if true, make sure the graph carries self-loops
    :param split: training fraction, strictly between 0 and 1. A falsy value
        uses the dataset's own train/test masks. Required for 'amazon';
        ignored for 'karate', whose masks are fixed.
    :raises ValueError: if ``name`` is not a supported dataset
    """
    super(SmallGraphDataset, self).__init__()

    if name in ('cora', 'citeseer', 'pubmed'):
        # The citation loaders are named `load_<name>` on `citegrh`.
        data = getattr(citegrh, 'load_' + name)()
        graph = data.graph
        if self_loop:
            graph = self.add_selfloop(graph)
        graph = dgl.DGLGraph(graph)
        features = data.features
        labels = data.labels
    elif name == 'amazon':
        # This branch always needs an explicit split to be created below.
        assert split is not None
        data = AmazonCoBuy(name='computers')
        graph = data.data[0]
        if self_loop:
            # Remove the current self-loops, then add one per node.
            graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
            graph.add_edges(graph.nodes(), graph.nodes())
        features = graph.ndata['feat']
        labels = graph.ndata['label']
    elif name == 'karate':
        kG = nx.karate_club_graph()
        # Label is 0 for members of Mr. Hi's club and 1 for everyone else.
        labels = np.array(
            [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]
        ).astype(np.int64)
        graph = dgl.DGLGraph(kG)
        if self_loop:
            graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
            graph.add_edges(graph.nodes(), graph.nodes())
        # One-hot node identity as features.
        features = torch.eye(n=graph.number_of_nodes())
        # Only the two club leaders are labelled for training.
        self.train_mask = torch.zeros(graph.number_of_nodes(),
                                      dtype=torch.bool)
        self.train_mask[0] = True   # Mr. Hi
        self.train_mask[33] = True  # John A
        self.test_mask = ~self.train_mask
    else:
        # Previously an unknown name crashed below with an UnboundLocalError.
        raise ValueError('Unknown dataset: {}'.format(name))

    graph = self.compute_norm(graph)
    self.graph = graph
    self.features = torch.FloatTensor(features)
    self.n_features = self.features.size(1)
    self.labels = torch.LongTensor(labels)
    self.n_label = torch.unique(self.labels).size(0)
    self.n_nodes = graph.number_of_nodes()

    # Masks already exist (the karate branch sets them): nothing more to do.
    if hasattr(self, 'train_mask'):
        return

    if split:
        print('using {} for training data.'.format(split))
        assert(split > 0.0)
        assert(split < 1.0)
        sample_size = ceil(self.n_nodes * split)
        # Builtin `bool`: the `np.bool` alias was removed in NumPy 1.24.
        train_np = np.zeros(self.n_nodes, dtype=bool)
        test_np = np.zeros(self.n_nodes, dtype=bool)
        # Nodes 500..1499 form the fixed test window.
        test_np[range(500, 1500)] = 1
        if seed == 0:
            # use first few data points as seed
            # NOTE(review): for sample_size > 500 this overlaps the test
            # window above - confirm whether that is intended.
            train_idx = range(sample_size)
            train_np[train_idx] = 1
        else:
            random.seed(seed)
            train_idx = random.sample(range(self.n_nodes - 1000),
                                      sample_size)
            # Shift sampled indices past the test window so the two sets
            # never overlap.
            mapped_train_idx = [idx if idx < 500 else idx + 1000
                                for idx in train_idx]
            train_np[mapped_train_idx] = 1
        self.train_mask = torch.tensor(train_np, dtype=torch.bool)
        self.test_mask = torch.tensor(test_np, dtype=torch.bool)
    else:
        # use original split
        self.train_mask = torch.BoolTensor(data.train_mask)
        self.test_mask = torch.BoolTensor(data.test_mask)
def load_custom_dataset(dataset_name, with_attributes, with_labels, directed, separator):
    """
    loads the dataset into memory

    The results are published through module-level globals (``graph``,
    ``node_labels``, ``input``, ``input_size``, ``number_classes``,
    ``is_directed`` and the folder paths); nothing is returned. The
    embedding, topological-feature, feature-label and score folders are
    created if they do not exist.

    :param dataset_name: The name of the dataset (As named in the folder data)
    :param with_attributes: if it has attributes
    :param with_labels: if the dataset has labels (ground truth)
    :param directed: if the graph is directed
    :param separator: the separator character in the files (space, comma or tab)
    """
    global data_path
    global graph_path
    global topo_features_path
    global topo_features_labels_path
    global embedding_path
    global graph
    global node_labels
    global number_classes
    global input
    global input_size
    global is_directed

    # Local import: only needed here to build the folder paths.
    import os

    # Build every folder path with the platform separator instead of a
    # hard-coded backslash. The trailing "" keeps the trailing separator, so
    # code that appends a file name by plain concatenation keeps working.
    # data folder path
    data_path = os.path.join("data", dataset_name, "")
    # graph folder path
    graph_path = os.path.join(data_path, "graph", "")
    # features folder path
    topo_features_path = os.path.join(data_path, "top_features", "")
    # features classes folder path
    topo_features_labels_path = os.path.join(data_path, "top_features_labels", "")
    # embedding folder path
    embedding_path = os.path.join(data_path, "embedding", "")
    # scores folder path (local: not one of the declared globals)
    scores_path = os.path.join(data_path, "scores", "")

    # Whether the graph is directed
    is_directed = directed

    # Load graphs
    if dataset_name in ("cora", "citeseer"):
        # Built-in citation datasets come from the `cg` loaders.
        data = cg.load_cora() if dataset_name == "cora" else cg.load_citeseer()
        graph = nx.Graph(data.graph)
        node_labels = data.labels
        input = torch.tensor(data.features).float()
    else:
        # Custom datasets are read from text files in the graph folder.
        graph = load_graph(graph_path + "edges.txt", 0, separator,
                           print_details=True, directed=directed)
        if with_labels:
            node_labels = load_groundtruth(graph_path + "groundtruth.txt", 0, separator)
        else:
            node_labels = []
        if with_attributes:
            input = load_attributes(graph_path + "attributes.txt", 0, separator)
        else:
            # No attributes: fall back to a one-hot identity matrix.
            input = torch.eye(len(graph.nodes))

    # input layer size
    input_size = len(input[0])
    # number of classes for the node labels
    number_classes = len(set(node_labels))

    # create directories if they do not exist
    # folder that holds the embeddings
    Path(embedding_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the topological features
    Path(topo_features_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the classes of the topological features
    Path(topo_features_labels_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the scores of the experiments
    Path(scores_path).mkdir(parents=True, exist_ok=True)

    print("graph details:", dataset_name)
    print("------------------")
    print("nodes", len(graph.nodes))
    print("edges", len(graph.edges))
    print("classes", len(set(node_labels)))
    print("------------------")