def load_cora_data():
    """Load the Cora citation dataset.

    Returns:
        tuple: ``(g, features, labels, mask)`` where ``g`` is the first item
        of the loaded dataset (``data[0]``), ``features`` is a
        ``torch.FloatTensor``, ``labels`` a ``torch.LongTensor`` and ``mask``
        a ``torch.BoolTensor`` built from the dataset's training mask.
    """
    cora = citegrh.load_cora()
    node_features = torch.FloatTensor(cora.features)
    node_labels = torch.LongTensor(cora.labels)
    train_mask = torch.BoolTensor(cora.train_mask)
    # The graph is taken by indexing the dataset rather than via ``.graph``.
    graph = cora[0]
    return graph, node_features, node_labels, train_mask
def load_citation_graph(graph_name):
    """
    Loads one of the DGL-hosted citation graph datasets

    :param graph_name: name of the citation graph to load; one of
        ['cora', 'citeseer', 'pubmed']
    :return: namedtuple for the citation graph dataset; attributes:
        [graph, features, labels, mask]
    :raises ValueError: if ``graph_name`` is not one of the supported names
    """
    # reject unsupported names before touching the dataset module
    if graph_name not in ("cora", "citeseer", "pubmed"):
        raise ValueError(
            "Unknown citation graph name <{:s}>; "
            "Expected one of [cora, citeseer, pubmed]".format(graph_name))

    # each supported name corresponds to a ``load_<name>`` loader function
    fetch = getattr(citation_graph, "load_" + graph_name)
    dataset = fetch()

    # bundle the dataset's components; ``mask`` is the training mask
    fields = ["graph", "features", "labels", "mask"]
    dataset_tuple = namedtuple("citation_graph", fields)
    return dataset_tuple(
        graph=DGLGraph(dataset.graph),
        features=torch.FloatTensor(dataset.features),
        labels=torch.LongTensor(dataset.labels),
        mask=torch.BoolTensor(dataset.train_mask),
    )
def load_cora_data():
    """Return the Cora graph with its features, labels and training mask.

    Returns:
        tuple: ``(g, features, labels, mask)``; ``mask`` is a uint8
        ``th.ByteTensor`` built from the dataset's training mask.
    """
    dataset = citegrh.load_cora()
    feats = th.FloatTensor(dataset.features)
    targets = th.LongTensor(dataset.labels)
    # NOTE(review): the mask is uint8, not bool; kept so the returned dtype
    # does not change for existing callers.
    train_mask = th.ByteTensor(dataset.train_mask)
    graph = DGLGraph(dataset.graph)
    return graph, feats, targets, train_mask
def load_cora_data():
    """Load Cora and return the graph, tensors and train/test masks.

    Returns:
        tuple: ``(g, features, labels, train_mask, test_mask)``. Features are
        a ``torch.FloatTensor``, labels a ``torch.LongTensor`` and both masks
        ``torch.BoolTensor``.
    """
    cora = citegrh.load_cora()
    node_features = torch.FloatTensor(cora.features)
    node_labels = torch.LongTensor(cora.labels)
    masks = (
        torch.BoolTensor(cora.train_mask),
        torch.BoolTensor(cora.test_mask),
    )
    graph = DGLGraph(cora.graph)
    return (graph, node_features, node_labels) + masks
def load_cora_data():
    """Load Cora and also report its number of classes.

    Returns:
        tuple: ``(n_classes, g, features, labels, mask)`` where ``n_classes``
        is read from ``data.num_classes`` and ``mask`` is the training mask
        as a ``torch.BoolTensor``.
    """
    cora = citegrh.load_cora()
    node_features = torch.FloatTensor(cora.features)
    node_labels = torch.LongTensor(cora.labels)
    train_mask = torch.BoolTensor(cora.train_mask)
    graph = DGLGraph(cora.graph)
    class_count = cora.num_classes
    return class_count, graph, node_features, node_labels, train_mask
def _load_data_citation_parts(loader):
    """Return ``(g, features, labels, num_labels)`` from a citation loader."""
    data = loader()
    features = th.FloatTensor(data.features)
    labels = th.LongTensor(data.labels)
    num_labels = data.num_labels
    return DGLGraph(data.graph), features, labels, num_labels


def _load_data_benchmark_parts(benchmark):
    """Return ``(g, features, labels, num_labels)`` from a benchmark dataset."""
    g = benchmark[0]
    features = th.FloatTensor(g.ndata['feat'].float())
    labels = th.LongTensor(g.ndata['label'])
    # No class count is read from these datasets; derive it from the largest
    # label id present.
    num_labels = int(th.max(labels) + 1)
    return g, features, labels, num_labels


def load_data(dataset):
    """Load a node-classification dataset and split it 70/20/10 by position.

    Args:
        dataset: dataset name. ``'cora'``, ``'pubmed'`` and ``'citeseer'``
            use the citation loaders; ``'amazon-computers'``,
            ``'amazon-photo'`` and ``'coauthor-cs'`` use the benchmark
            datasets. Any other value falls back to ``Coauthor('physics')``.

    Returns:
        tuple: ``(g, features, labels, num_labels, train_mask, val_mask,
        test_mask)``. The masks are ``th.BoolTensor`` covering the first 70%,
        the next 20% and the final 10% of the nodes in index order.
    """
    if dataset == 'cora':
        parts = _load_data_citation_parts(citegrh.load_cora)
    elif dataset == 'pubmed':
        parts = _load_data_citation_parts(citegrh.load_pubmed)
    elif dataset == 'citeseer':
        parts = _load_data_citation_parts(citegrh.load_citeseer)
    elif dataset == 'amazon-computers':
        parts = _load_data_benchmark_parts(gnn_benckmark.AmazonCoBuy('computers'))
    elif dataset == 'amazon-photo':
        parts = _load_data_benchmark_parts(gnn_benckmark.AmazonCoBuy('photo'))
    elif dataset == 'coauthor-cs':
        parts = _load_data_benchmark_parts(gnn_benckmark.Coauthor('cs'))
    else:
        # Historical behaviour: every unrecognised name loads Coauthor physics.
        parts = _load_data_benchmark_parts(gnn_benckmark.Coauthor('physics'))
    g, features, labels, num_labels = parts

    # Dataset split points.
    num_nodes = labels.shape[0]
    split1 = int(0.7 * num_nodes)
    split2 = int(0.9 * num_nodes)
    train_mask = th.BoolTensor(_sample_mask(range(split1), num_nodes))
    val_mask = th.BoolTensor(_sample_mask(range(split1, split2), num_nodes))
    # The test range runs to ``num_nodes`` (exclusive) so the last node is
    # included; stopping at ``num_nodes - 1`` left it outside every split.
    test_mask = th.BoolTensor(_sample_mask(range(split2, num_nodes), num_nodes))

    print(
        "Total size: {:}| Feature dims: {:}| Train size: {:}| Val size: {:}| Test size: {:}| Num of labels: {:}"
        .format(features.size(0), features.size(1), len(labels[train_mask]),
                len(labels[val_mask]), len(labels[test_mask]), num_labels))
    return g, features, labels, num_labels, train_mask, val_mask, test_mask
def load_Coradata():
    """Load Cora and return its raw graph together with label/feature tensors.

    Returns:
        tuple: ``(graph, num_classes, labels, features)``. ``graph`` is
        ``data.graph`` unchanged, ``num_classes`` is ``data.num_labels``,
        ``labels`` is a ``torch.LongTensor`` and ``features`` a
        ``torch.FloatTensor``.
    """
    cora = citation.load_cora()
    label_tensor = torch.LongTensor(cora.labels)
    feature_tensor = torch.FloatTensor(cora.features)
    return cora.graph, cora.num_labels, label_tensor, feature_tensor
def load_cora_data():
    """Load Cora with exactly one self-loop per node.

    Existing self-loops are removed from ``data.graph`` before it is wrapped
    in a ``DGLGraph``; a self-loop is then added for every node.

    Returns:
        tuple: ``(g, features, labels, mask)`` where ``features`` is a float
        tensor, ``labels`` a long tensor and ``mask`` the training mask as a
        uint8 tensor.
    """
    # Imported locally so the function does not depend on a module-level alias.
    import networkx as nx

    data = citegraph.load_cora()
    features = torch.Tensor(data.features).float()
    labels = torch.Tensor(data.labels).long()
    mask = torch.Tensor(data.train_mask).byte()

    g = data.graph
    # The original called the misspelled ``g.selfloog_edges()``, which raises
    # AttributeError. The edges are materialised first so the graph is not
    # mutated while it is being iterated.
    g.remove_edges_from(list(nx.selfloop_edges(g)))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    return g, features, labels, mask
def load_cora_data():
    """Load Cora, add a self-loop to every node and return train/test masks.

    Self-loops already present in ``data.graph`` are not removed first.

    Returns:
        tuple: ``(g, features, labels, train_mask, test_mask)``.
    """
    cora = citegrh.load_cora()
    feats = th.FloatTensor(cora.features)
    targets = th.LongTensor(cora.labels)
    train_mask = th.BoolTensor(cora.train_mask)
    test_mask = th.BoolTensor(cora.test_mask)

    graph = dgl.DGLGraph(cora.graph)
    # Connect every node to itself.
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, feats, targets, train_mask, test_mask
def load_cora_data():
    """Load Cora with self-loops normalised to one per node.

    Returns:
        tuple: ``(g, features, labels, mask)`` where ``mask`` is the training
        mask as a ``torch.BoolTensor``.
    """
    cora = citegrh.load_cora()
    node_features = torch.FloatTensor(cora.features)
    node_labels = torch.LongTensor(cora.labels)
    train_mask = torch.BoolTensor(cora.train_mask)

    # Drop any existing self-loops, then add one per node.
    nx_graph = cora.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, node_features, node_labels, train_mask
def load_cora_data():
    """
    Load the CORA dataset.

    Self-loops found in the source graph are removed and one self-loop is
    added per node. Returns ``(g, features, labels, train_mask, test_mask)``.
    """
    cora = citegrh.load_cora()

    nx_graph = cora.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    # add self loop
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())

    return (
        graph,
        th.FloatTensor(cora.features),
        th.LongTensor(cora.labels),
        th.BoolTensor(cora.train_mask),
        th.BoolTensor(cora.test_mask),
    )
def load_cora_data(device):
    """Load Cora and move its tensors to ``device``.

    Args:
        device: target passed to ``Tensor.to`` for the features, labels and
            the three masks. The graph itself is not moved.

    Returns:
        tuple: ``(g, features, labels, train_mask, valid_mask, test_mask)``.
    """
    cora = citation_graph.load_cora()
    feats = torch.FloatTensor(cora.features).to(device)
    targets = torch.LongTensor(cora.labels).to(device)
    train_mask, valid_mask, test_mask = (
        torch.BoolTensor(raw_mask).to(device)
        for raw_mask in (cora.train_mask, cora.val_mask, cora.test_mask)
    )

    # Replace whatever self-loops exist with exactly one per node.
    nx_graph = cora.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, feats, targets, train_mask, valid_mask, test_mask
def load_data(dataset_name: str):
    """Load one of the citation datasets by name.

    Args:
        dataset_name: ``"cora"``, ``"citeseer"`` or ``"pubmed"``.

    Returns:
        tuple: ``(g, features, labels, train_mask, test_mask)``.

    Raises:
        ValueError: if ``dataset_name`` is not a supported name. Previously
            an unknown name fell through every ``if`` and failed later with
            an ``UnboundLocalError`` on ``data``.
    """
    supported = ("cora", "citeseer", "pubmed")
    if dataset_name not in supported:
        raise ValueError(
            "Unknown dataset {!r}; expected one of {}".format(
                dataset_name, list(supported)))

    if dataset_name == "cora":
        data = citegrh.load_cora()
    elif dataset_name == "citeseer":
        data = citegrh.load_citeseer()
    else:
        data = citegrh.load_pubmed()

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask
def load_data(dataset_name, self_loops):
    """Return the raw dataset object selected by ``dataset_name``.

    Args:
        dataset_name: ``'cora'``, ``'citeseer'``, ``'pubmed'``, ``"PPI"`` or
            any string starting with ``'reddit'``.
        self_loops: forwarded as ``self_loop`` to ``RedditDataset``; ignored
            for the other datasets.

    Returns:
        The object produced by the matching loader or dataset constructor.

    Raises:
        ValueError: if ``dataset_name`` matches none of the above
            (including ``None``).
    """
    if dataset_name in ('cora', 'citeseer', 'pubmed'):
        # The citation loaders are named ``load_<dataset_name>``.
        return getattr(citegrh, 'load_' + dataset_name)()

    if dataset_name == "PPI":
        return PPIDataset('test')

    wants_reddit = dataset_name is not None and dataset_name.startswith('reddit')
    if wants_reddit:
        return RedditDataset(self_loop=self_loops)

    raise ValueError('Unknown dataset: {}'.format(dataset_name))
def load_cora_data():
    """Load Cora with one self-loop per node, printing diagnostics on the way.

    Prints the number of labels, the feature/label/mask lengths, the raw
    training mask and the final node and edge counts.

    Returns:
        tuple: ``(g, features, labels, mask)`` where ``mask`` is the training
        mask as a uint8 ``torch.ByteTensor``.
    """
    # Imported locally so the function does not depend on a module-level alias.
    import networkx as nx

    data = citegrh.load_cora()
    print(data.num_labels)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # NOTE(review): the mask is uint8, not bool; kept so the returned dtype
    # does not change for existing callers.
    mask = torch.ByteTensor(data.train_mask)
    print(len(features), len(labels))
    print(len(mask))
    print(data.train_mask)

    g = data.graph
    # add self loop
    # ``Graph.selfloop_edges()`` no longer exists in current networkx; the
    # module-level function is the supported spelling. The edges are
    # materialised first so the graph is not mutated while being iterated.
    g.remove_edges_from(list(nx.selfloop_edges(g)))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    print(g.number_of_nodes(), g.number_of_edges())
    return g, features, labels, mask
def load_cora_data():
    '''
    Cora dataset function

    Loads Cora, strips self-loops from the source graph and wraps it in a
    ``DGLGraph``. No self-loops are added back and no normalization factor
    is computed here.

    Returns ``(g, features, labels, mask, val_mask, test_mask)``; ``mask`` is
    the training mask. All three masks are ``torch.BoolTensor``.
    '''
    data = citegrh.load_cora()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # The training mask used to be a uint8 ByteTensor while the other two
    # masks were bool; all three now share the bool dtype.
    mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)

    # graph preprocessing: remove self-loops before building the DGL graph
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)

    # return graph, node features, labels, and the train/val/test masks
    return g, features, labels, mask, val_mask, test_mask
def load_cora_data(show=False):
    """Load Cora with one self-loop per node and optionally draw the graph.

    Args:
        show: only the literal ``True`` triggers plotting; the graph is then
            drawn with networkx on a new matplotlib figure and shown.

    Returns:
        tuple: ``(g, features, labels, train_mask, test_mask)``.
    """
    cora = citation_graph.load_cora()
    node_features = torch.FloatTensor(cora.features)
    node_labels = torch.LongTensor(cora.labels)
    train_mask = torch.BoolTensor(cora.train_mask)
    test_mask = torch.BoolTensor(cora.test_mask)

    nx_graph = cora.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())

    if show is True:
        _, axis = plt.subplots()
        nx.draw(graph.to_networkx(), ax=axis)
        axis.set_title('Cora citation graph')
        plt.show()

    return graph, node_features, node_labels, train_mask, test_mask
def load_cora_data():
    """Load Cora and dump the graph and tensors to stdout for inspection.

    Prints the graph's edges and edge data, then the size and contents of
    the feature, label and mask tensors.

    Returns:
        tuple: ``(g, features, labels, mask)`` where ``mask`` is the training
        mask as a uint8 ``th.ByteTensor``.
    """
    cora = citegrh.load_cora()
    feats = th.FloatTensor(cora.features)
    targets = th.LongTensor(cora.labels)
    # NOTE(review): the mask is uint8, not bool; kept so the returned dtype
    # does not change for existing callers.
    train_mask = th.ByteTensor(cora.train_mask)
    graph = DGLGraph(cora.graph)

    print("g: ")
    print(graph.all_edges())
    print(graph.edata)

    # Same three-line report for each tensor: heading, size, contents.
    for heading, tensor in (("features: ", feats),
                            ("labels: ", targets),
                            ("mask: ", train_mask)):
        print(heading)
        print(tensor.size())
        print(tensor)

    return graph, feats, targets, train_mask
def load_data(dataset="cora"):
    """Load a dataset and attach graph-derived attributes to it.

    Args:
        dataset: ``"cora"``, ``"pubmed"``, ``"citeseer"`` or ``"synthetic"``
            (checked with an ``assert``).

    Returns:
        The loaded data object, modified in place: ``features`` and
        ``labels`` become tensors, ``size`` is the number of labels, ``g`` is
        a ``DGLGraph`` with one self-loop per node, ``adj`` its dense
        adjacency matrix and ``Prob`` the result of ``normalize`` on ``adj``
        with ``p=1, dim=1``.
    """
    assert dataset in ["cora", "pubmed", "citeseer", "synthetic"]

    citation_loader_names = {
        "cora": "load_cora",
        "pubmed": "load_pubmed",
        "citeseer": "load_citeseer",
    }
    if dataset in citation_loader_names:
        data = getattr(citegrh, citation_loader_names[dataset])()
    else:
        data = synthetic_data()

    data.features = th.FloatTensor(data.features)
    data.labels = th.LongTensor(data.labels)
    data.size = data.labels.shape[0]

    # Rebuild the graph with exactly one self-loop per node.
    nx_graph = data.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())
    data.g = graph

    data.adj = graph.adjacency_matrix(transpose=None).to_dense()
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)
    print("============Successfully Load %s===============" % dataset)
    return data
def load_data():
    """Load Cora and return a feature lookup, adjacency list and labels.

    Returns:
        tuple: ``(features, adj_list, labels)``. ``features`` is an
        ``nn.Embedding`` whose weight is the dataset's feature matrix as
        float32 with ``requires_grad=False``. ``adj_list`` is whatever
        ``model.index_adjmatrix()`` returns. ``labels`` is ``data.labels``
        unchanged.
    """
    # load the data from the package
    data = citation.load_cora()
    # labels of every node
    labels = data.labels

    # build the graph model and compute its adjacency matrix
    model = construct_graph(data.graph)
    # NOTE(review): called before index_adjmatrix(), as in the original;
    # presumably the latter depends on it. Confirm against construct_graph.
    model.adjency_matrix()

    # wrap the feature matrix in an embedding table with a frozen weight
    data_features = data.features
    features = nn.Embedding(data_features.shape[0], data_features.shape[1])
    features.weight = nn.Parameter(
        torch.tensor(data_features, dtype=torch.float32),
        requires_grad=False)

    # neighbours of each node
    adj_list = model.index_adjmatrix()
    return features, adj_list, labels
def load_custom_dataset(dataset_name, with_attributes, with_labels, directed, separator):
    """
    loads the dataset into memory

    The results are stored in module-level globals (paths, ``graph``,
    ``node_labels``, ``number_classes``, ``input``, ``input_size``,
    ``is_directed``); nothing is returned. The embedding, topological
    feature, feature-label and score directories are created if missing.

    :param dataset_name: The name of the dataset (As named in the folder data).
        "cora" and "citeseer" are loaded through ``cg`` and converted to an
        undirected ``nx.Graph``; any other name is read from files under the
        dataset's graph folder.
    :param with_attributes: if it has attributes (custom datasets only;
        otherwise an identity matrix is used as input)
    :param with_labels: if the dataset has labels (ground truth) (custom
        datasets only; otherwise the label list is empty)
    :param directed: if the graph is directed
    :param separator: the separator character in the files (" " or "," or "\\t")
    """
    global data_path
    global graph_path
    global topo_features_path
    global topo_features_labels_path
    global embedding_path
    global graph
    global node_labels
    global number_classes
    global input
    global input_size
    global is_directed

    import os

    # Paths are built with os.path.join instead of hard-coded "\\" so they
    # work on every platform. The trailing "" keeps the trailing separator
    # the original strings had, so ``path + "file.txt"`` still works.
    # data folder path
    data_path = os.path.join("data", dataset_name, "")
    # graph folder path
    graph_path = os.path.join(data_path, "graph", "")
    # features folder path
    topo_features_path = os.path.join(data_path, "top_features", "")
    # features classes folder path
    topo_features_labels_path = os.path.join(data_path, "top_features_labels", "")
    # pretreatment folder path
    embedding_path = os.path.join(data_path, "embedding", "")
    # scores folder path
    # NOTE(review): scores_path is local, not one of the declared globals.
    scores_path = os.path.join(data_path, "scores", "")

    # The graph is directed
    is_directed = directed

    # Load graphs
    if dataset_name == "cora":
        data = cg.load_cora()
        graph = nx.Graph(data.graph)
        node_labels = data.labels
        input = torch.tensor(data.features).float()
    elif dataset_name == "citeseer":
        data = cg.load_citeseer()
        graph = nx.Graph(data.graph)
        node_labels = data.labels
        input = torch.tensor(data.features).float()
    else:
        graph = load_graph(graph_path + "edges.txt", 0, separator,
                           print_details=True, directed=directed)
        if with_labels:
            node_labels = load_groundtruth(graph_path + "groundtruth.txt", 0, separator)
        else:
            node_labels = []
        if with_attributes:
            input = load_attributes(graph_path + "attributes.txt", 0, separator)
        else:
            # no attributes: use an identity matrix with one row per node
            input = torch.eye(len(graph.nodes))

    # input layer size
    input_size = len(input[0])
    # number of classes for the node labels
    number_classes = len(set(node_labels))

    # create directories if they do not exist
    # folder that holds the embeddings
    Path(embedding_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the topological features
    Path(topo_features_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the classes of the topological features
    Path(topo_features_labels_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the scores of the experiments
    Path(scores_path).mkdir(parents=True, exist_ok=True)

    print("graph details:", dataset_name)
    print("------------------")
    print("nodes", len(graph.nodes))
    print("edges", len(graph.edges))
    print("classes", len(set(node_labels)))
    print("------------------")
from dgl import DGLGraph
from model.inference_time import inference
from model.gcn import GCN
from model.gcn_GRU import GCN_GRU
from model.gcn_gated import GCN_GATED
from model.graphsage import GraphSAGE
from dgl.nn.pytorch.conv import SGConv
from model.tagcn import TAGCN
from model.rgcn_ import RGCN_Class

# Base path used for results (presumably feature dumps - confirm with users
# of ``root``).
root = './result/feature/'

# Use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Only Cora is loaded; the alternatives below are kept disabled.
Cora = citation_graph.load_cora()
#CiteSeer = citation_graph.load_citeseer()
#PubMed = citation_graph.load_pubmed()
#Nell = citation_graph.load_nell_0_001()
#Coauthor_cs = Coauthor('cs')
#Coauthor_physics = Coauthor('cs')
#Amazon_computer = AmazonCoBuy('computers')
#Amazon_photo = AmazonCoBuy('photo')
#CoraFull = CoraFull()
#Reddit = RedditDataset(self_loop=True)
#Enwiki = citation_graph.load_RMAT('enwiki',100,10)
#Amazon = citation_graph.load_RMAT('amazon',100,10)
#M3 = citation_graph.load_RMAT('3M',100,10)
#3M_276M = citation_graph.load_RMAT('3M_276M',100,10)
#_21M = citation_graph.load_RMAT('21',100,10)
x = tf.nn.log_softmax(self.layer2(g, x)) return x from dgl.data import citation_graph as citegrh import networkx as nx #data = citegrh.load_cora() #with tf.device('/GPU:0'): with tf.device('/device:CPU:0'): #model = Net() #features = tf.convert_to_tensor(torch.FloatTensor(data.features).numpy(), numpy.float32) #feaures = torch.FloatTensor(data.features) #with torch.no_grad(): with tf.profiler.experimental.Profile('logdir'): #tf.profiler.experimental.start('logdir'): data = citegrh.load_cora() model = Net() features = tf.convert_to_tensor(data.features, numpy.float32) g = DGLGraph(data.graph) out = model(g, features) pass #tf.profiler.experimental.stop()
def __init__(self, name, seed, self_loop=False, split=None):
    """Load a small graph dataset and prepare tensors and train/test masks.

    Args:
        name: ``'cora'``, ``'citeseer'``, ``'pubmed'``, ``'amazon'`` or
            ``'karate'``.
        seed: only used when ``split`` is given. ``0`` takes the first
            ``sample_size`` nodes as training nodes; any other value seeds
            ``random`` and samples training nodes outside the test range.
        self_loop: when true, self-loops are added to the graph (via
            ``self.add_selfloop`` for the citation datasets, directly on the
            DGL graph otherwise).
        split: fraction in (0, 1) of the nodes used for training; must not
            be ``None`` for ``'amazon'``. When falsy, the dataset's own
            train/test masks are used.

    Raises:
        ValueError: if ``name`` is not a supported dataset. Previously this
            failed later with an ``UnboundLocalError`` on ``graph``.
    """
    super(SmallGraphDataset, self).__init__()

    citation_names = ('cora', 'citeseer', 'pubmed')
    if name in citation_names:
        # The three citation datasets share the same preparation steps and
        # differ only in the ``load_<name>`` loader.
        data = getattr(citegrh, 'load_' + name)()
        graph = data.graph
        if self_loop:
            graph = self.add_selfloop(graph)
        graph = dgl.DGLGraph(graph)
        features = data.features
        labels = data.labels
    elif name == 'amazon':
        # must create split: this dataset is always split via ``split``
        assert split is not None
        data = AmazonCoBuy(name='computers')
        graph = data.data[0]
        if self_loop:
            graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
            graph.add_edges(graph.nodes(), graph.nodes())
        features = graph.ndata['feat']
        labels = graph.ndata['label']
    elif name == 'karate':
        kG = nx.karate_club_graph()
        # Mr. Hi's club: 0, the other club: 1
        labels = np.array(
            [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]).astype(np.int64)
        graph = dgl.DGLGraph(kG)
        if self_loop:
            graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
            graph.add_edges(graph.nodes(), graph.nodes())
        features = torch.eye(n=graph.number_of_nodes())
        # Fixed split: train on one node per club, test on everything else.
        self.train_mask = torch.zeros(graph.number_of_nodes(), dtype=torch.bool)
        self.train_mask[0] = True  # Mr.Hi
        self.train_mask[33] = True  # John A
        self.test_mask = ~self.train_mask
    else:
        raise ValueError('Unknown dataset name: {!r}'.format(name))

    graph = self.compute_norm(graph)

    self.graph = graph
    self.features = torch.FloatTensor(features)
    self.n_features = self.features.size(1)
    self.labels = torch.LongTensor(labels)
    self.n_label = torch.unique(self.labels).size(0)
    self.n_nodes = graph.number_of_nodes()

    # Masks were already set above (karate); nothing more to do.
    if hasattr(self, 'train_mask'):
        return

    if split:
        print('using {} for training data.'.format(split))
        assert(split > 0.0)
        assert(split < 1.0)
        sample_size = ceil(self.n_nodes*split)
        # ``np.bool`` was removed from NumPy; the builtin ``bool`` is the
        # equivalent dtype.
        train_np = np.zeros(self.n_nodes, dtype=bool)
        test_np = np.zeros(self.n_nodes, dtype=bool)
        # Nodes 500..1499 are always the test set.
        test_np[range(500, 1500)] = 1
        if seed == 0:
            # use first few data points as seed
            train_idx = range(sample_size)
            train_np[train_idx] = 1
        else:
            random.seed(seed)
            # Sample from the nodes outside the test range, then shift
            # indices at or beyond 500 past it.
            train_idx = random.sample(range(self.n_nodes-1000), sample_size)
            mapped_train_idx = [idx if idx < 500 else idx+1000 for idx in train_idx]
            train_np[mapped_train_idx] = 1
        self.train_mask = torch.tensor(train_np, dtype=torch.bool)
        self.test_mask = torch.tensor(test_np, dtype=torch.bool)
    else:
        # use original split
        self.train_mask = torch.BoolTensor(data.train_mask)
        self.test_mask = torch.BoolTensor(data.test_mask)
#G = mmread(path) #nxgraph = nx.Graph(G) #graph = dgl.from_networkx(nxgraph) edges = readmtxGraph(path) graph = dgl.graph(edges) #print(graph.edges()) elif graph == "simple": graph = dgl.graph(([0, 0, 1, 1, 2, 3], [1, 2, 2, 4, 3, 4])) elif graph == "citeseer": data = load_citeseer(".") graph = data[0] elif graph == "pubmed": data = load_pubmed(".") graph = data[0] else: data = load_cora(".") graph = data[0] N = len(graph.nodes()) print("#Nodes:", N, "#Edges:", len(graph.edges()[0])) embed = torch.rand(N, dim) #print(embed) #need to check batch processing ... print("Creating batch graphs...") if bsize == 256: bgraphs = batch_process(graph, 1024, 50) elif bsize == 1: bgraphs = [[graph, 0, N]] else: bgraphs = batch_process(graph, bsize) print("Done!") #cacheflush()
def load_cora_data():
    """Load Cora and convert its networkx graph with ``dgl.from_networkx``.

    Returns:
        tuple: ``(g, features, labels)`` with ``features`` as a
        ``torch.FloatTensor`` and ``labels`` as a ``torch.LongTensor``.
    """
    cora = citegrh.load_cora()
    node_features = torch.FloatTensor(cora.features)
    node_labels = torch.LongTensor(cora.labels)
    graph = dgl.from_networkx(cora.graph)
    return graph, node_features, node_labels