Esempio n. 1
0
def load_cora_data():
    """Load the Cora citation dataset.

    Returns a 4-tuple: (graph, node features, node labels, train mask).
    """
    dataset = citegrh.load_cora()
    feats = torch.FloatTensor(dataset.features)
    targets = torch.LongTensor(dataset.labels)
    train_mask = torch.BoolTensor(dataset.train_mask)
    graph = dataset[0]
    return graph, feats, targets, train_mask
def load_citation_graph(graph_name):
    """
    Loads one of the DGL-hosted citation graph datasets

    :param graph_name: name of the citation graph to load; one of
        ['cora', 'citeseer', 'pubmed']
    :return: namedtuple for the citation graph dataset; attributes:
        [graph, features, labels, mask]
    """
    # validate the name before touching the loader module
    valid_names = ('cora', 'citeseer', 'pubmed')
    if graph_name not in valid_names:
        raise ValueError(
            "Unknown citation graph name <{:s}>; "
            "Expected one of [cora, citeseer, pubmed]".format(graph_name))

    # dispatch to the matching loader (load_cora / load_citeseer / load_pubmed)
    loader = getattr(citation_graph, 'load_' + graph_name)
    dataset = loader()

    # package the dataset's components into a small named record
    dataset_tuple = namedtuple("citation_graph",
                               ["graph", "features", "labels", "mask"])
    return dataset_tuple(DGLGraph(dataset.graph),
                         torch.FloatTensor(dataset.features),
                         torch.LongTensor(dataset.labels),
                         torch.BoolTensor(dataset.train_mask))
Esempio n. 3
0
def load_cora_data():
    """Fetch Cora; return (graph, features, labels, train mask)."""
    cora = citegrh.load_cora()
    x = th.FloatTensor(cora.features)
    y = th.LongTensor(cora.labels)
    train_mask = th.ByteTensor(cora.train_mask)
    graph = DGLGraph(cora.graph)
    return graph, x, y, train_mask
Esempio n. 4
0
 def load_cora_data():
     """Load Cora; return (graph, features, labels, train mask, test mask)."""
     ds = citegrh.load_cora()
     x = torch.FloatTensor(ds.features)
     y = torch.LongTensor(ds.labels)
     tr_mask = torch.BoolTensor(ds.train_mask)
     te_mask = torch.BoolTensor(ds.test_mask)
     graph = DGLGraph(ds.graph)
     return graph, x, y, tr_mask, te_mask
Esempio n. 5
0
def load_cora_data():
    """Load Cora; return (n_classes, graph, features, labels, train mask)."""
    cora = citegrh.load_cora()
    x = torch.FloatTensor(cora.features)
    y = torch.LongTensor(cora.labels)
    train_mask = torch.BoolTensor(cora.train_mask)
    graph = DGLGraph(cora.graph)
    num_classes = cora.num_classes
    return num_classes, graph, x, y, train_mask
Esempio n. 6
0
def load_data(dataset):
    """Load a node-classification dataset and build a 70/20/10 split.

    :param dataset: one of ['cora', 'pubmed', 'citeseer',
        'amazon-computers', 'amazon-photo', 'coauthor-cs']; any other
        value falls back to the Coauthor physics graph
    :return: (g, features, labels, num_labels, train_mask, val_mask,
        test_mask)
    """
    citation_loaders = {
        'cora': citegrh.load_cora,
        'pubmed': citegrh.load_pubmed,
        'citeseer': citegrh.load_citeseer,
    }
    if dataset in citation_loaders:
        data = citation_loaders[dataset]()
        features = th.FloatTensor(data.features)
        labels = th.LongTensor(data.labels)
        num_labels = data.num_labels
        g = DGLGraph(data.graph)
    else:
        # benchmark datasets keep features/labels on the graph itself
        # (NOTE: `gnn_benckmark` is the [misspelled] name this file imports)
        if dataset == 'amazon-computers':
            bench = gnn_benckmark.AmazonCoBuy('computers')
        elif dataset == 'amazon-photo':
            bench = gnn_benckmark.AmazonCoBuy('photo')
        elif dataset == 'coauthor-cs':
            bench = gnn_benckmark.Coauthor('cs')
        else:
            bench = gnn_benckmark.Coauthor('physics')
        g = bench[0]
        features = th.FloatTensor(g.ndata['feat'].float())
        labels = th.LongTensor(g.ndata['label'])
        # no label count in the dataset object; infer it from the labels
        num_labels = int(th.max(labels) + 1)

    # dataset split points: 70% train / 20% validation / 10% test
    n = labels.shape[0]
    split1 = int(0.7 * n)
    split2 = int(0.9 * n)
    train_mask = th.BoolTensor(_sample_mask(range(split1), n))
    val_mask = th.BoolTensor(_sample_mask(range(split1, split2), n))
    # BUG FIX: the original used range(split2, n - 1), which silently
    # dropped the last node from every split; include it in the test set.
    test_mask = th.BoolTensor(_sample_mask(range(split2, n), n))
    print(
        "Total size: {:}| Feature dims: {:}| Train size: {:}| Val size: {:}| Test size: {:}| Num of labels: {:}"
        .format(features.size(0), features.size(1), len(labels[train_mask]),
                len(labels[val_mask]), len(labels[test_mask]), num_labels))
    return g, features, labels, num_labels, train_mask, val_mask, test_mask
Esempio n. 7
0
def load_Coradata():
	"""Load Cora; return (graph, num_classes, labels, features)."""
	cora = citation.load_cora()
	# ground-truth label per node
	targets = torch.LongTensor(cora.labels)
	# number of distinct classes
	n_classes = cora.num_labels
	g = cora.graph
	x = torch.FloatTensor(cora.features)
	return g, n_classes, targets, x
Esempio n. 8
0
def load_cora_data():
    """Load Cora; return (graph, features, labels, train mask).

    The returned DGL graph is normalized to carry exactly one
    self-loop per node.
    """
    data = citegraph.load_cora()
    features = torch.Tensor(data.features).float()
    labels = torch.Tensor(data.labels).long()
    mask = torch.Tensor(data.train_mask).byte()
    g = data.graph
    # BUG FIX: `g.selfloog_edges()` is a typo that raises AttributeError at
    # runtime; use networkx's selfloop_edges, as the sibling loaders do.
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    # add exactly one self loop per node
    g.add_edges(g.nodes(), g.nodes())
    return g, features, labels, mask
Esempio n. 9
0
def load_cora_data():
    """Load Cora with self loops; return (g, features, labels,
    train mask, test mask)."""
    cora = citegrh.load_cora()
    x = th.FloatTensor(cora.features)
    y = th.LongTensor(cora.labels)
    tr_mask = th.BoolTensor(cora.train_mask)
    te_mask = th.BoolTensor(cora.test_mask)
    graph = dgl.DGLGraph(cora.graph)
    # one self loop per node
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, x, y, tr_mask, te_mask
Esempio n. 10
0
def load_cora_data():
    """Load Cora; return (graph, features, labels, train mask).

    The returned DGL graph carries exactly one self loop per node.
    """
    cora = citegrh.load_cora()
    x = torch.FloatTensor(cora.features)
    y = torch.LongTensor(cora.labels)
    train_mask = torch.BoolTensor(cora.train_mask)
    nx_graph = cora.graph
    # strip any pre-existing self loops, then add exactly one per node
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, x, y, train_mask
Esempio n. 11
0
def load_cora_data():
    """
    Load the CORA dataset.

    Returns (graph, features, labels, train_mask, test_mask); the graph
    is rebuilt with exactly one self loop per node.
    """
    cora = citegrh.load_cora()
    x = th.FloatTensor(cora.features)
    y = th.LongTensor(cora.labels)
    tr_mask = th.BoolTensor(cora.train_mask)
    te_mask = th.BoolTensor(cora.test_mask)
    nx_graph = cora.graph
    # drop existing self loops before wrapping, then re-add one per node
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, x, y, tr_mask, te_mask
Esempio n. 12
0
def load_cora_data(device):
    """Load Cora onto *device*.

    :param device: torch device for the returned tensors
    :return: (g, features, labels, train_mask, valid_mask, test_mask)
    """
    cora = citation_graph.load_cora()

    x = torch.FloatTensor(cora.features).to(device)
    y = torch.LongTensor(cora.labels).to(device)
    tr_mask = torch.BoolTensor(cora.train_mask).to(device)
    va_mask = torch.BoolTensor(cora.val_mask).to(device)
    te_mask = torch.BoolTensor(cora.test_mask).to(device)
    nx_graph = cora.graph
    # normalize to exactly one self loop per node
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())
    return graph, x, y, tr_mask, va_mask, te_mask
def load_data(dataset_name: str):
    """Load one of the DGL citation datasets by name.

    :param dataset_name: 'cora', 'citeseer' or 'pubmed'
    :return: (g, features, labels, train_mask, test_mask)
    :raises ValueError: for any other dataset name
    """
    if dataset_name == "cora":
        data = citegrh.load_cora()
    elif dataset_name == "citeseer":
        data = citegrh.load_citeseer()
    elif dataset_name == "pubmed":
        data = citegrh.load_pubmed()
    else:
        # BUG FIX: the original had no fallback, so an unknown name crashed
        # later with a confusing UnboundLocalError on `data`; fail fast.
        raise ValueError("Unknown dataset: {}".format(dataset_name))

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    train_mask = torch.BoolTensor(data.train_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask
Esempio n. 14
0
def load_data(dataset_name, self_loops):
    """Return the dataset object for *dataset_name*.

    Supported names: the citation graphs ('cora', 'citeseer', 'pubmed'),
    'PPI' (test split), and anything starting with 'reddit'.

    :param self_loops: forwarded to RedditDataset only
    :raises ValueError: for any other name (including None)
    """
    if dataset_name == 'cora':
        return citegrh.load_cora()
    if dataset_name == 'citeseer':
        return citegrh.load_citeseer()
    if dataset_name == 'pubmed':
        return citegrh.load_pubmed()
    if dataset_name == "PPI":
        return PPIDataset('test')
    if dataset_name is not None and dataset_name.startswith('reddit'):
        return RedditDataset(self_loop=self_loops)
    raise ValueError('Unknown dataset: {}'.format(dataset_name))
Esempio n. 15
0
def load_cora_data():
    """Load Cora, dump diagnostics to stdout, and return
    (g, features, labels, train mask)."""
    cora = citegrh.load_cora()
    print(cora.num_labels)
    x = torch.FloatTensor(cora.features)
    y = torch.LongTensor(cora.labels)
    train_mask = torch.ByteTensor(cora.train_mask)
    print(len(x), len(y))
    print(len(train_mask))
    print(cora.train_mask)
    graph = cora.graph
    # strip existing self loops, then add exactly one per node
    graph.remove_edges_from(graph.selfloop_edges())
    graph = DGLGraph(graph)
    graph.add_edges(graph.nodes(), graph.nodes())
    print(graph.number_of_nodes(), graph.number_of_edges())
    return graph, x, y, train_mask
Esempio n. 16
0
def load_cora_data():
    '''
    Cora dataset function

    Returns (graph, features, labels, train mask, val mask, test mask);
    the graph has its self loops removed before wrapping in a DGLGraph.
    '''
    data = citegrh.load_cora()
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # CONSISTENCY FIX: the train mask was a ByteTensor while the val/test
    # masks were BoolTensor; uint8 mask indexing is deprecated in torch,
    # so all three masks are now BoolTensor.
    mask = torch.BoolTensor(data.train_mask)
    val_mask = torch.BoolTensor(data.val_mask)
    test_mask = torch.BoolTensor(data.test_mask)
    # graph preprocess: drop self loops before wrapping in a DGLGraph
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    # return graph, node features, labels, and the three masks
    return g, features, labels, mask, val_mask, test_mask
Esempio n. 17
0
File: gcn.py Progetto: vndee/gnn
def load_cora_data(show=False):
    """Load Cora and optionally draw the citation graph.

    :param show: when True, render the graph with matplotlib
    :return: (g, features, labels, train_mask, test_mask)
    """
    cora = citation_graph.load_cora()
    x = torch.FloatTensor(cora.features)
    y = torch.LongTensor(cora.labels)
    tr_mask = torch.BoolTensor(cora.train_mask)
    te_mask = torch.BoolTensor(cora.test_mask)

    # normalize to exactly one self loop per node
    nx_graph = cora.graph
    nx_graph.remove_edges_from(nx.selfloop_edges(nx_graph))
    graph = DGLGraph(nx_graph)
    graph.add_edges(graph.nodes(), graph.nodes())

    if show is True:
        figs, ax = plt.subplots()
        nx.draw(graph.to_networkx(), ax=ax)
        ax.set_title('Cora citation graph')
        plt.show()

    return graph, x, y, tr_mask, te_mask
Esempio n. 18
0
def load_cora_data():
    """Load Cora, print its tensors for inspection, and return
    (g, features, labels, train mask)."""
    cora = citegrh.load_cora()
    x = th.FloatTensor(cora.features)
    y = th.LongTensor(cora.labels)
    train_mask = th.ByteTensor(cora.train_mask)
    graph = DGLGraph(cora.graph)
    # debug dump of every component
    print("g: ")
    print(graph.all_edges())
    print(graph.edata)
    print("features: ")
    print(x.size())
    print(x)
    print("labels: ")
    print(y.size())
    print(y)
    print("mask: ")
    print(train_mask.size())
    print(train_mask)
    return graph, x, y, train_mask
Esempio n. 19
0
def load_data(dataset="cora"):
    """Load a citation dataset (or the synthetic one) and attach the
    processed graph, dense adjacency matrix, and row-normalized
    transition matrix to the returned data object.

    :param dataset: one of ['cora', 'pubmed', 'citeseer', 'synthetic']
    :return: the dataset object with .features/.labels/.size/.g/.adj/.Prob set
    :raises ValueError: for any other dataset name
    """
    # BUG FIX: input validation previously used `assert`, which is stripped
    # under `python -O`; raise a real exception instead.
    if dataset not in ["cora", "pubmed", "citeseer", "synthetic"]:
        raise ValueError("Unknown dataset: {}".format(dataset))
    if dataset == "cora":
        data = citegrh.load_cora()
    elif dataset == "pubmed":
        data = citegrh.load_pubmed()
    elif dataset == "citeseer":
        data = citegrh.load_citeseer()
    else:
        data = synthetic_data()
    data.features = th.FloatTensor(data.features)
    data.labels = th.LongTensor(data.labels)
    data.size = data.labels.shape[0]
    # normalize the graph to exactly one self loop per node
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    data.g = g
    # dense adjacency and row-stochastic transition matrix
    data.adj = g.adjacency_matrix(transpose=None).to_dense()
    data.Prob = normalize(th.FloatTensor(data.adj), p=1, dim=1)
    print("============Successfully Load %s===============" % dataset)
    return data
Esempio n. 20
0
def load_data():
    """Load Cora and prepare (features embedding, adjacency list, labels)."""
    cora = citation.load_cora()
    # ground-truth label of every node
    labels = cora.labels
    num_classes = cora.num_labels
    # build the graph model and compute its adjacency matrix
    model = construct_graph(cora.graph)
    model.adjency_matrix()
    # frozen embedding table holding the raw node features
    raw_feats = cora.features
    features = nn.Embedding(raw_feats.shape[0], raw_feats.shape[1])
    features.weight = nn.Parameter(torch.tensor(raw_feats,
                                                dtype=torch.float32),
                                   requires_grad=False)
    nodes = model.nodes
    # neighbor lists of the corresponding nodes (see aggregator.py)
    adj_list = model.index_adjmatrix()

    return features, adj_list, labels
Esempio n. 21
0
File: utils.py Progetto: MH-0/RPGAE
def load_custom_dataset(dataset_name, with_attributes, with_labels, directed,
                        separator):
    """
    loads the dataset into memory

    Side effects: populates the module-level globals declared below (paths,
    graph, node_labels, number_classes, input, input_size, is_directed) and
    creates the output directories on disk; returns nothing.

    :param dataset_name: The name of the dataset (As named in the folder data)
    :param with_attributes: if it has attributes
    :param with_labels: if the dataset has labels (ground truth)
    :param directed: if the graph is directed
    :param separator: the separator character in the files (" " or "," or "\t")
    """
    global data_path
    global graph_path
    global topo_features_path
    global topo_features_labels_path
    global embedding_path
    global graph
    global node_labels
    global number_classes
    # NOTE(review): `input` shadows the Python builtin, but it is an existing
    # module-level global of this project, so the name is left unchanged here.
    global input
    global input_size
    global is_directed

    # data folder path
    # NOTE(review): backslash separators make these paths Windows-only
    data_path = "data\\" + dataset_name + "\\"

    # graph folder path
    graph_path = data_path + "graph\\"

    # features folder path
    topo_features_path = data_path + "top_features\\"

    # features classes folder path
    topo_features_labels_path = data_path + "top_features_labels\\"

    # pretreatment folder path
    embedding_path = data_path + "embedding\\"

    # scores folder path (local, unlike the other paths above)
    scores_path = data_path + "scores\\"

    # The graph is directed
    is_directed = directed

    # Load graphs
    if dataset_name == "cora":
        data = cg.load_cora()
        graph = data.graph
        # re-wrap as a plain undirected networkx Graph
        graph = nx.Graph(graph)
        node_labels = data.labels
        input = torch.tensor(data.features).float()
    elif dataset_name == "citeseer":
        data = cg.load_citeseer()
        graph = data.graph
        # re-wrap as a plain undirected networkx Graph
        graph = nx.Graph(graph)
        node_labels = data.labels
        input = torch.tensor(data.features).float()
    else:
        # custom dataset: read edges (and optionally labels/attributes) from disk
        graph = load_graph(graph_path + "edges.txt",
                           0,
                           separator,
                           print_details=True,
                           directed=directed)
        if with_labels:
            node_labels = load_groundtruth(graph_path + "groundtruth.txt", 0,
                                           separator)
        else:
            node_labels = []
        if with_attributes:
            input = load_attributes(graph_path + "attributes.txt", 0,
                                    separator)
        else:
            # no attributes: fall back to a one-hot identity feature matrix
            input = torch.eye(len(graph.nodes))

    # input layer size
    input_size = len(input[0])

    # number of classes for the node labels
    number_classes = len(set(node_labels))

    # create directories if they do not exist
    # folder that holds the embeddings
    Path(embedding_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the topological features
    Path(topo_features_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the classes of the topological features
    Path(topo_features_labels_path).mkdir(parents=True, exist_ok=True)
    # folder that holds the scores of the experiments
    Path(scores_path).mkdir(parents=True, exist_ok=True)

    print("graph details:", dataset_name)
    print("------------------")
    print("nodes", len(graph.nodes))
    print("edges", len(graph.edges))
    print("classes", len(set(node_labels)))
    print("------------------")
Esempio n. 22
0
from dgl import DGLGraph

from model.inference_time import inference
from model.gcn import GCN
from model.gcn_GRU import GCN_GRU
from model.gcn_gated import GCN_GATED
from model.graphsage import GraphSAGE
from dgl.nn.pytorch.conv import SGConv
from model.tagcn import TAGCN

from model.rgcn_ import RGCN_Class

# output directory for feature-timing results
root = './result/feature/'

# prefer GPU when available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# only Cora is loaded by default; the datasets below are kept for reference
Cora = citation_graph.load_cora()
#CiteSeer = citation_graph.load_citeseer()
#PubMed = citation_graph.load_pubmed()
#Nell   = citation_graph.load_nell_0_001()
#Coauthor_cs = Coauthor('cs')
#Coauthor_physics = Coauthor('cs')
#Amazon_computer = AmazonCoBuy('computers')
#Amazon_photo = AmazonCoBuy('photo')

#CoraFull = CoraFull()
#Reddit = RedditDataset(self_loop=True)
#Enwiki = citation_graph.load_RMAT('enwiki',100,10)
#Amazon = citation_graph.load_RMAT('amazon',100,10)
#M3 = citation_graph.load_RMAT('3M',100,10)
#3M_276M = citation_graph.load_RMAT('3M_276M',100,10)
#_21M = citation_graph.load_RMAT('21',100,10)
Esempio n. 23
0
        x = tf.nn.log_softmax(self.layer2(g, x))
        return x


from dgl.data import citation_graph as citegrh
import networkx as nx

#data = citegrh.load_cora()

# profile a single forward pass of Net over Cora on CPU
#with tf.device('/GPU:0'):
with tf.device('/device:CPU:0'):

    #model = Net()

    #features = tf.convert_to_tensor(torch.FloatTensor(data.features).numpy(), numpy.float32)
    #feaures = torch.FloatTensor(data.features)
    #with torch.no_grad():

    # trace the whole load + forward pass into ./logdir for TensorBoard
    with tf.profiler.experimental.Profile('logdir'):
        #tf.profiler.experimental.start('logdir'):
        data = citegrh.load_cora()

        model = Net()

        features = tf.convert_to_tensor(data.features, numpy.float32)
        g = DGLGraph(data.graph)

        # single profiled forward pass
        out = model(g, features)
    pass
    #tf.profiler.experimental.stop()
Esempio n. 24
0
    def __init__(self, name, seed, self_loop=False, split=None):
        """Build a small node-classification dataset.

        :param name: one of ['cora', 'citeseer', 'pubmed', 'amazon', 'karate']
        :param seed: 0 uses the first nodes as the training set; any other
            value draws a random training sample with that seed
        :param self_loop: if True, normalize to one self-loop per node
        :param split: training fraction in (0, 1); required for 'amazon',
            optional for the citation graphs (None keeps their original split)
        """
        super(SmallGraphDataset, self).__init__()
        if name == 'cora':
            data = citegrh.load_cora()
            graph = data.graph
            if self_loop:
                graph = self.add_selfloop(graph)
            graph = dgl.DGLGraph(graph)
            features = data.features
            labels = data.labels

        elif name == 'citeseer':
            data = citegrh.load_citeseer()
            graph = data.graph
            if self_loop:
                graph = self.add_selfloop(graph)
            graph = dgl.DGLGraph(graph)
            features = data.features
            labels = data.labels

        elif name == 'pubmed':
            data = citegrh.load_pubmed()
            graph = data.graph
            if self_loop:
                graph = self.add_selfloop(graph)
            graph = dgl.DGLGraph(graph)
            features = data.features
            labels = data.labels

        elif name == 'amazon':
            # amazon ships no official split; the caller must provide one
            assert split is not None
            data = AmazonCoBuy(name='computers')
            graph = data.data[0]
            if self_loop:
                graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
                graph.add_edges(graph.nodes(), graph.nodes())
            # must create split
            features = graph.ndata['feat']
            labels = graph.ndata['label']
        elif name == 'karate':
            kG = nx.karate_club_graph()
            # label 0 for Mr. Hi's faction, 1 for John A's
            labels = np.array(
            [kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]).astype(np.int64)
            graph = dgl.DGLGraph(kG)
            if self_loop:
                graph.remove_edges(graph.edge_ids(graph.nodes(), graph.nodes()))
                graph.add_edges(graph.nodes(), graph.nodes())
            features = torch.eye(n=graph.number_of_nodes())
            # graph.ndata['feat'] = features

            # one labelled node per faction: node 0 (Mr. Hi), node 33 (John A)
            self.train_mask = torch.zeros(graph.number_of_nodes(), dtype=torch.bool)
            self.train_mask[0] = True #Mr.Hi
            self.train_mask[33] = True # John A
            self.test_mask = ~self.train_mask

        graph = self.compute_norm(graph)

        self.graph = graph
        self.features = torch.FloatTensor(features)
        self.n_features = self.features.size(1)
        self.labels = torch.LongTensor(labels)
        self.n_label = torch.unique(self.labels).size(0)
        self.n_nodes = graph.number_of_nodes()
        # karate already fixed its masks above; nothing left to split
        if hasattr(self, 'train_mask'):
            return

        if split:
            print('using {} for training data.'.format(split))
            assert split > 0.0
            assert split < 1.0
            sample_size = ceil(self.n_nodes * split)
            # BUG FIX: `np.bool` was a deprecated alias removed in NumPy 1.24;
            # use the builtin bool dtype instead.
            train_np = np.zeros(self.n_nodes, dtype=bool)
            test_np = np.zeros(self.n_nodes, dtype=bool)
            # nodes 500-1499 are reserved as the test set
            test_np[range(500, 1500)] = 1

            if seed == 0:
                # use first few data points as seed
                train_idx = range(sample_size)
                train_np[train_idx] = 1
            else:
                random.seed(seed)
                # sample outside the test block, then shift indices past it
                train_idx = random.sample(range(self.n_nodes - 1000), sample_size)
                mapped_train_idx = [idx if idx < 500 else idx + 1000 for idx in train_idx]
                train_np[mapped_train_idx] = 1

            self.train_mask = torch.tensor(train_np, dtype=torch.bool)
            self.test_mask = torch.tensor(test_np, dtype=torch.bool)
        else:  # use original split
            self.train_mask = torch.BoolTensor(data.train_mask)
            self.test_mask = torch.BoolTensor(data.test_mask)
Esempio n. 25
0
     #G = mmread(path)
     #nxgraph = nx.Graph(G)
     #graph = dgl.from_networkx(nxgraph)
     edges = readmtxGraph(path)
     graph = dgl.graph(edges)
     #print(graph.edges())
 elif graph == "simple":
     graph = dgl.graph(([0, 0, 1, 1, 2, 3], [1, 2, 2, 4, 3, 4]))
 elif graph == "citeseer":
     data = load_citeseer(".")
     graph = data[0]
 elif graph == "pubmed":
     data = load_pubmed(".")
     graph = data[0]
 else:
     data = load_cora(".")
     graph = data[0]
 N = len(graph.nodes())
 print("#Nodes:", N, "#Edges:", len(graph.edges()[0]))
 embed = torch.rand(N, dim)
 #print(embed)
 #need to check batch processing ...
 print("Creating batch graphs...")
 if bsize == 256:
     bgraphs = batch_process(graph, 1024, 50)
 elif bsize == 1:
     bgraphs = [[graph, 0, N]]
 else:
     bgraphs = batch_process(graph, bsize)
 print("Done!")
 #cacheflush()
Esempio n. 26
0
def load_cora_data():
    """Load Cora; return (graph, features, labels)."""
    cora = citegrh.load_cora()
    x = torch.FloatTensor(cora.features)
    y = torch.LongTensor(cora.labels)
    graph = dgl.from_networkx(cora.graph)
    return graph, x, y