예제 #1
0
def test_karate():
    """Check the dataset-level and graph-level invariants of ``KarateClub``."""
    dataset = KarateClub()

    # Dataset-level metadata.
    assert len(dataset) == 1
    assert dataset.num_features == 34
    assert dataset.num_classes == 2
    assert repr(dataset) == 'KarateClub()'

    # The single graph held by the dataset.
    graph = dataset[0]
    assert len(graph) == 3
    assert graph.edge_index.size() == (2, 156)
    assert graph.x.size() == (34, 34)

    labels = graph.y
    assert labels.size() == (34, )
    assert labels.sum().item() == 17
예제 #2
0
def test_influence():
    """Check that ``influence`` yields a node-by-node matrix with unit row sums."""
    graph = KarateClub()[0]
    num_nodes = graph.num_nodes
    features = torch.randn(num_nodes, 8)

    model = Net(features.size(1), 16)
    scores = influence(model, features, graph.edge_index)

    # One row and one column per node.
    assert scores.size() == (num_nodes, num_nodes)
    # Every row is expected to sum to one.
    row_sums = scores.sum(dim=-1)
    assert torch.allclose(row_sums, torch.ones(num_nodes))
예제 #3
0
def load_dataset(root: str, name: str, *args, **kwargs) -> Dataset:
    r"""Returns a variety of datasets according to :obj:`name`.

    Args:
        root: Base directory; datasets that take a storage path are placed
            in a sub-folder of it named after the dataset family.
        name: Dataset identifier. Matching is case-insensitive for the
            KarateClub, Planetoid, BAShapes and DBLP families and
            case-sensitive for the TUDataset, SNAPDataset and
            SuiteSparseMatrixCollection names, exactly as listed below.
        *args: Extra positional arguments forwarded to the dataset class.
        **kwargs: Extra keyword arguments forwarded to the dataset class.

    Returns:
        The constructed dataset object.

    Raises:
        NotImplementedError: If :obj:`name` matches none of the supported
            datasets. The message names the offending value.
    """
    lowered = name.lower()

    if 'karate' in lowered:
        from torch_geometric.datasets import KarateClub
        return KarateClub(*args, **kwargs)
    if lowered in ['cora', 'citeseer', 'pubmed']:
        from torch_geometric.datasets import Planetoid
        path = osp.join(root, 'Planetoid', name)
        return Planetoid(path, name, *args, **kwargs)
    if name in ['BZR', 'ENZYMES', 'IMDB-BINARY', 'MUTAG']:
        from torch_geometric.datasets import TUDataset
        path = osp.join(root, 'TUDataset')
        return TUDataset(path, name, *args, **kwargs)
    if name in ['ego-facebook', 'soc-Slashdot0811', 'wiki-vote']:
        from torch_geometric.datasets import SNAPDataset
        path = osp.join(root, 'SNAPDataset')
        return SNAPDataset(path, name, *args, **kwargs)
    if lowered in ['bashapes']:
        from torch_geometric.datasets import BAShapes
        return BAShapes(*args, **kwargs)
    if lowered in ['dblp']:
        from torch_geometric.datasets import DBLP
        path = osp.join(root, 'DBLP')
        return DBLP(path, *args, **kwargs)
    if name in ['citationCiteseer', 'illc1850']:
        from torch_geometric.datasets import SuiteSparseMatrixCollection
        path = osp.join(root, 'SuiteSparseMatrixCollection')
        # Same call as before; the keyword is simply written after *args.
        return SuiteSparseMatrixCollection(path, *args, name=name, **kwargs)

    # Name the rejected value so the failure is diagnosable from the traceback.
    raise NotImplementedError(f"Unsupported dataset name: {name!r}")
예제 #4
0
    def test_visualize_KarateClub(self):
        """Rebuild the KarateClub graph in networkx and draw it.

        Fills in default environment variables, recreates an empty results
        directory named after this test, loads the dataset and draws the
        graph with ``networkx``.

        Example invocation (module path copied from the original note;
        verify it against the project layout):

            export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:/usr/local/cudnn-10.0-v7.6.5.32
            proxychains python -c "from template_lib.examples.DGL.geometric.test_pytorch_geometric import TestingGeometric; TestingGeometric().test_visualize_KarateClub()"
        """
        # Only fill in environment settings the caller has not provided.
        os.environ.setdefault('CUDA_VISIBLE_DEVICES', '0')
        os.environ.setdefault('PORT', '6006')
        if 'TIME_STR' not in os.environ:
            os.environ['TIME_STR'] = '0' if utils.is_debugging() else '1'

        # The running frame's code name is this test's name; it has to be
        # read inline, since a helper would report its own name instead.
        method_name = sys._getframe().f_code.co_name
        assert method_name.startswith('test_')
        command = method_name[5:]
        class_name = self.__class__.__name__
        if class_name.startswith('Testing'):
            class_name = class_name[7:]
        outdir = f'results/{class_name}/{command}'

        from datetime import datetime
        use_time_suffix = bool(int(os.getenv('TIME_STR', 0)))
        time_str = datetime.now().strftime("%Y%m%d-%H_%M_%S_%f")[:-3]
        if use_time_suffix:
            outdir = outdir + '_' + time_str
        print(outdir)

        # Start from an empty output directory.
        import collections
        import shutil
        shutil.rmtree(outdir, ignore_errors=True)
        os.makedirs(outdir, exist_ok=True)

        from torch_geometric.datasets import KarateClub
        dataset = KarateClub()

        for entry in dataset[0]:
            print(entry)

        # NOTE(review): this unpacking assumes iterating the data object
        # yields exactly three (key, tensor) pairs in the order edge index,
        # node features, labels -- confirm against the installed
        # torch_geometric version.
        edge, x, y = dataset[0]
        numpyx = x[1].numpy()
        numpyy = y[1].numpy()
        numpyedge = edge[1].numpy()

        import networkx as nx

        # Seed the graph from the feature matrix, then add the dataset edges.
        g = nx.Graph(numpyx)

        name, edgeinfo = edge
        sources = edgeinfo[0].numpy()
        targets = edgeinfo[1].numpy()
        for u, v in zip(sources, targets):
            g.add_edge(u, v)

        nx.draw_networkx(g)
예제 #5
0
    def test_GCN(self):
        """Train a GCN on KarateClub and print loss/accuracy after every epoch.

        Nodes selected by ``g.train_mask`` drive the optimisation; the
        complement of that mask is what gets reported as ``val_accuracy``.
        """
        g = KarateClub().data

        gcn = GCN(g.x.shape[1], len(np.unique(g.y)), n_hidden_gcn=64)

        epochs = 100
        criterion = th.nn.CrossEntropyLoss(reduction='mean')
        optimizer = th.optim.Adam(gcn.parameters(), lr=0.02)

        device = th.device('cuda' if th.cuda.is_available() else 'cpu')
        gcn = gcn.to(device).float()
        g = g.to(device)

        length = len(str(epochs))
        print("#### TRAINING START ####")
        train_mask = g.train_mask
        test_mask = th.logical_not(train_mask)

        # Apply the masks while labels and masks share a device, then move
        # the selected labels to the CPU. Indexing a CPU tensor with a mask
        # that lives on the GPU fails. The labels never change, so this is
        # done once, outside the loop.
        y_train = g.y[train_mask].detach().cpu()
        y_test = g.y[test_mask].detach().cpu()

        for epoch in range(epochs):
            gcn.train()
            outputs = gcn(g)[train_mask]
            loss = criterion(outputs, g.y[train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            gcn.eval()
            with th.no_grad():
                # One forward pass serves both splits (assumes the model is
                # deterministic in eval mode).
                logits = gcn(g)
                predictions = np.argmax(
                    logits[test_mask].cpu().numpy(), axis=1)
                pred_train = np.argmax(
                    logits[train_mask].cpu().numpy(), axis=1)
                acc = accuracy_score(y_test, predictions)
                acc_train = accuracy_score(y_train, pred_train)
                print(
                    f"[{epoch + 1:{length}}] loss: {loss.item(): .3f}, "
                    f"training accuracy: {acc_train: .3f}, val_accuracy: {acc: .3f}"
                )
예제 #6
0
def train_net():
    """Train a ``SAGENet`` on KarateClub with neighbor-sampled mini-batches.

    Relies on settings defined outside this function: ``lr``, ``epoch``,
    ``device``, ``grad_clip`` and ``print_freq``.

    Returns:
        ``losses.avg`` of the loss meter after the last epoch.
    """
    data = KarateClub().data
    # NOTE(review): num_nodes is indexed here, so it is presumably a
    # sequence at this point -- confirm against the library version in use.
    data.num_nodes = data.num_nodes[0]
    print(data)

    sampler = NeighborSampler(
        data,
        size=[20, 10],
        num_hops=2,
        batch_size=8,
        shuffle=True,
        add_self_loops=True,
    )
    net = SAGENet(34, 2)
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    losses = AverageMeter()

    for epoch_idx in range(epoch):
        # (labels, outputs) of every batch, handed to validation() below.
        batch_results = []
        for data_flow in sampler():
            targets = data.y[data_flow.n_id]
            optimizer.zero_grad()
            out = net(data.x, data_flow.to(device))
            batch_results.append((targets, out))

            loss = criterion(out, targets)

            # NOTE(review): gradients were already cleared before the
            # forward pass; this second call looks redundant.
            optimizer.zero_grad()
            loss.backward()
            clip_gradient(optimizer, grad_clip)
            optimizer.step()
            losses.update(loss.item())

        print(validation(batch_results))
        if epoch_idx % print_freq == 0:
            print('Epoch: {0} Loss {loss.val:.5f} ({loss.avg:.5f})\t'.format(
                epoch_idx, loss=losses))

    return losses.avg
예제 #7
0
def test_karate():
    """Run ``transductive_sage`` on KarateClub with label-count node features.

    Each node's feature vector is replaced by the per-class count of its
    neighbours that belong to the training split.
    """
    from torch_geometric.datasets import KarateClub

    dataset = KarateClub('../data/KarateClub')
    config = Config('../data/KarateClub/', True, multilabel=False)
    data = prepare_dataset(dataset, config, 2)

    # Stratified split over node indices: 40% train, 60% test.
    index = np.arange(data.y.shape[0])
    train, test = train_test_split(
        index, stratify=data.y, test_size=0.6, random_state=0)

    num_nodes = index.shape[0]
    data.train_mask = torch.zeros(num_nodes, dtype=torch.uint8)
    data.train_mask[train] = 1
    data.test_mask = torch.zeros(num_nodes, dtype=torch.uint8)
    data.test_mask[test] = 1

    import networkx as nx

    g = nx.from_edgelist(data.edge_index.numpy().T)
    train_nodes = set(train)
    label_counts = []
    for node in sorted(g.nodes):
        # Training-split neighbours of this node.
        labelled = list(train_nodes.intersection(g.neighbors(node)))
        classes, occurrences = torch.unique(data.y[labelled],
                                            return_counts=True)
        row = torch.zeros(data.num_classes, dtype=torch.int64)
        row[classes] = occurrences
        label_counts.append(row)

    data.x = torch.stack(label_counts).to(torch.float)

    # Degree computed from the second row of edge_index.
    deg = degree(data.edge_index[1].to(torch.long),
                 data.num_nodes,
                 dtype=torch.int)
    loader = NeighborSampler(
        data, size=[5, 5], deg=deg, batch_size=5, shuffle=True)

    transductive_sage(loader, config, steps=200, test_every=20)
예제 #8
0
def load_pyg(name, dataset_dir):
    """
    Build the PyG dataset object selected by ``name``.

    The dataset is stored under ``<dataset_dir>/<name>``, except for
    ``'Karate'``, which is constructed without a directory.

    Args:
        name (string): dataset name
        dataset_dir (string): data directory

    Returns: PyG dataset object

    Raises:
        ValueError: if ``name`` is not one of the handled datasets.

    """
    dataset_dir = '{}/{}'.format(dataset_dir, name)

    if name in ['Cora', 'CiteSeer', 'PubMed']:
        return Planetoid(dataset_dir, name)

    if name[:3] == 'TU_':
        tu_name = name[3:]
        if tu_name == 'IMDB':
            # Per the original note, TU_IMDB has no node features, so a
            # constant feature transform is attached.
            return TUDataset(dataset_dir, 'IMDB-MULTI',
                             transform=T.Constant())
        return TUDataset(dataset_dir, tu_name)

    if name == 'Karate':
        return KarateClub()

    if 'Coauthor' in name:
        subset = 'CS' if 'CS' in name else 'Physics'
        return Coauthor(dataset_dir, name=subset)

    if 'Amazon' in name:
        subset = 'Computers' if 'Computers' in name else 'Photo'
        return Amazon(dataset_dir, name=subset)

    if name == 'MNIST':
        return MNISTSuperpixels(dataset_dir)
    if name == 'PPI':
        return PPI(dataset_dir)
    if name == 'QM7b':
        return QM7b(dataset_dir)

    raise ValueError('{} not support'.format(name))
예제 #9
0
def load_pyg_dataset(dataset_name, root='dataset/'):
    """Load a dataset together with its split indices and an evaluator.

    ``dataset_name`` has the form ``"<source>-<name>"`` where ``source`` is
    one of ``ogbn``, ``pyg`` or ``custom``.

    Args:
        dataset_name: Combined source and dataset name, split on the first
            ``-``.
        root: Storage directory passed to the datasets that take one.

    Returns:
        A ``(dataset, split_idx, evaluator)`` tuple.

    Raises:
        ValueError: If ``dataset_name`` contains no ``-`` (unpacking fails).
        AssertionError: If the source prefix is not a supported one.
        Exception: If a ``pyg`` dataset name is not recognized.
    """
    from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
    source, name = dataset_name.split('-', maxsplit=1)
    assert source in ['ogbn', 'pyg', 'custom']

    if source == 'ogbn':
        # OGB takes the full, prefixed name and ships its own split.
        dataset = PygNodePropPredDataset(name=dataset_name, root=root)
        return dataset, dataset.get_idx_split(), Evaluator(dataset_name)

    if source == 'pyg':
        from torch_geometric.datasets import KarateClub, CoraFull
        if name == "karate":
            dataset = KarateClub()
        elif name == "cora":
            dataset = CoraFull(root)
        else:
            raise Exception("Dataset not recognized")

        # Random 80/10/10 split over a shuffled node permutation.
        num_nodes = dataset[0].x.shape[0]
        train_end = int(num_nodes * 0.8)
        valid_end = train_end + int(num_nodes * 0.1)

        perm = np.arange(num_nodes, dtype=int)
        np.random.shuffle(perm)
        split_idx = {
            'train': perm[:train_end],
            'valid': perm[train_end:valid_end],
            'test': perm[valid_end:],
        }
        return dataset, split_idx, Evaluator('ogbn-arxiv')

    if source == "custom":
        from dataset import registry
        dataset = registry[name]()
        # Custom datasets carry their split indices on the first graph.
        split_idx = {
            'train': dataset[0].idx_train,
            'valid': dataset[0].idx_val,
            'test': dataset[0].idx_test,
        }
        return dataset, split_idx, CustomEvaluator()

    raise Exception("Dataset not recognized")
    if torch.is_tensor(h):
        h = h.detach().cpu().numpy()
        plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap="Set2")
        if epoch is not None and loss is not None:
            plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16)
    else:
        nx.draw_networkx(h,
                         pos=nx.spring_layout(h, seed=42),
                         with_labels=False,
                         node_color=color,
                         cmap="Set2")
    plt.show()


## Getting some data
# Load the KarateClub dataset and take its first element as the working graph.
dataset = KarateClub()
graph = dataset[0]
# Convert the graph to networkx form; to_undirected=True is passed through
# to to_networkx.
G_nx = to_networkx(graph, to_undirected=True)

# Uncomment the call below for an initial visualization, colored by graph.y.

# visualize(G_nx, color=graph.y)


## Defining a GCN Model
# The meat!!!
class GCN(torch.nn.Module):
    """GCN model skeleton.

    NOTE(review): only the seeding step of ``__init__`` is visible in this
    span; no layers or ``forward`` method are defined here.
    """

    def __init__(self):
        super().__init__()
        # Fix torch's global RNG seed at construction time.
        torch.manual_seed(12345)
# Node classification with a GCN

import torch
import torch.nn.functional as F

from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.datasets import KarateClub

from torch_geometric.utils import to_networkx

import networkx as nx
from matplotlib import pyplot as plt
import numpy as np

# Load the KarateClub dataset.
dataset = KarateClub()

# Report the number of graphs in the dataset and its number of classes.
print("图数量:", len(dataset))
print("类数量:", dataset.num_classes)

# Work with the first element of the dataset from here on.
data = dataset[0]


# Inspect the graph that was created
def check_graph(data):
    """Print a labelled summary of ``data``, one line per property.

    Printed in order: the object itself, ``keys``, ``num_nodes``,
    ``num_edges``, ``num_node_features`` and the result of
    ``contains_isolated_nodes()``.
    """
    # Each value is looked up lazily, right before its line is printed, so
    # earlier lines still appear if a later lookup fails.
    report = (
        ("图结构:", lambda: data),
        ("图中的键:", lambda: data.keys),
        ("图中节点数:", lambda: data.num_nodes),
        ("图中边数:", lambda: data.num_edges),
        ("图中节点属性特征数:", lambda: data.num_node_features),
        ("图中是否存在孤立节点", lambda: data.contains_isolated_nodes()),
    )
    for label, fetch in report:
        print(label, fetch())
예제 #12
0
# Read the whole experiment config file so it can be attached to the run.
full_description = ''
with open('./EGONETCONFIG.py', 'r') as f:
    full_description = f.read()

# Start the wandb run, named "<dataset name> - <job id>", with the config
# text stored as the run notes.
wandb.init(name=current_dataset['name'] + " - " + job_id,
           project="ego-net",
           notes=full_description)

# ---------------------------------------------------------------
# Progress marker, printed locally and logged to wandb.
print("Done 1")
wandb.log({'action': 'Done 1'})

# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
real_data = None
if DATASET == "Karate Club":
    real_data = KarateClub()
elif DATASET == "Cora" or DATASET == "Citeseer" or DATASET == "Pubmed":
    real_data = Planetoid(root=input_path, name=DATASET, split="public")
elif DATASET == "Reddit":
    real_data = Reddit(root=input_path)
elif DATASET == "Amazon Computers":
    real_data = Amazon(root=input_path, name="Computers")
elif DATASET == "Amazon Photos":
    real_data = Amazon(root=input_path, name="Photo")
elif DATASET == "CLUSTER":
    real_data = GNNBenchmarkDataset(root=input_path,
                                    name="CLUSTER",
                                    split="test")
elif DATASET == "PATTERN":
    real_data = GNNBenchmarkDataset(root=input_path,
                                    name="PATTERN",
예제 #13
0
    avg_path_length = path_length_sum / (obj_size-1)
    return avg_path_length


# Parameters
dataset = 'Cora'
sample_label_idxes = [0, 2, 7] # label 7 marks augmented vertices (per the original note)
color_list = ['#e41a1c', '#377eb8', '#4daf4a', '#984ea3', '#ff7f00', '#000000', '#a65628', '#ffff33']
# The next three values are passed to GraphAugmenter below.
step = 3300
edge_BA = 1
edge_TF = 5

# Data loading: both branches apply the same GraphAugmenter transform.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if(dataset == 'KarateClub'):
    data = KarateClub(transform=GraphAugmenter(step, edge_BA, edge_TF))
else:
    data = Planetoid(root='./data/experiment/', name=dataset,
                        transform=GraphAugmenter(step, edge_BA, edge_TF))
# Take the first graph of the dataset and move it to the chosen device.
data_augmented = data[0].to(device)
print(data_augmented)

# Build a networkx graph from the torch data object.
G = graph_data_obj_to_nx(data_augmented)

# Build the subgraph of G induced by vertices whose label is in
# sample_label_idxes.
v_idxes_of_subgraph = []
for v_id, v_label_id in enumerate(data_augmented.y):
    if(v_label_id in sample_label_idxes):
        v_idxes_of_subgraph.append(v_id)
G_sub = nx.subgraph(G, nbunch=v_idxes_of_subgraph)
예제 #14
0
import torch
from torch import Tensor
import networkx as nx
import matplotlib.pyplot as plt

from torch_geometric.datasets import KarateClub

# Load the dataset and print its headline statistics.
dataset: torch.utils.data.Dataset = KarateClub()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# Gather some statistics about the graph.

data = dataset[0]
# Per the original author's note, the current KarateClub has no train_mask,
# so one is added here by hand.
import numpy as np
# randint draws from the half-open interval [0, 2), i.e. 0 or 1, for 34 entries.
np_array = np.random.randint(0, 2, 34)

# Entries equal to 1 become True in the boolean mask.
mask = np_array >= 1
# print(mask)
train_mask = torch.from_numpy(mask)
# Attach the randomly generated mask to the graph.
data['train_mask'] = train_mask

print(data)
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
예제 #15
0
    def test_MessagePassing(self):
        """Run a hand-written GCN layer over the KarateClub graph.

        Fills in default environment variables, recreates an empty results
        directory named after this test, defines a ``GCNConv`` layer on top
        of ``MessagePassing`` and applies it once to the dataset's features.

        Example invocation (module path copied from the original note;
        verify it against the project layout):

            export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:/usr/local/cudnn-10.0-v7.6.5.32
            proxychains python -c "from template_lib.examples.DGL.geometric.test_pytorch_geometric import TestingGeometric; TestingGeometric().test_MessagePassing()"
        """
        # Only fill in environment settings the caller has not provided.
        os.environ.setdefault('CUDA_VISIBLE_DEVICES', '0')
        os.environ.setdefault('PORT', '6006')
        if 'TIME_STR' not in os.environ:
            os.environ['TIME_STR'] = '0' if utils.is_debugging() else '1'

        # The running frame's code name is this test's name; it has to be
        # read inline, since a helper would report its own name instead.
        method_name = sys._getframe().f_code.co_name
        assert method_name.startswith('test_')
        command = method_name[5:]
        class_name = self.__class__.__name__
        if class_name.startswith('Testing'):
            class_name = class_name[7:]
        outdir = f'results/{class_name}/{command}'

        from datetime import datetime
        use_time_suffix = bool(int(os.getenv('TIME_STR', 0)))
        time_str = datetime.now().strftime("%Y%m%d-%H_%M_%S_%f")[:-3]
        if use_time_suffix:
            outdir = outdir + '_' + time_str
        print(outdir)

        # Start from an empty output directory.
        import collections
        import shutil
        shutil.rmtree(outdir, ignore_errors=True)
        os.makedirs(outdir, exist_ok=True)

        import torch
        from torch_geometric.nn import MessagePassing
        from torch_geometric.utils import add_self_loops, degree

        class GCNConv(MessagePassing):
            """Linear transform followed by degree-normalized sum aggregation."""

            def __init__(self, in_channels, out_channels):
                # Messages are aggregated by summation.
                super().__init__(aggr='add')
                self.lin = torch.nn.Linear(in_channels, out_channels)

            def forward(self, x, edge_index):
                # x: [N, in_channels]; edge_index: [2, E]
                num_nodes = x.size(0)

                # Add a self-loop for every node.
                edge_index, _ = add_self_loops(edge_index,
                                               num_nodes=num_nodes)

                # Project the node features.
                x = self.lin(x)

                # Per-edge weight: deg(row)^-1/2 * deg(col)^-1/2.
                row, col = edge_index
                deg = degree(row, num_nodes, dtype=x.dtype)
                deg_inv_sqrt = deg.pow(-0.5)
                norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

                # Hand over to the message/aggregate/update machinery.
                return self.propagate(edge_index,
                                      size=(num_nodes, num_nodes),
                                      x=x,
                                      norm=norm)

            # The parameter names of message() and update() are kept as-is;
            # presumably the base class matches them by name -- verify
            # before renaming.
            def message(self, x_j, norm):
                # x_j: [E, out_channels]; scale each message by its weight.
                return norm.view(-1, 1) * x_j

            def update(self, aggr_out):
                # aggr_out: [N, out_channels]; returned unchanged.
                return aggr_out

        from torch_geometric.datasets import KarateClub
        dataset = KarateClub()
        x = dataset[0].x
        edge_index = dataset[0].edge_index
        conv = GCNConv(34, 64)
        x = conv(x, edge_index)