# Example no. 1
def test_one_hot_degree():
    """OneHotDegree: repr, pure degree encoding, and concatenation with x."""
    transform = OneHotDegree(max_degree=3)
    assert repr(transform) == 'OneHotDegree(3)'

    edge_index = torch.tensor([[0, 0, 0, 1, 2, 3], [1, 2, 3, 0, 0, 0]])
    x = torch.Tensor([1, 1, 1, 1])

    # Without pre-existing features, x becomes the one-hot degree encoding
    # (node 0 has degree 3; nodes 1-3 each have degree 1).
    data = transform(Data(edge_index=edge_index, num_nodes=4))
    assert len(data) == 2
    assert data.edge_index.tolist() == edge_index.tolist()
    assert data.x.tolist() == [[0, 0, 0, 1], [0, 1, 0, 0], [0, 1, 0, 0],
                               [0, 1, 0, 0]]

    # With existing features, the one-hot encoding is appended after them.
    data = transform(Data(edge_index=edge_index, x=x))
    assert len(data) == 2
    assert data.edge_index.tolist() == edge_index.tolist()
    assert data.x.tolist() == [[1, 0, 0, 0, 1], [1, 0, 1, 0, 0],
                               [1, 0, 1, 0, 0], [1, 0, 1, 0, 0]]
    def process_dataset(self):
        """
        Downloading and processing dataset.
        """
        print("\nPreparing dataset.\n")

        # TODO test ALKANE
        # ALKANE train/test pairs are currently lacking GED precalculated
        # values, so for compatibility the provided test graphs are ignored:
        # if self.args.dataset == "ALKANE":
        #     dataset = GEDDataset('../datasets/{}'.format(self.args.dataset), self.args.dataset, train=True)
        #     self.training_graphs = dataset[:90]
        #     self.testing_graphs = dataset[90:]
        # else:
        dataset_root = '../datasets/{}'.format(self.args.dataset)
        self.training_graphs = GEDDataset(dataset_root,
                                          self.args.dataset,
                                          train=True)  # [:560]
        self.testing_graphs = GEDDataset(dataset_root,
                                         self.args.dataset,
                                         train=False)  # [:140]

        self.ged_matrix = self.training_graphs.ged
        self.nged_matrix = self.training_graphs.norm_ged

        # ndarray views kept around purely for debugging convenience
        self.ge_m = self.ged_matrix.numpy()
        self.nge_m = self.nged_matrix.numpy()

        # tests.testGEDcalclulation(self)
        # tests.testNetworkX(self)

        # Unlabeled datasets: synthesize node features as one-hot degrees.
        if self.training_graphs[0].x is None:
            highest_degree = 0
            for graph in self.training_graphs + self.testing_graphs:
                if graph.edge_index.size(1) > 0:
                    node_degrees = degree(graph.edge_index[0])
                    highest_degree = max(highest_degree,
                                         int(node_degrees.max().item()))
            encoder = OneHotDegree(highest_degree, cat=False)
            self.training_graphs.transform = encoder
            self.testing_graphs.transform = encoder

        self.number_of_node_labels = self.training_graphs.num_features
        self.number_of_edge_labels = self.training_graphs.num_edge_features
# Example no. 3
    def process_dataset(self):
        """
        Downloading and processing dataset.

        Loads the GED train/test splits; when ``self.args.synth`` is set,
        generates synthetic graph pairs and block-extends the normalized-GED
        matrix, padding real<->synthetic cross entries with +inf (no
        precomputed GED available for those pairs).
        """
        print("\nPreparing dataset.\n")

        self.training_graphs = GEDDataset('datasets/{}'.format(self.args.dataset), self.args.dataset, train=True) 
        self.testing_graphs = GEDDataset('datasets/{}'.format(self.args.dataset), self.args.dataset, train=False) 
        # Normalized GED matrix over the real dataset only (so far).
        self.nged_matrix = self.training_graphs.norm_ged
        self.real_data_size = self.nged_matrix.size(0)
        
        if self.args.synth:
            # self.synth_data_1, self.synth_data_2, _, synth_nged_matrix = gen_synth_data(500, 10, 12, 0.5, 0, 3)
            self.synth_data_1, self.synth_data_2, _, synth_nged_matrix = gen_pairs(self.training_graphs.shuffle()[:500], 0, 3)  
            
            real_data_size = self.nged_matrix.size(0)
            synth_data_size = synth_nged_matrix.size(0)
            # Extend to the block matrix
            #   [ real | inf   ]
            #   [ inf  | synth ]
            # where +inf marks real/synthetic pairs with no GED value.
            self.nged_matrix = torch.cat((self.nged_matrix, torch.full((real_data_size, synth_data_size), float('inf'))), dim=1)
            synth_nged_matrix = torch.cat((torch.full((synth_data_size, real_data_size), float('inf')), synth_nged_matrix), dim=1)
            self.nged_matrix = torch.cat((self.nged_matrix, synth_nged_matrix))
        
        if self.training_graphs[0].x is None:
            # Unlabeled graphs: use one-hot node degrees as node features.
            max_degree = 0
            for g in self.training_graphs + self.testing_graphs + (self.synth_data_1 + self.synth_data_2 if self.args.synth else []):
                if g.edge_index.size(1) > 0:
                    max_degree = max(max_degree, int(degree(g.edge_index[0]).max().item()))
            one_hot_degree = OneHotDegree(max_degree, cat=False)
            self.training_graphs.transform = one_hot_degree
            self.testing_graphs.transform = one_hot_degree
        
        # labeling of synth data according to real data format    
            # NOTE(review): this `if` is nested inside the `x is None` branch
            # above despite the dedented comment — synthetic graphs get the
            # same one-hot encoding and their indices are shifted past the
            # real dataset. Confirm the nesting is intentional.
            if self.args.synth:
                for g in self.synth_data_1 + self.synth_data_2:
                    g = one_hot_degree(g)
                    g.i = g.i + real_data_size
        elif self.args.synth:
            # Graphs already have features; only shift the synthetic indices.
            for g in self.synth_data_1 + self.synth_data_2:
                g.i = g.i + real_data_size
                # g.x = torch.cat((g.x, torch.zeros((g.x.size(0), self.training_graphs.num_features-1))), dim=1)
                    
        self.number_of_labels = self.training_graphs.num_features
# Example no. 4
    def process_dataset(self):
        """
        Downloading and processing dataset.
        """
        print("\nPreparing dataset.\n")

        self.training_graphs = GEDDataset('datasets/{}'.format(self.args.dataset), self.args.dataset, train=True)
        self.testing_graphs = GEDDataset('datasets/{}'.format(self.args.dataset), self.args.dataset, train=False)
        self.nged_matrix = self.training_graphs.norm_ged
        self.real_data_size = self.nged_matrix.size(0)

        # Unlabeled graphs: fall back to one-hot encoded node degrees.
        if self.training_graphs[0].x is None:
            highest_degree = 0
            for graph in self.training_graphs + self.testing_graphs:
                if graph.edge_index.size(1) > 0:
                    highest_degree = max(
                        highest_degree,
                        int(degree(graph.edge_index[0]).max().item()))
            encoder = OneHotDegree(highest_degree, cat=False)
            self.training_graphs.transform = encoder
            self.testing_graphs.transform = encoder

        self.number_of_labels = self.training_graphs.num_features
# Example no. 5
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU, BatchNorm1d as BatchNorm
from torch_geometric.transforms import OneHotDegree
from torch_geometric.datasets import TUDataset
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GINConv, global_add_pool

# Resolve the dataset directory relative to this script's location.
path = osp.join(osp.dirname(osp.realpath(__file__)), '..', '..', 'data', 'TU')
# IMDB-BINARY has no node features, so node degrees are one-hot encoded
# (assumes max node degree <= 135 — TODO confirm for this dataset version).
dataset = TUDataset(path, name='IMDB-BINARY', transform=OneHotDegree(135))
dataset = dataset.shuffle()
# 10% test / 90% train split after shuffling.
test_dataset = dataset[:len(dataset) // 10]
train_dataset = dataset[len(dataset) // 10:]
test_loader = DataLoader(test_dataset, batch_size=128)
train_loader = DataLoader(train_dataset, batch_size=128)


class Net(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, num_layers):
        super(Net, self).__init__()

        self.convs = torch.nn.ModuleList()
        self.batch_norms = torch.nn.ModuleList()

        for i in range(num_layers):
            mlp = Sequential(
                Linear(in_channels, 2 * hidden_channels),
                BatchNorm(2 * hidden_channels),
                ReLU(),
# Example no. 6
        data = data.to(device)
        output = model(data)
        pred = output.max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)


def lr_scheduler(lr, epoch, optimizer):
    """Set every param group's learning rate to ``lr * 0.995 ** (epoch / 5)``.

    Exponential decay applied in place on the optimizer; returns None.
    """
    decayed = lr * (0.995 ** (epoch / 5))  # same decay for all groups
    for group in optimizer.param_groups:
        group['lr'] = decayed


# Define the transform to use in the dataset
if args.one_hot:
    # One-hot encode node degrees; look up the max degree for the current
    # (k, n) configuration (rebinds the module-level `max_degree` table).
    max_degree = max_degree[args.k][args.n]
    transform = OneHotDegree(max_degree, cat=False)
    model_config['num_input_features'] = max_degree + 1
    if args.n == 28:
        # NOTE(review): n == 28 hard-codes max_degree=7 (8 input features),
        # overriding the table lookup above — confirm this special case.
        transform = OneHotDegree(max_degree=7, cat=False)
        model_config['num_input_features'] = 8
elif args.identifiers:
    # Identity-matrix node features: one unique indicator per node.
    transform = EyeTransform()
    model_config['num_input_features'] = args.n
elif args.random:
    # Single random scalar feature per node.
    transform = RandomId()
    model_config['num_input_features'] = 1
else:
    # No transform; model still expects one input feature per node.
    transform = None
    model_config['num_input_features'] = 1

start = time.time()
# Example no. 7
    def test_transforms(self, tmpdir):
        """Data module built from datasets applies the one-hot degree
        transform at every stage (train/val/test/predict)."""
        tudataset = TUDataset(root=tmpdir, name="KKI")

        def stage_transform():
            # Fresh transform mapping per stage: the default pipeline plus a
            # one-hot degree encoding before tensor conversion. A new
            # OneHotDegree instance is created on each call, matching the
            # original per-stage construction.
            return merge_transforms(
                GraphClassificationPreprocess.default_transforms(),
                {
                    "pre_tensor_transform":
                    OneHotDegree(tudataset.num_features - 1)
                },
            )

        # instantiate the data module (same dataset reused for every stage)
        dm = GraphClassificationData.from_datasets(
            train_dataset=tudataset,
            val_dataset=tudataset,
            test_dataset=tudataset,
            predict_dataset=tudataset,
            train_transform=stage_transform(),
            val_transform=stage_transform(),
            test_transform=stage_transform(),
            predict_transform=stage_transform(),
            batch_size=2,
        )
        assert dm is not None
        assert dm.train_dataloader() is not None
        assert dm.val_dataloader() is not None
        assert dm.test_dataloader() is not None

        # Each batch should have doubled feature width (original features
        # plus the one-hot degree block) and batch_size-many targets.
        expected_width = tudataset.num_features * 2
        for loader in (dm.train_dataloader(), dm.val_dataloader(),
                       dm.test_dataloader()):
            batch = next(iter(loader))
            assert list(batch.x.size())[1] == expected_width
            assert list(batch.y.size()) == [2]
# Example no. 8
    return correct / len(loader.dataset)


def lr_scheduler(lr, epoch, optimizer):
    """Decay each param group's learning rate exponentially with the epoch.

    New rate is ``lr * 0.995 ** (epoch / 5)``; mutates the optimizer in place.
    """
    new_lr = lr * 0.995 ** (epoch / 5)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr


# Define the transform to use in the dataset
transform = None
# BUG FIX: the original condition was `if 'GIN' or 'RP' in model_name:`,
# which is ALWAYS true because the non-empty literal 'GIN' is truthy on its
# own. Test each model-name substring explicitly instead.
if 'GIN' in model_name or 'RP' in model_name:
    if config.one_hot:
        # Cannot always be used in an inductive setting,
        # because the maximal degree might be bigger than during training
        degree = max_degree[args.k][args.n]
        transform = OneHotDegree(degree, cat=False)
        config.num_input_features = degree + 1
    elif config.identifiers:
        # Cannot be used in an inductive setting
        transform = EyeTransform(max_num_nodes[args.k][args.n])
        config.num_input_features = max_num_nodes[args.k][args.n]
    elif config.random:
        # Can be used in an inductive setting
        transform = RandomId()
        transform_val = RandomId()
        transform_test = RandomId()
        config.num_input_features = 1

# NOTE(review): transform_val/transform_test are only assigned in the
# `config.random` branch or the None fallback below; if the one-hot or
# identifiers branch is taken they must be defined elsewhere in this
# module — verify against the surrounding file.
if transform is None:
    transform_val = None
    transform_test = None
# Example no. 9
 def per_sample_transform(self):
     """Compose the parent per-sample transform with a one-hot degree
     encoding of each node (width ``tudataset.num_features``)."""
     base_transform = super().per_sample_transform()
     degree_encoding = PyGTransformAdapter(
         OneHotDegree(tudataset.num_features - 1))
     return Compose([base_transform, degree_encoding])