Example #1
import torch

from torch_geometric.data import Data
from torch_geometric.transforms import AddSelfLoops


def test_add_self_loops():
    assert str(AddSelfLoops()) == 'AddSelfLoops()'

    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    edge_weight = torch.tensor([1, 2, 3, 4])
    edge_attr = torch.tensor([[1, 2], [3, 4], [5, 6], [7, 8]])

    data = Data(edge_index=edge_index, num_nodes=3)
    data = AddSelfLoops()(data)
    assert len(data) == 2
    assert data.edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                        [1, 0, 2, 1, 0, 1, 2]]
    assert data.num_nodes == 3

    data = Data(edge_index=edge_index, edge_weight=edge_weight, num_nodes=3)
    data = AddSelfLoops(attr='edge_weight', fill_value=5)(data)
    assert data.edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                        [1, 0, 2, 1, 0, 1, 2]]
    assert data.num_nodes == 3
    assert data.edge_weight.tolist() == [1, 2, 3, 4, 5, 5, 5]

    data = Data(edge_index=edge_index, edge_attr=edge_attr, num_nodes=3)
    data = AddSelfLoops(attr='edge_attr', fill_value='add')(data)
    assert data.edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                        [1, 0, 2, 1, 0, 1, 2]]
    assert data.num_nodes == 3
    assert data.edge_attr.tolist() == [[1, 2], [3, 4], [5, 6], [7, 8], [3, 4],
                                       [8, 10], [5, 6]]
Example #2
def test_add_self_loops():
    assert str(AddSelfLoops()) == 'AddSelfLoops()'

    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])
    data = Data(edge_index=edge_index)

    out = AddSelfLoops()(data).edge_index
    assert out.tolist() == [[0, 0, 1, 1, 1, 2, 2], [0, 1, 0, 1, 2, 1, 2]]
Example #3
def test_add_self_loops():
    assert str(AddSelfLoops()) == 'AddSelfLoops()'

    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = Data(edge_index=edge_index, num_nodes=3)
    data = AddSelfLoops()(data)
    assert len(data) == 1
    assert data.edge_index.tolist() == [[0, 0, 1, 1, 1, 2, 2],
                                        [0, 1, 0, 1, 2, 1, 2]]
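Note: the two variants above appear to come from older torch_geometric releases, where the AddSelfLoops transform coalesced (sorted) the resulting edge_index; that is why their expected outputs differ from Example #1, which appends the self-loops at the end.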
Example #4
def edge_creator(dat):
    # Build a directed 5-NN graph from the node positions in `dat.pos`.
    dat = KNNGraph(k=5, loop=False, force_undirected=False)(dat)
    dat.adj_t = None
    dat = ToUndirected()(dat)
    dat = AddSelfLoops()(dat)
    # Reverse edge direction by swapping the source and target rows.
    dat.edge_index = dat.edge_index.flip(dims=[0])
    return dat
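For context, a minimal sketch of how this helper might be called (a sketch, assuming torch_cluster is installed so that KNNGraph works; the random features and positions are placeholder data):

import torch
from torch_geometric.data import Data
from torch_geometric.transforms import AddSelfLoops, KNNGraph, ToUndirected

# KNNGraph derives edges from node positions, so `pos` must be present.
data = Data(x=torch.randn(10, 16), pos=torch.rand(10, 3))
data = edge_creator(data)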
Example #5
from torch_geometric.data import HeteroData


def test_hetero_add_self_loops():
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    data['v', 'v'].edge_index = edge_index
    data['v', 'w'].edge_index = edge_index
    data = AddSelfLoops()(data)
    assert data['v', 'v'].edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                                  [1, 0, 2, 1, 0, 1, 2]]
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
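As the final assertion shows, AddSelfLoops leaves bipartite edge types such as ('v', 'w') untouched: self-loops are only well-defined when the source and destination node types coincide.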
Example #6
import pickle

import torch
from torch_geometric.data import Data
from torch_geometric.transforms import AddSelfLoops

# `load_tensor` and `TestDataset` are project-local helpers assumed to be
# defined elsewhere in the source repository.


def load_dataset(dataset, name):
    with open('./data/' + name + '/' + dataset + '/full_feature',
              'rb') as node_features:
        x_train = pickle.load(node_features)
    with open('./data/' + name + '/' + dataset + '/edge', 'rb') as f:
        edge_index_train = pickle.load(f)
    y_train = load_tensor('./data/' + name + '/' + dataset + '/Interactions',
                          torch.FloatTensor)

    d = []
    for i in range(len(y_train)):
        data = Data(x=x_train[i], edge_index=edge_index_train[i], y=y_train[i])
        data = AddSelfLoops()(data)
        data.atom_num = x_train[i].shape[0]
        d.append(data)
    return TestDataset(d)  # Avoids shadowing the built-in `set`.
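A hypothetical invocation, assuming the pickled ./data/<name>/<dataset>/ files exist on disk (both argument values below are placeholders):

test_set = load_dataset('test', 'my_dataset')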
Example #7
import os
import random
import shutil
from collections import defaultdict
from types import SimpleNamespace

import numpy as np
import torch
from torch_geometric.data import Data
from torch_geometric.datasets import (PPI, Amazon, Coauthor, CoraFull,
                                      Planetoid, Reddit)
from torch_geometric.transforms import (AddSelfLoops, Compose,
                                        NormalizeFeatures,
                                        RemoveIsolatedNodes)
from torch_geometric.utils import to_undirected
from tqdm import tqdm

# LOG, NeighborhoodCache and _get_train_val_test_masks are project-local
# helpers assumed to be defined elsewhere in the source repository.


def get_small_dataset(dataset_name,
                      normalize_attributes=False,
                      add_self_loops=False,
                      remove_isolated_nodes=False,
                      make_undirected=False,
                      graph_availability=None,
                      seed=0,
                      create_adjacency_lists=True):
    """
    Get the pytorch_geometric.data.Data object associated with the specified dataset name.
    :param dataset_name: str => One of the datasets mentioned below.
    :param normalize_attributes: Whether the attributes for each node should be normalized to sum to 1.
    :param add_self_loops: Add self loops to the input Graph.
    :param remove_isolated_nodes: Remove isolated nodes.
    :param make_undirected: Make the Graph undirected.
    :param graph_availability: Either inductive and transductive. If transductive, all the graph nodes are available
                               during training. Otherwise, only training split nodes are available.
    :param seed: The random seed to use while splitting into train/val/test splits.
    :param create_adjacency_lists: Whether to process and store adjacency lists that can be used for efficient
                                   r-radius neighborhood sampling.
    :return: A pytorch_geometric.data.Data object for that dataset.
    """
    assert dataset_name in {
        'amazon-computers', 'amazon-photo', 'citeseer', 'coauthor-cs',
        'coauthor-physics', 'cora', 'cora-full', 'ppi', 'pubmed', 'reddit'
    }
    assert graph_availability in {'inductive', 'transductive'}

    # Compose transforms that should be applied.
    transforms = []
    if normalize_attributes:
        transforms.append(NormalizeFeatures())
    if remove_isolated_nodes:
        transforms.append(RemoveIsolatedNodes())
    if add_self_loops:
        transforms.append(AddSelfLoops())
    transforms = Compose(transforms) if transforms else None

    # Load the specified dataset and apply transforms.
    root_dir = '/tmp/{dir}'.format(dir=dataset_name)
    processed_dir = os.path.join(root_dir, dataset_name, 'processed')
    # Remove any previously pre-processed data, so pytorch_geometric can pre-process it again.
    if os.path.exists(processed_dir) and os.path.isdir(processed_dir):
        shutil.rmtree(processed_dir)

    data = None

    def split_function(y):
        return _get_train_val_test_masks(y.shape[0], y, 0.2, 0.2, seed)

    if dataset_name in ['citeseer', 'cora', 'pubmed']:
        data = Planetoid(root=root_dir,
                         name=dataset_name,
                         pre_transform=transforms,
                         split='full').data
        if seed != 0:
            data.train_mask, data.val_mask, data.test_mask = split_function(
                data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'cora-full':
        data = CoraFull(root=root_dir, pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(
            data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'amazon-computers':
        data = Amazon(root=root_dir,
                      name='Computers',
                      pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(
            data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'amazon-photo':
        data = Amazon(root=root_dir, name='Photo',
                      pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(
            data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'coauthor-cs':
        data = Coauthor(root=root_dir, name='CS',
                        pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(
            data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'coauthor-physics':
        data = Coauthor(root=root_dir,
                        name='Physics',
                        pre_transform=transforms).data
        data.train_mask, data.val_mask, data.test_mask = split_function(
            data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'reddit':
        data = Reddit(root=root_dir, pre_transform=transforms).data
        if seed != 0:
            data.train_mask, data.val_mask, data.test_mask = split_function(
                data.y.numpy())
        data.graphs = [data]
    elif dataset_name == 'ppi':
        data = SimpleNamespace()
        data.graphs = []
        for split in ['train', 'val', 'test']:
            split_data = PPI(root=root_dir,
                             split=split,
                             pre_transform=transforms)
            x_idxs = split_data.slices['x'].numpy()
            edge_idxs = split_data.slices['edge_index'].numpy()
            split_data = split_data.data
            for x_start, x_end, e_start, e_end in zip(x_idxs, x_idxs[1:],
                                                      edge_idxs,
                                                      edge_idxs[1:]):
                graph = Data(split_data.x[x_start:x_end],
                             split_data.edge_index[:, e_start:e_end],
                             y=split_data.y[x_start:x_end])
                graph.num_nodes = int(x_end - x_start)
                graph.split = split
                all_true = torch.ones(graph.num_nodes).bool()
                all_false = torch.zeros(graph.num_nodes).bool()
                graph.train_mask = all_true if split == 'train' else all_false
                graph.val_mask = all_true if split == 'val' else all_false
                graph.test_mask = all_true if split == 'test' else all_false
                data.graphs.append(graph)
        if seed != 0:
            temp_random = random.Random(seed)
            val_graphs = temp_random.sample(range(len(data.graphs)), 2)
            test_candidates = [
                graph_idx for graph_idx in range(len(data.graphs))
                if graph_idx not in val_graphs
            ]
            test_graphs = temp_random.sample(test_candidates, 2)
            for graph_idx, graph in enumerate(data.graphs):
                all_true = torch.ones(graph.num_nodes).bool()
                all_false = torch.zeros(graph.num_nodes).bool()
                graph.split = ('test' if graph_idx in test_graphs else
                               'val' if graph_idx in val_graphs else 'train')
                graph.train_mask = all_true if graph.split == 'train' else all_false
                graph.val_mask = all_true if graph.split == 'val' else all_false
                graph.test_mask = all_true if graph.split == 'test' else all_false

    if make_undirected:
        for graph in data.graphs:
            graph.edge_index = to_undirected(graph.edge_index, graph.num_nodes)

    LOG.info(f'Downloaded and transformed {len(data.graphs)} graph(s).')

    # Populate adjacency lists for efficient k-neighborhood sampling.
    # Only retain edges coming into a node and reverse the edges for the purpose of adjacency lists.
    LOG.info('Processing adjacency lists and degree information.')

    for graph in data.graphs:
        train_in_degrees = np.zeros(graph.num_nodes, dtype=np.int64)
        val_in_degrees = np.zeros(graph.num_nodes, dtype=np.int64)
        test_in_degrees = np.zeros(graph.num_nodes, dtype=np.int64)
        adjacency_lists = defaultdict(list)
        not_val_test_mask = (~graph.val_mask & ~graph.test_mask).numpy()
        val_mask = graph.val_mask.numpy()
        test_mask = graph.test_mask.numpy()

        if create_adjacency_lists:
            num_edges = graph.edge_index[0].shape[0]
            sources = graph.edge_index[0].numpy()
            dests = graph.edge_index[1].numpy()
            for source, dest in tqdm(zip(sources, dests),
                                     total=num_edges,
                                     leave=False):
                if not_val_test_mask[dest] and not_val_test_mask[source]:
                    train_in_degrees[dest] += 1
                    val_in_degrees[dest] += 1
                elif val_mask[dest] and not test_mask[source]:
                    val_in_degrees[dest] += 1
                test_in_degrees[dest] += 1
                adjacency_lists[dest].append(source)

        graph.adjacency_lists = dict(adjacency_lists)
        graph.train_in_degrees = torch.from_numpy(train_in_degrees).long()
        graph.val_in_degrees = torch.from_numpy(val_in_degrees).long()
        graph.test_in_degrees = torch.from_numpy(test_in_degrees).long()
        if graph_availability == 'transductive':
            # All nodes are visible during training in the transductive
            # setting, so every split uses the full (test-time) in-degrees.
            graph.train_in_degrees = graph.test_in_degrees
            graph.val_in_degrees = graph.test_in_degrees

        graph.graph_availability = graph_availability

        # To accumulate any neighborhood perturbations to the graph.
        graph.perturbed_neighborhoods = defaultdict(set)
        graph.added_nodes = defaultdict(set)
        graph.modified_degrees = {}

        # For small datasets, cache the neighborhoods for all nodes for at least 3 different radii queries.
        graph.use_cache = True
        graph.neighborhood_cache = NeighborhoodCache(graph.num_nodes * 3)

        graph.train_mask_original = graph.train_mask
        graph.val_mask_original = graph.val_mask
        graph.test_mask_original = graph.test_mask

        graph.train_mask = torch.ones(
            graph.num_nodes).bool() & ~graph.val_mask & ~graph.test_mask

    return data
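For reference, a minimal sketch of how this loader might be invoked (a sketch, assuming the module defining it is importable; 'cora' is one of the dataset names the assertion above permits):

data = get_small_dataset('cora',
                         normalize_attributes=True,
                         add_self_loops=True,
                         graph_availability='transductive')
for graph in data.graphs:
    print(graph.num_nodes, graph.edge_index.shape)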