def __call__(self, data: HeteroData) -> HeteroData:
    """Add one new edge type per metapath by chaining sparse matmuls.

    For each metapath ``j``, the adjacency matrices of its hops are
    multiplied together and the result is stored under the edge type
    ``(src_type, 'metapath_j', dst_type)``.  The mapping from new edge
    type to its metapath is recorded in ``data.metapath_dict``.
    """
    edge_types = data.edge_types  # save original edge types
    data.metapath_dict = {}

    for j, metapath in enumerate(self.metapaths):
        # Every hop of the metapath must exist in the graph:
        for edge_type in metapath:
            assert data._to_canonical(
                edge_type) in edge_types, f"'{edge_type}' not present"

        # Compose adjacencies along the metapath:
        edge_type = metapath[0]
        adj1 = SparseTensor.from_edge_index(
            edge_index=data[edge_type].edge_index,
            sparse_sizes=data[edge_type].size())
        # Fix: the previous version used `enumerate` here but never used
        # the index; iterate the remaining hops directly.
        for edge_type in metapath[1:]:
            adj2 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size())
            adj1 = adj1 @ adj2

        row, col, _ = adj1.coo()
        new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
        data[new_edge_type].edge_index = torch.vstack([row, col])
        data.metapath_dict[new_edge_type] = metapath

    if self.drop_orig_edges:
        for i in edge_types:
            # Optionally keep relations connecting a node type to itself:
            if self.keep_same_node_type and i[0] == i[-1]:
                continue
            del data[i]

    return data
def test_hetero_to_undirected():
    """`ToUndirected` symmetrizes same-type edges and adds reverse types."""
    from torch_geometric.transforms import ToUndirected

    edge_index = torch.tensor([[2, 0], [3, 1]])
    edge_weight = torch.randn(edge_index.size(1))
    edge_attr = torch.randn(edge_index.size(1), 8)
    perm = torch.tensor([1, 1, 0, 0])

    data = HeteroData()
    data['v'].num_nodes = 4
    data['w'].num_nodes = 4
    for dst in ['v', 'w']:
        data['v', dst].edge_index = edge_index
        data['v', dst].edge_weight = edge_weight
        data['v', dst].edge_attr = edge_attr

    assert not data.is_undirected()
    data = ToUndirected()(data)
    assert data.is_undirected()

    # Same-type edges are symmetrized in place:
    assert data['v', 'v'].edge_index.tolist() == [[0, 1, 2, 3], [1, 0, 3, 2]]
    assert data['v', 'v'].edge_weight.tolist() == edge_weight[perm].tolist()
    assert data['v', 'v'].edge_attr.tolist() == edge_attr[perm].tolist()

    # Bipartite edges stay intact; a reverse ('w', 'v') type is added:
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
    assert data['v', 'w'].edge_weight.tolist() == edge_weight.tolist()
    assert data['v', 'w'].edge_attr.tolist() == edge_attr.tolist()
    assert data['w', 'v'].edge_index.tolist() == [[3, 1], [2, 0]]
    assert data['w', 'v'].edge_weight.tolist() == edge_weight.tolist()
    assert data['w', 'v'].edge_attr.tolist() == edge_attr.tolist()
def test_to_homogeneous():
    """`to_homogeneous` merges node/edge stores into one `Data` object."""
    data = HeteroData()
    data['paper'].x = torch.randn(100, 128)
    data['author'].x = torch.randn(200, 128)

    # (src, dst) -> (num_src, num_dst, num_edges); insertion order matters:
    edge_specs = {
        ('paper', 'paper'): (100, 100, 250),
        ('paper', 'author'): (100, 200, 500),
        ('author', 'paper'): (200, 100, 1000),
    }
    for (src, dst), (n_src, n_dst, n_edges) in edge_specs.items():
        data[src, dst].edge_index = get_edge_index(n_src, n_dst, n_edges)
        data[src, dst].edge_weight = torch.randn(n_edges, )
        data[src, dst].edge_attr = torch.randn(n_edges, 64)

    data = data.to_homogeneous()

    assert len(data) == 5
    assert data.num_nodes == 300
    assert data.num_edges == 1750
    assert data.num_node_features == 128
    assert data.num_edge_features == 64
    # `edge_type` labels each edge with the index of its original relation:
    assert data.edge_type.size() == (1750, )
    assert data.edge_type.min() == 0
    assert data.edge_type.max() == 2
    assert len(data._node_slices) == 2
    assert len(data._edge_slices) == 3
    assert len(data._edge_type_dict) == 3
def test_heterogeneous_neighbor_loader_on_cora(directed):
    """Full-fanout `NeighborLoader` sampling on Cora matches the full graph."""
    # Download Cora into a fresh temporary directory:
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    # Wrap the homogeneous graph as a single-node-type HeteroData:
    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)  # seed nodes

    # `num_neighbors=[-1, -1]` samples the complete two-hop neighborhood:
    loader = NeighborLoader(hetero_data, num_neighbors=[-1, -1],
                            batch_size=split_idx.numel(),
                            input_nodes=('paper', split_idx),
                            directed=directed)
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    if not directed:
        # Undirected sampling must reproduce the exact 2-hop subgraph:
        n_id, _, _, e_mask = k_hop_subgraph(split_idx, num_hops=2,
                                            edge_index=data.edge_index,
                                            num_nodes=data.num_nodes)
        n_id = n_id.sort()[0]
        assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
        assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    # Outputs on the seed nodes must agree between the full graph and the
    # sampled mini-batch (`to_hetero` converts the homogeneous model):
    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)

    # Best-effort cleanup of the downloaded dataset:
    try:
        shutil.rmtree(root)
    except PermissionError:
        pass
def __call__(self, data: HeteroData) -> HeteroData:
    """Add one (optionally weighted) edge type per metapath.

    Adjacency matrices of the metapath hops are multiplied together,
    optionally sub-sampled after every product (``max_sample``), and
    the result is stored as ``(src_type, 'metapath_j', dst_type)``.
    When ``self.weighted`` is set, the accumulated products are kept as
    ``edge_weight`` of the new relation.
    """
    edge_types = data.edge_types  # save original edge types
    data.metapath_dict = {}

    for j, metapath in enumerate(self.metapaths):
        # Every hop of the metapath must exist in the graph:
        for edge_type in metapath:
            assert data._to_canonical(
                edge_type) in edge_types, f"'{edge_type}' not present"

        edge_type = metapath[0]
        edge_weight = self._get_edge_weight(data, edge_type)
        adj1 = SparseTensor.from_edge_index(
            edge_index=data[edge_type].edge_index,
            sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)
        if self.max_sample is not None:
            adj1 = self.sample_adj(adj1)

        # Fix: the previous version used `enumerate` here but never used
        # the index; iterate the remaining hops directly.
        for edge_type in metapath[1:]:
            edge_weight = self._get_edge_weight(data, edge_type)
            adj2 = SparseTensor.from_edge_index(
                edge_index=data[edge_type].edge_index,
                sparse_sizes=data[edge_type].size(), edge_attr=edge_weight)
            adj1 = adj1 @ adj2
            # Sub-sample after every hop to keep the product sparse:
            if self.max_sample is not None:
                adj1 = self.sample_adj(adj1)

        row, col, edge_weight = adj1.coo()
        new_edge_type = (metapath[0][0], f'metapath_{j}', metapath[-1][-1])
        data[new_edge_type].edge_index = torch.vstack([row, col])
        if self.weighted:
            data[new_edge_type].edge_weight = edge_weight
        data.metapath_dict[new_edge_type] = metapath

    if self.drop_orig_edges:
        for i in edge_types:
            # Optionally keep relations connecting a node type to itself:
            if self.keep_same_node_type and i[0] == i[-1]:
                continue
            del data[i]

    # Remove node types no remaining edge type connects:
    if self.drop_unconnected_nodes:
        connected_nodes = set()
        for edge_type in data.edge_types:
            connected_nodes.add(edge_type[0])
            connected_nodes.add(edge_type[-1])
        # Snapshot node types first, since we delete while iterating:
        for node_type in list(data.node_types):
            if node_type not in connected_nodes:
                del data[node_type]

    return data
def test_hetero_data_to_canonical():
    """Two-element edge keys canonicalize to ('src', 'to', 'dst')."""
    data = HeteroData()
    assert isinstance(data['user', 'product'], EdgeStorage)
    assert len(data.edge_types) == 1
    # The short form and the explicit 'to' form address the same storage:
    assert isinstance(data['user', 'to', 'product'], EdgeStorage)
    assert len(data.edge_types) == 1

    data = HeteroData()
    assert isinstance(data['user', 'buys', 'product'], EdgeStorage)
    assert isinstance(data['user', 'clicks', 'product'], EdgeStorage)
    assert len(data.edge_types) == 2

    # Ambiguous short form: two relations exist between 'user' and 'product':
    with pytest.raises(TypeError, match="missing 1 required"):
        data['user', 'product']
def test_hgt_loader_on_cora(get_dataset):
    """Full-budget `HGTLoader` sampling on Cora matches the full graph."""
    dataset = get_dataset(name='Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    # Wrap the homogeneous graph as a single-node-type HeteroData:
    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)  # seed nodes

    # Sample the complete two-hop neighborhood:
    loader = HGTLoader(hetero_data, num_samples=[data.num_nodes] * 2,
                       batch_size=split_idx.numel(),
                       input_nodes=('paper', split_idx))
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    # The sampled batch must equal the exact 2-hop subgraph of the seeds:
    n_id, _, _, e_mask = k_hop_subgraph(split_idx, num_hops=2,
                                        edge_index=data.edge_index,
                                        num_nodes=data.num_nodes)
    n_id = n_id.sort()[0]
    assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
    assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    # Outputs on the seed nodes must agree between the full graph and the
    # sampled mini-batch (`to_hetero` converts the homogeneous model):
    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)
def test_hetero_data_functions():
    """Exercise size, metadata, collect and delete helpers of `HeteroData`."""
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper

    # `len(data)` equals the number of distinct attribute names (see `keys`):
    assert len(data) == 3
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x']
    assert 'x' in data and 'edge_index' in data and 'edge_attr' in data
    assert data.num_nodes == 15
    assert data.num_edges == 110

    # Feature sizes are reported per type:
    assert data.num_node_features == {'paper': 16, 'author': 32}
    assert data.num_edge_features == {
        ('paper', 'to', 'paper'): 8,
        ('paper', 'to', 'author'): 0,
        ('author', 'to', 'paper'): 0,
    }

    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [
        ('paper', 'to', 'paper'),
        ('paper', 'to', 'author'),
        ('author', 'to', 'paper'),
    ]

    # `collect` gathers one attribute across all stores that hold it:
    x_dict = data.collect('x')
    assert len(x_dict) == 2
    assert x_dict['paper'].tolist() == x_paper.tolist()
    assert x_dict['author'].tolist() == x_author.tolist()
    assert x_dict == data.x_dict

    # Graph-level attributes:
    data.y = 0
    assert data['y'] == 0 and data.y == 0
    assert len(data) == 4
    assert sorted(data.keys) == ['edge_attr', 'edge_index', 'x', 'y']

    # Deleting an edge type removes it from the metadata:
    del data['paper', 'author']
    node_types, edge_types = data.metadata()
    assert node_types == ['paper', 'author']
    assert edge_types == [('paper', 'to', 'paper'), ('author', 'to', 'paper')]

    assert len(data.to_dict()) == 5
    assert len(data.to_namedtuple()) == 5
    assert data.to_namedtuple().y == 0
    assert len(data.to_namedtuple().paper) == 1
def test_hetero_data_subgraph():
    """`subgraph` keeps only subset node types and filters their edges."""
    data = HeteroData()
    data.num_node_types = 3  # graph-level attribute, should be copied over
    data['paper'].x = x_paper
    data['paper'].name = 'paper'
    data['paper'].num_nodes = x_paper.size(0)
    data['author'].x = x_author
    data['author'].num_nodes = x_author.size(0)
    data['conference'].x = x_conference
    data['conference'].num_nodes = x_conference.size(0)
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'paper'].edge_attr = edge_attr_paper_paper
    data['paper', 'paper'].name = 'cites'
    data['author', 'paper'].edge_index = edge_index_author_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['paper', 'conference'].edge_index = edge_index_paper_conference

    # Random node subsets for two of the three node types:
    subset = {
        'paper': torch.randperm(x_paper.size(0))[:4],
        'author': torch.randperm(x_author.size(0))[:2]
    }

    out = data.subgraph(subset)

    # Graph-level attributes are preserved:
    assert out.num_node_types == data.num_node_types

    # Only node types in `subset` survive ('conference' is dropped):
    assert out.node_types == ['paper', 'author']
    assert len(out['paper']) == 3
    assert torch.allclose(out['paper'].x, data['paper'].x[subset['paper']])
    assert out['paper'].name == 'paper'
    assert out['paper'].num_nodes == 4
    assert len(out['author']) == 2
    assert torch.allclose(out['author'].x, data['author'].x[subset['author']])
    assert out['author'].num_nodes == 2

    # Edge types with both endpoints surviving are kept:
    assert out.edge_types == [
        ('paper', 'to', 'paper'),
        ('author', 'to', 'paper'),
        ('paper', 'to', 'author'),
    ]
    assert len(out['paper', 'paper']) == 3
    assert out['paper', 'paper'].edge_index is not None
    assert out['paper', 'paper'].edge_attr is not None
    assert out['paper', 'paper'].name == 'cites'
    assert len(out['paper', 'author']) == 1
    assert out['paper', 'author'].edge_index is not None
    assert len(out['author', 'paper']) == 1
    assert out['author', 'paper'].edge_index is not None
def test_heterogeneous_link_neighbor_loader_loop(directed):
    """Link-level sampling on a self-relation keeps node ids in range."""
    torch.manual_seed(12345)

    data = HeteroData()
    data['paper'].x = torch.arange(100)
    data['author'].x = torch.arange(100, 300)
    data['paper', 'paper'].edge_index = get_edge_index(100, 100, 500)
    data['paper', 'author'].edge_index = get_edge_index(100, 200, 1000)
    data['author', 'paper'].edge_index = get_edge_index(200, 100, 1000)

    loader = LinkNeighborLoader(data, num_neighbors=[-1] * 2,
                                edge_label_index=('paper', 'paper'),
                                batch_size=20, directed=directed)

    for batch in loader:
        paper_x = batch['paper'].x
        assert paper_x.size(0) <= 100
        assert paper_x.min() >= 0 and paper_x.max() < 100

        # Assert positive samples are present in the original graph:
        sampled = unique_edge_pairs(batch['paper', 'paper'].edge_index)
        labeled = unique_edge_pairs(batch['paper', 'paper'].edge_label_index)
        assert len(sampled | labeled) == len(sampled)
def test_hetero_conv(aggr):
    """`HeteroConv` dispatches per relation and aggregates per node type."""
    data = HeteroData()
    data['paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)
    data['paper', 'paper'].edge_weight = torch.rand(200)

    sub_convs = {
        ('paper', 'to', 'paper'): GCNConv(-1, 64),
        ('author', 'to', 'paper'): SAGEConv((-1, -1), 64),
        ('paper', 'to', 'author'): GATConv((-1, -1), 64),
    }
    conv = HeteroConv(sub_convs, aggr=aggr)

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    out = conv(data.x_dict, data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    assert len(out) == 2
    # `aggr=None` stacks per-relation outputs instead of reducing them:
    if aggr is None:
        assert out['paper'].size() == (50, 2, 64)
        assert out['author'].size() == (30, 1, 64)
    else:
        assert out['paper'].size() == (50, 64)
        assert out['author'].size() == (30, 64)
def test_copy_hetero_data():
    """Shallow copy shares the tensors; deep copy clones them."""
    data = HeteroData()
    data['paper'].x = x_paper
    data['paper', 'to', 'paper'].edge_index = edge_index_paper_paper

    # Shallow copy: new container and stores, shared underlying tensors.
    out = copy.copy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
        # Each store's weak back-reference points at its own parent:
        assert id(data) == id(store1._parent())
        assert id(out) == id(store2._parent())
    assert out['paper']._key == 'paper'
    assert data['paper'].x.data_ptr() == out['paper'].x.data_ptr()
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() == out['to'].edge_index.data_ptr()

    # Deep copy: tensors are cloned but hold equal values.
    out = copy.deepcopy(data)
    assert id(data) != id(out)
    assert len(data.stores) == len(out.stores)
    for store1, store2 in zip(data.stores, out.stores):
        assert id(store1) != id(store2)
    assert id(out) == id(out['paper']._parent())
    assert out['paper']._key == 'paper'
    assert data['paper'].x.data_ptr() != out['paper'].x.data_ptr()
    assert data['paper'].x.tolist() == out['paper'].x.tolist()
    assert id(out) == id(out['to']._parent())
    assert out['to']._key == ('paper', 'to', 'paper')
    assert data['to'].edge_index.data_ptr() != out['to'].edge_index.data_ptr()
    assert data['to'].edge_index.tolist() == out['to'].edge_index.tolist()
def test_remove_isolated_nodes_in_hetero_data():
    """`RemoveIsolatedNodes` drops nodes of every type with no incident edge."""
    data = HeteroData()
    data['p'].x = torch.arange(6)
    data['a'].x = torch.arange(6)
    data['i'].num_nodes = 4

    # isolated paper nodes: {4}
    # isolated author nodes: {3, 4, 5}
    # isolated institution nodes: {0, 1, 2, 3}
    data['p', '1', 'p'].edge_index = torch.tensor([[0, 1, 2], [0, 1, 3]])
    data['p', '2', 'a'].edge_index = torch.tensor([[1, 3, 5], [0, 1, 2]])
    data['p', '2', 'a'].edge_attr = torch.arange(3)
    data['p', '3', 'a'].edge_index = torch.tensor([[5], [2]])

    data = RemoveIsolatedNodes()(data)

    assert len(data) == 4
    assert data['p'].num_nodes == 5
    assert data['a'].num_nodes == 3
    assert data['i'].num_nodes == 0
    assert data['p'].x.tolist() == [0, 1, 2, 3, 5]
    assert data['a'].x.tolist() == [0, 1, 2]
    # Edge indices are re-labeled against the compacted node ids:
    assert data['1'].edge_index.tolist() == [[0, 1, 2], [0, 1, 3]]
    assert data['2'].edge_index.tolist() == [[1, 3, 4], [0, 1, 2]]
    assert data['2'].edge_attr.tolist() == [0, 1, 2]
    assert data['3'].edge_index.tolist() == [[4], [2]]
def __getitem__(self, time_index: Union[int, slice]):
    """Return a sliced signal (slice index) or one `HeteroData` snapshot (int).

    Slicing keeps the static edge structure and slices only the
    time-dependent features, targets and additional feature keys.
    """
    if isinstance(time_index, slice):
        snapshot = StaticHeteroGraphTemporalSignal(
            self.edge_index_dict,
            self.edge_weight_dict,
            self.feature_dicts[time_index],
            self.target_dicts[time_index],
            **{
                key: getattr(self, key)[time_index]
                for key in self.additional_feature_keys
            })
    else:
        # Gather per-type dictionaries for the requested time step:
        x_dict = self._get_features(time_index)
        edge_index_dict = self._get_edge_index()
        edge_weight_dict = self._get_edge_weight()
        y_dict = self._get_target(time_index)
        additional_features = self._get_additional_features(time_index)

        # Assemble the snapshot, skipping dictionaries that are empty/None:
        snapshot = HeteroData()
        if x_dict:
            for key, value in x_dict.items():
                snapshot[key].x = value
        if edge_index_dict:
            for key, value in edge_index_dict.items():
                snapshot[key].edge_index = value
        if edge_weight_dict:
            for key, value in edge_weight_dict.items():
                # NOTE(review): weights are stored under `edge_attr`, not
                # `edge_weight` — confirm this naming is intentional.
                snapshot[key].edge_attr = value
        if y_dict:
            for key, value in y_dict.items():
                snapshot[key].y = value
        if additional_features:
            for feature_name, feature_dict in additional_features.items():
                if feature_dict:
                    for key, value in feature_dict.items():
                        snapshot[key][feature_name] = value
    return snapshot
def test_hetero_conv_with_dot_syntax_node_types():
    """Node type names containing dots must work throughout `HeteroConv`."""
    data = HeteroData()
    data['src.paper'].x = torch.randn(50, 32)
    data['author'].x = torch.randn(30, 64)
    data['src.paper', 'src.paper'].edge_index = get_edge_index(50, 50, 200)
    data['src.paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'src.paper'].edge_index = get_edge_index(30, 50, 100)
    data['src.paper', 'src.paper'].edge_weight = torch.rand(200)

    conv = HeteroConv({
        ('src.paper', 'to', 'src.paper'):
        GCNConv(-1, 64),
        ('author', 'to', 'src.paper'):
        SAGEConv((-1, -1), 64),
        ('src.paper', 'to', 'author'):
        GATConv((-1, -1), 64, add_self_loops=False),
    })

    assert len(list(conv.parameters())) > 0
    assert str(conv) == 'HeteroConv(num_relations=3)'

    out = conv(data.x_dict, data.edge_index_dict,
               edge_weight_dict=data.edge_weight_dict)

    # One output entry per destination node type:
    assert len(out) == 2
    assert out['src.paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)
def test_init_hetero_data():
    """`HeteroData` supports incremental, kwargs and dict construction."""
    # Incremental assignment:
    data = HeteroData()
    data['v1'].x = 1
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    # Keyword-argument constructor ('__' separates edge endpoints):
    data = HeteroData(
        v1={'x': 1},
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']

    # Dictionary constructor with tuple keys for edge types:
    data = HeteroData({
        'v1': {
            'x': 1
        },
        'paper': {
            'x': x_paper
        },
        'author': {
            'x': x_author
        },
        ('paper', 'paper'): {
            'edge_index': edge_index_paper_paper
        },
        ('paper', 'author'): {
            'edge_index': edge_index_paper_author
        },
        ('author', 'paper'): {
            'edge_index': edge_index_author_paper
        },
    })
    assert len(data) == 2
    assert len(data.edge_types) == 3
    assert data.node_types == ['v1', 'paper', 'author']
def generate_graph(self):
    """Build a `HeteroData` graph from the seed data and persist it to disk."""
    graph = HeteroData()
    graph = self.define_graph_nodes_and_labels(graph)
    graph = self.define_graph_edges(graph)

    # Output file: <seed_data_path>_<file_type>_data.pt
    out_path = ''.join(
        (self.seed_data_path, '_', self.file_type, '_data.pt'))
    torch.save(graph, out_path)
    return
def process(self):
    """Build the heterogeneous graph from the raw CSV files and save it.

    The raw directory follows the OGB-style layout (node-feat /
    node-label / relations / split folders) — gzip-compressed CSVs.
    """
    import pandas as pd

    data = HeteroData()

    # Paper node features:
    path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node-feat.csv.gz')
    x_paper = pd.read_csv(path, compression='gzip', header=None,
                          dtype=np.float32).values
    data['paper'].x = torch.from_numpy(x_paper)

    # Paper publication years:
    path = osp.join(self.raw_dir, 'node-feat', 'paper', 'node_year.csv.gz')
    year_paper = pd.read_csv(path, compression='gzip', header=None,
                             dtype=np.int64).values
    data['paper'].year = torch.from_numpy(year_paper).view(-1)

    # Paper node labels:
    path = osp.join(self.raw_dir, 'node-label', 'paper', 'node-label.csv.gz')
    y_paper = pd.read_csv(path, compression='gzip', header=None,
                          dtype=np.int64).values.flatten()
    data['paper'].y = torch.from_numpy(y_paper)

    if self.preprocess is None:
        # No embeddings requested: store only node counts for the
        # featureless node types.
        path = osp.join(self.raw_dir, 'num-node-dict.csv.gz')
        num_nodes_df = pd.read_csv(path, compression='gzip')
        for node_type in ['author', 'institution', 'field_of_study']:
            data[node_type].num_nodes = num_nodes_df[node_type].tolist()[0]
    else:
        # Load pre-computed embeddings for the non-paper node types:
        emb_dict = torch.load(self.raw_paths[-1])
        for key, value in emb_dict.items():
            if key != 'paper':
                data[key].x = value

    # One edge file per relation; rows are (src, dst) pairs:
    for edge_type in [('author', 'affiliated_with', 'institution'),
                      ('author', 'writes', 'paper'),
                      ('paper', 'cites', 'paper'),
                      ('paper', 'has_topic', 'field_of_study')]:
        f = '___'.join(edge_type)
        path = osp.join(self.raw_dir, 'relations', f, 'edge.csv.gz')
        edge_index = pd.read_csv(path, compression='gzip', header=None,
                                 dtype=np.int64).values
        edge_index = torch.from_numpy(edge_index).t().contiguous()
        data[edge_type].edge_index = edge_index

    # Time-based train/val/test split masks on paper nodes:
    for f, v in [('train', 'train'), ('valid', 'val'), ('test', 'test')]:
        path = osp.join(self.raw_dir, 'split', 'time', 'paper',
                        f'{f}.csv.gz')
        idx = pd.read_csv(path, compression='gzip', header=None,
                          dtype=np.int64).values.flatten()
        idx = torch.from_numpy(idx)
        mask = torch.zeros(data['paper'].num_nodes, dtype=torch.bool)
        mask[idx] = True
        data['paper'][f'{v}_mask'] = mask

    if self.pre_transform is not None:
        data = self.pre_transform(data)

    torch.save(self.collate([data]), self.processed_paths[0])
def test_hetero_normalize_scale():
    """`NormalizeFeatures` row-normalizes features of every node type."""
    x = torch.tensor([[1, 0, 1], [0, 1, 0], [0, 0, 0]], dtype=torch.float)

    data = HeteroData()
    for node_type in ['v', 'w']:
        data[node_type].x = x

    data = NormalizeFeatures()(data)

    # Each row sums to one (all-zero rows stay zero):
    expected = [[0.5, 0, 0.5], [0, 1, 0], [0, 0, 0]]
    for node_type in ['v', 'w']:
        assert data[node_type].x.tolist() == expected
def test_hetero_data_rename():
    """`rename` remaps a node type and every edge type mentioning it."""
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper

    data = data.rename('paper', 'article')

    assert data.node_types == ['author', 'article']
    assert data.edge_types == [
        ('article', 'to', 'article'),
        ('article', 'to', 'author'),
        ('author', 'to', 'article'),
    ]
    # Underlying storage is carried over unchanged:
    assert data['article'].x.tolist() == x_paper.tolist()
    renamed_edge_index = data['article', 'article'].edge_index
    assert renamed_edge_index.tolist() == edge_index_paper_paper.tolist()
def test_add_metapaths():
    """End-to-end checks of the `AddMetaPaths` transform on a toy DBLP graph."""
    dblp = HeteroData()
    dblp['paper'].x = torch.ones(5)
    dblp['author'].x = torch.ones(6)
    dblp['conference'].x = torch.ones(3)
    dblp['paper', 'cites', 'paper'].edge_index = torch.tensor([[0, 1, 2, 3],
                                                               [1, 2, 4, 2]])
    dblp['paper', 'author'].edge_index = torch.tensor([[0, 1, 2, 3, 4],
                                                       [2, 2, 5, 2, 5]])
    # Reverse relations reuse the forward edges with swapped rows:
    dblp['author', 'paper'].edge_index = dblp['paper',
                                              'author'].edge_index[[1, 0]]
    dblp['conference', 'paper'].edge_index = torch.tensor([[0, 0, 1, 2, 2],
                                                           [0, 1, 2, 3, 4]])
    dblp['paper', 'conference'].edge_index = dblp['conference',
                                                  'paper'].edge_index[[1, 0]]

    # Test transform options:
    orig_edge_type = dblp.edge_types
    metapaths = [[('paper', 'conference'), ('conference', 'paper')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    meta2 = AddMetaPaths(metapaths, drop_orig_edges=True)(dblp.clone())
    meta3 = AddMetaPaths(metapaths, drop_orig_edges=True,
                         keep_same_node_type=True)(dblp.clone())
    meta4 = AddMetaPaths(metapaths, drop_orig_edges=True,
                         keep_same_node_type=True,
                         drop_unconnected_nodes=True)(dblp.clone())

    # All option combinations produce the same metapath edges:
    assert meta1['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta2['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta3['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9
    assert meta4['paper', 'metapath_0', 'paper'].edge_index.shape[-1] == 9

    # Original edges are kept, dropped, or dropped except same-type ones:
    assert all([i in meta1.edge_types for i in orig_edge_type])
    assert meta2.edge_types == [('paper', 'metapath_0', 'paper')]
    assert meta3.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]
    assert meta4.edge_types == [('paper', 'cites', 'paper'),
                                ('paper', 'metapath_0', 'paper')]
    assert meta3.node_types == ['paper', 'author', 'conference']
    # Dropping unconnected nodes leaves only the 'paper' type:
    assert meta4.node_types == ['paper']

    # Test 4-hop metapath:
    metapaths = [[('author', 'paper'), ('paper', 'conference')],
                 [('author', 'paper'), ('paper', 'conference'),
                  ('conference', 'paper'), ('paper', 'author')]]
    meta1 = AddMetaPaths(metapaths)(dblp.clone())
    new_edge_types = [('author', 'metapath_0', 'conference'),
                      ('author', 'metapath_1', 'author')]
    assert meta1[new_edge_types[0]].edge_index.shape[-1] == 4
    assert meta1[new_edge_types[1]].edge_index.shape[-1] == 4

    # Test `metapath_dict` information:
    assert list(meta1.metapath_dict.values()) == metapaths
    assert list(meta1.metapath_dict.keys()) == new_edge_types
def generate_data(self) -> HeteroData:
    """Generate one random heterogeneous graph according to the config.

    Node types get random features (or only a node count when their
    channel count is 0); every configured edge type gets a random
    ``edge_index`` plus optional edge features/weights depending on
    ``self.edge_dim``.  Labels are attached for node- or graph-level
    tasks when ``self._num_classes > 0``.
    """
    data = HeteroData()

    iterator = zip(self.node_types, self.num_channels)
    for i, (node_type, num_channels) in enumerate(iterator):
        num_nodes = get_num_nodes(self.avg_num_nodes, self.avg_degree)
        store = data[node_type]
        if num_channels > 0:
            store.x = torch.randn(num_nodes, num_channels)
        else:
            # Featureless type: record the node count explicitly.
            store.num_nodes = num_nodes
        # Node-level task: labels live on the first node type only.
        if self._num_classes > 0 and self.task == 'node' and i == 0:
            store.y = torch.randint(self._num_classes, (num_nodes, ))

    for (src, rel, dst) in self.edge_types:
        store = data[(src, rel, dst)]
        store.edge_index = get_edge_index(
            data[src].num_nodes,
            data[dst].num_nodes,
            self.avg_degree,
            is_undirected=False,
            remove_loops=False,
        )
        # edge_dim > 1 -> multi-channel `edge_attr`;
        # edge_dim == 1 -> scalar `edge_weight`; otherwise no edge features.
        if self.edge_dim > 1:
            store.edge_attr = torch.rand(store.num_edges, self.edge_dim)
        elif self.edge_dim == 1:
            store.edge_weight = torch.rand(store.num_edges)
            # Fix: removed a dead `pass` statement that followed this branch.

    if self._num_classes > 0 and self.task == 'graph':
        data.y = torch.tensor([random.randint(0, self._num_classes - 1)])

    # Attach any extra user-requested graph-level features:
    for feature_name, feature_shape in self.kwargs.items():
        setattr(data, feature_name, torch.randn(feature_shape))

    return data
def test_hgt_conv_out_of_place():
    """`HGTConv` must not mutate the input feature dictionary in place."""
    data = HeteroData()
    data['author'].x = torch.randn(4, 16)
    data['paper'].x = torch.randn(6, 32)

    src = torch.randint(0, 4, (20, ), dtype=torch.long)
    dst = torch.randint(0, 6, (20, ), dtype=torch.long)
    data['author', 'paper'].edge_index = torch.stack([src, dst], dim=0)
    data['paper', 'author'].edge_index = torch.stack([dst, src], dim=0)

    conv = HGTConv(-1, 64, data.metadata(), heads=1)

    x_dict, edge_index_dict = data.x_dict, data.edge_index_dict
    assert x_dict['author'].size() == (4, 16)
    assert x_dict['paper'].size() == (6, 32)

    _ = conv(x_dict, edge_index_dict)

    # Inputs keep their original shapes after the forward pass:
    assert x_dict['author'].size() == (4, 16)
    assert x_dict['paper'].size() == (6, 32)
def test_hetero_add_self_loops():
    """`AddSelfLoops` only augments relations within a single node type."""
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    data['v', 'v'].edge_index = edge_index
    data['v', 'w'].edge_index = edge_index

    data = AddSelfLoops()(data)

    # Self-loops (0, 0), (1, 1), (2, 2) are appended to ('v', 'v'):
    assert data['v', 'v'].edge_index.tolist() == [[0, 1, 1, 2, 0, 1, 2],
                                                  [1, 0, 2, 1, 0, 1, 2]]
    # Bipartite edge types are left untouched:
    assert data['v', 'w'].edge_index.tolist() == edge_index.tolist()
def test_hetero_in_memory_dataset():
    """An in-memory dataset round-trips heterogeneous graphs unchanged."""
    data1 = HeteroData()
    data1.y = torch.randn(5)
    data1['paper'].x = torch.randn(10, 16)
    data1['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    data2 = HeteroData()
    data2.y = torch.randn(5)
    data2['paper'].x = torch.randn(10, 16)
    data2['paper', 'paper'].edge_index = torch.randint(0, 10, (2, 30)).long()

    dataset = MyTestDataset([data1, data2])
    assert str(dataset) == 'MyTestDataset(2)'
    assert len(dataset) == 2

    # Each retrieved graph matches its input exactly:
    assert len(dataset[0]) == 3
    assert dataset[0].y.tolist() == data1.y.tolist()
    assert dataset[0]['paper'].x.tolist() == data1['paper'].x.tolist()
    assert (dataset[0]['paper', 'paper'].edge_index.tolist() == data1[
        'paper', 'paper'].edge_index.tolist())

    assert len(dataset[1]) == 3
    assert dataset[1].y.tolist() == data2.y.tolist()
    assert dataset[1]['paper'].x.tolist() == data2['paper'].x.tolist()
    assert (dataset[1]['paper', 'paper'].edge_index.tolist() == data2[
        'paper', 'paper'].edge_index.tolist())
def test_random_link_split_on_undirected_hetero_data():
    """`RandomLinkSplit` keeps the training graph undirected when asked to."""
    data = HeteroData()
    data['p'].x = torch.arange(100)
    data['p', 'p'].edge_index = to_undirected(get_edge_index(100, 100, 500))

    # Without explicit reverse edge types:
    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)
    assert train_data['p', 'p'].is_undirected()

    # With the relation acting as its own reverse:
    transform = RandomLinkSplit(is_undirected=True, edge_types=('p', 'p'),
                                rev_edge_types=('p', 'p'))
    train_data, val_data, test_data = transform(data)
    assert train_data['p', 'p'].is_undirected()
def create_hetero_mock_data(n_count, feature_dict):
    """Build a random author→paper hetero graph and return its tensor dicts.

    Returns ``(x_dict, edge_index_dict, metadata)`` suitable for feeding
    a heterogeneous GNN.
    """
    data = HeteroData()
    data['author'].x = torch.FloatTensor(
        np.random.uniform(0, 1, (n_count, feature_dict['author'])))
    data['paper'].x = torch.FloatTensor(
        np.random.uniform(0, 1, (n_count, feature_dict['paper'])))
    data[('author', 'writes', 'paper')].edge_index = torch.LongTensor(
        get_edge_array(n_count))

    # Add reverse edges so message passing reaches both node types:
    data = T.ToUndirected()(data)

    return data.x_dict, data.edge_index_dict, data.metadata()
def test_init_hetero_data():
    """`HeteroData` supports incremental, kwargs and dict construction."""
    # Incremental assignment:
    data = HeteroData()
    data['paper'].x = x_paper
    data['author'].x = x_author
    data['paper', 'paper'].edge_index = edge_index_paper_paper
    data['paper', 'author'].edge_index = edge_index_paper_author
    data['author', 'paper'].edge_index = edge_index_author_paper
    assert len(data) == 2

    # Keyword-argument constructor ('__' separates edge endpoints):
    data = HeteroData(
        paper={'x': x_paper},
        author={'x': x_author},
        paper__paper={'edge_index': edge_index_paper_paper},
        paper__author={'edge_index': edge_index_paper_author},
        author__paper={'edge_index': edge_index_author_paper},
    )
    assert len(data) == 2

    # Dictionary constructor with tuple keys for edge types:
    data = HeteroData({
        'paper': {
            'x': x_paper
        },
        'author': {
            'x': x_author
        },
        ('paper', 'paper'): {
            'edge_index': edge_index_paper_paper
        },
        ('paper', 'author'): {
            'edge_index': edge_index_paper_author
        },
        ('author', 'paper'): {
            'edge_index': edge_index_author_paper
        },
    })
    assert len(data) == 2
def test_hetero_to_sparse_tensor():
    """`ToSparseTensor` converts each edge type to an `adj_t` SparseTensor."""
    edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]])

    data = HeteroData()
    data['v'].num_nodes = 3
    data['w'].num_nodes = 3
    data['v', 'v'].edge_index = edge_index
    data['v', 'w'].edge_index = edge_index

    data = ToSparseTensor()(data)

    # Both relations get the same sparse structure and no values:
    for edge_type in [('v', 'v'), ('v', 'w')]:
        adj_t = data[edge_type].adj_t
        assert adj_t.storage.row().tolist() == [0, 1, 1, 2]
        assert adj_t.storage.col().tolist() == [1, 0, 2, 1]
        assert adj_t.storage.value() is None
def test_hetero_conv_with_custom_conv():
    """`HeteroConv` forwards extra positional dict arguments to sub-convs."""
    data = HeteroData()
    data['paper'].x = torch.randn(50, 32)
    data['paper'].pos = torch.randn(50, 3)
    data['author'].x = torch.randn(30, 64)
    data['author'].pos = torch.randn(30, 3)
    data['paper', 'paper'].edge_index = get_edge_index(50, 50, 200)
    data['paper', 'author'].edge_index = get_edge_index(50, 30, 100)
    data['author', 'paper'].edge_index = get_edge_index(30, 50, 100)

    # One custom convolution per relation:
    conv = HeteroConv({edge_type: CustomConv(64)
                       for edge_type in data.edge_types})
    out = conv(data.x_dict, data.edge_index_dict, data.pos_dict)

    assert len(out) == 2
    assert out['paper'].size() == (50, 64)
    assert out['author'].size() == (30, 64)