def test_gnn_benchmark():
    # AmazonCoBuyComputerDataset
    g = data.AmazonCoBuyComputerDataset()[0]
    assert g.num_nodes() == 13752
    assert g.num_edges() == 491722
    dst = F.asnumpy(g.edges()[1])
    assert np.array_equal(dst, np.sort(dst))

    # AmazonCoBuyPhotoDataset
    g = data.AmazonCoBuyPhotoDataset()[0]
    assert g.num_nodes() == 7650
    assert g.num_edges() == 238163
    dst = F.asnumpy(g.edges()[1])
    assert np.array_equal(dst, np.sort(dst))

    # CoauthorPhysicsDataset
    g = data.CoauthorPhysicsDataset()[0]
    assert g.num_nodes() == 34493
    assert g.num_edges() == 495924
    dst = F.asnumpy(g.edges()[1])
    assert np.array_equal(dst, np.sort(dst))

    # CoauthorCSDataset
    g = data.CoauthorCSDataset()[0]
    assert g.num_nodes() == 18333
    assert g.num_edges() == 163788
    dst = F.asnumpy(g.edges()[1])
    assert np.array_equal(dst, np.sort(dst))

    # CoraFullDataset
    g = data.CoraFullDataset()[0]
    assert g.num_nodes() == 19793
    assert g.num_edges() == 126842
    dst = F.asnumpy(g.edges()[1])
    assert np.array_equal(dst, np.sort(dst))

def test_add_nodepred_split():
    # homogeneous graph dataset
    dataset = data.AmazonCoBuyComputerDataset()
    print('train_mask' in dataset[0].ndata)
    data.utils.add_nodepred_split(dataset, [0.8, 0.1, 0.1])
    assert 'train_mask' in dataset[0].ndata

    # heterogeneous graph dataset; split a specific node type
    dataset = data.AIFBDataset()
    print('train_mask' in dataset[0].nodes['Publikationen'].data)
    data.utils.add_nodepred_split(dataset, [0.8, 0.1, 0.1], ntype='Publikationen')
    assert 'train_mask' in dataset[0].nodes['Publikationen'].data

def test_as_nodepred2():
    # test proper reprocessing

    # create
    ds = data.AsNodePredDataset(data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1])
    assert F.sum(F.astype(ds[0].ndata['train_mask'], F.int32), 0) == int(
        ds[0].num_nodes() * 0.8)
    # read from cache
    ds = data.AsNodePredDataset(data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1])
    assert F.sum(F.astype(ds[0].ndata['train_mask'], F.int32), 0) == int(
        ds[0].num_nodes() * 0.8)
    # invalid cache, re-read
    ds = data.AsNodePredDataset(data.AmazonCoBuyComputerDataset(), [0.1, 0.1, 0.8])
    assert F.sum(F.astype(ds[0].ndata['train_mask'], F.int32), 0) == int(
        ds[0].num_nodes() * 0.1)

    # create
    ds = data.AsNodePredDataset(data.AIFBDataset(), [0.8, 0.1, 0.1], 'Personen', verbose=True)
    assert F.sum(F.astype(ds[0].nodes['Personen'].data['train_mask'], F.int32), 0) == int(
        ds[0].num_nodes('Personen') * 0.8)
    # read from cache
    ds = data.AsNodePredDataset(data.AIFBDataset(), [0.8, 0.1, 0.1], 'Personen', verbose=True)
    assert F.sum(F.astype(ds[0].nodes['Personen'].data['train_mask'], F.int32), 0) == int(
        ds[0].num_nodes('Personen') * 0.8)
    # invalid cache, re-read
    ds = data.AsNodePredDataset(data.AIFBDataset(), [0.1, 0.1, 0.8], 'Personen', verbose=True)
    assert F.sum(F.astype(ds[0].nodes['Personen'].data['train_mask'], F.int32), 0) == int(
        ds[0].num_nodes('Personen') * 0.1)

def test_as_nodepred1():
    # homogeneous graph dataset
    ds = data.AmazonCoBuyComputerDataset()
    print('train_mask' in ds[0].ndata)
    new_ds = data.AsNodePredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
    assert len(new_ds) == 1
    assert new_ds[0].num_nodes() == ds[0].num_nodes()
    assert new_ds[0].num_edges() == ds[0].num_edges()
    assert 'train_mask' in new_ds[0].ndata

    # heterogeneous graph dataset; split a specific node type
    ds = data.AIFBDataset()
    print('train_mask' in ds[0].nodes['Personen'].data)
    new_ds = data.AsNodePredDataset(ds, [0.8, 0.1, 0.1], 'Personen', verbose=True)
    assert len(new_ds) == 1
    assert new_ds[0].ntypes == ds[0].ntypes
    assert new_ds[0].canonical_etypes == ds[0].canonical_etypes
    assert 'train_mask' in new_ds[0].nodes['Personen'].data