def setUpClass(self):
    """Build fixed train/val embedding datasets from hand-picked angles."""
    angles_train = [0, 10, 20, 30, 50, 60, 70, 80]
    train_embeddings = torch.tensor([c_f.angle_to_coord(a) for a in angles_train])
    train_labels = torch.LongTensor([0, 0, 0, 0, 1, 1, 1, 1])

    angles_val = [1, 11, 21, 31, 51, 59, 71, 81]
    val_embeddings = torch.tensor([c_f.angle_to_coord(a) for a in angles_val])
    val_labels = torch.LongTensor([1, 1, 1, 1, 1, 0, 0, 0])

    self.dataset_dict = {
        "train": c_f.EmbeddingDataset(train_embeddings, train_labels),
        "val": c_f.EmbeddingDataset(val_embeddings, val_labels),
    }
def test_fixed_set_of_triplets_with_batch_size(self):
    """Each batch drawn from FixedSetOfTriplets should yield batch_size // 3 triplets."""
    miner = EmbeddingsAlreadyPackagedAsTriplets()
    for batch_size in [3, 33, 99]:
        batch_of_fake_embeddings = torch.randn(batch_size, 2)
        for num_labels in [2, 10, 55]:
            for num_triplets in [100, 999, 10000]:
                fake_embeddings = torch.randn(10000, 2)
                labels = torch.randint(low=0, high=num_labels, size=(10000,))
                dataset = c_f.EmbeddingDataset(fake_embeddings, labels)
                sampler = FixedSetOfTriplets(labels, num_triplets)

                # Drive the sampler by hand and check the miner's triplet count.
                it = iter(sampler)
                for _ in range(1000):
                    idxs = []
                    for _ in range(batch_size):
                        it, idx = c_f.try_next_on_generator(it, sampler)
                        idxs.append(idx)
                    batch_labels = labels[idxs]
                    a, p, n = miner(batch_of_fake_embeddings, batch_labels)
                    self.assertTrue(len(a) == batch_size // 3)

                # Same check when the sampler feeds a DataLoader.
                loader = torch.utils.data.DataLoader(
                    dataset,
                    batch_size=batch_size,
                    sampler=sampler,
                    drop_last=True,
                )
                for _ in range(2):
                    for _embeddings, batch_labels in loader:
                        a, p, n = miner(batch_of_fake_embeddings, batch_labels)
                        self.assertTrue(len(a) == batch_size // 3)
def setUpClass(self):
    """Build train/val datasets whose labels have two columns.

    Column 0 holds the child label and column 1 the parent label
    (presumably consumed by a hierarchy-aware tester — verify against
    the enclosing test class).
    """
    embedding_angles = [0, 9, 21, 29, 31, 39, 51, 59]
    embeddings1 = torch.tensor([c_f.angle_to_coord(a) for a in embedding_angles])
    parent_labels1 = torch.LongTensor([0, 0, 0, 0, 1, 1, 1, 1])
    child_labels1 = torch.LongTensor([2, 2, 3, 3, 4, 4, 5, 5])
    # Use the documented `dim` kwarg; `axis` is only a numpy-compat alias.
    labels1 = torch.stack([child_labels1, parent_labels1], dim=1)

    embedding_angles = [2, 11, 23, 32, 33, 41, 53, 89, 90]
    embeddings2 = torch.tensor([c_f.angle_to_coord(a) for a in embedding_angles])
    parent_labels2 = torch.LongTensor([0, 0, 0, 0, 0, 1, 1, 1, 1])
    child_labels2 = torch.LongTensor([2, 2, 4, 3, 4, 4, 4, 5, 5])
    labels2 = torch.stack([child_labels2, parent_labels2], dim=1)

    self.dataset_dict = {
        "train": c_f.EmbeddingDataset(embeddings1, labels1),
        "val": c_f.EmbeddingDataset(embeddings2, labels2),
    }
def test_mperclass_sampler_with_batch_size(self):
    """MPerClassSampler batches must contain batch_size // m labels, m samples each.

    Invalid (batch_size, m, num_labels, length_before_new_iter) combinations
    must raise AssertionError at construction time.
    """
    for batch_size in [4, 50, 99, 100, 1024]:
        for m in [1, 5, 10, 17, 50]:
            for num_labels in [2, 10, 55]:
                for length_before_new_iter in [100, 999, 10000]:
                    embeddings = torch.randn(10000, 2)
                    labels = torch.randint(low=0, high=num_labels, size=(10000,))
                    dataset = c_f.EmbeddingDataset(embeddings, labels)
                    sampler_args = [labels, m, batch_size, length_before_new_iter]

                    invalid = (
                        length_before_new_iter < batch_size
                        or m * num_labels < batch_size
                        or batch_size % m != 0
                    )
                    if invalid:
                        self.assertRaises(AssertionError, MPerClassSampler, *sampler_args)
                        continue

                    sampler = MPerClassSampler(*sampler_args)

                    # Drive the sampler by hand and check label composition.
                    it = iter(sampler)
                    for _ in range(1000):
                        idxs = []
                        for _ in range(batch_size):
                            it, idx = c_f.try_next_on_generator(it, sampler)
                            idxs.append(idx)
                        batch_labels = labels[idxs]
                        uniques, counts = torch.unique(batch_labels, return_counts=True)
                        self.assertTrue(len(uniques) == batch_size // m)
                        self.assertTrue(torch.all(counts == m))

                    # Same check when the sampler feeds a DataLoader.
                    loader = torch.utils.data.DataLoader(
                        dataset,
                        batch_size=batch_size,
                        sampler=sampler,
                        drop_last=False,
                    )
                    for _ in range(2):
                        for _batch, batch_labels in loader:
                            uniques, counts = torch.unique(batch_labels, return_counts=True)
                            self.assertTrue(len(uniques) == batch_size // m)
                            self.assertTrue(torch.all(counts == m))
def test_pca(self):
    """Smoke test: the tester runs end-to-end with PCA enabled.

    Also checks that embeddings are reduced to pca_size dimensions during
    testing and that the cached embeddings are released afterwards.
    """
    model = c_f.Identity()
    calculator = accuracy_calculator.AccuracyCalculator(include=("precision_at_1",))
    embeddings = torch.randn(1024, 512)
    labels = torch.randint(0, 10, size=(1024,))
    dataset_dict = {"train": c_f.EmbeddingDataset(embeddings, labels)}
    pca_size = 16

    def end_of_testing_hook(tester):
        # At hook time the stored embeddings must already be PCA-reduced.
        train_embeddings = tester.embeddings_and_labels["train"][0]
        self.assertTrue(train_embeddings.shape[1] == pca_size)

    tester = GlobalEmbeddingSpaceTester(
        pca=pca_size,
        accuracy_calculator=calculator,
        end_of_testing_hook=end_of_testing_hook,
    )
    all_accuracies = tester.test(dataset_dict, 0, model)
    # Embedding cache must be dropped once testing finishes.
    self.assertTrue(not hasattr(tester, "embeddings_and_labels"))