def setUpClass(self):
        """Build two tiny 2-D embedding datasets (points on the unit circle,
        generated from angles) and store them as the train/val dataset dict."""
        train_angles = [0, 10, 20, 30, 50, 60, 70, 80]
        train_embeddings = torch.tensor(
            [c_f.angle_to_coord(angle) for angle in train_angles])
        train_labels = torch.LongTensor([0, 0, 0, 0, 1, 1, 1, 1])

        val_angles = [1, 11, 21, 31, 51, 59, 71, 81]
        val_embeddings = torch.tensor(
            [c_f.angle_to_coord(angle) for angle in val_angles])
        val_labels = torch.LongTensor([1, 1, 1, 1, 1, 0, 0, 0])

        self.dataset_dict = {
            "train": c_f.EmbeddingDataset(train_embeddings, train_labels),
            "val": c_f.EmbeddingDataset(val_embeddings, val_labels),
        }
# Example #2
    def test_fixed_set_of_triplets_with_batch_size(self):
        """Check that FixedSetOfTriplets yields index batches whose labels form
        exactly batch_size // 3 triplets, both when iterating the sampler by
        hand and when it drives a DataLoader."""
        miner = EmbeddingsAlreadyPackagedAsTriplets()
        for batch_size in [3, 33, 99]:
            batch_of_fake_embeddings = torch.randn(batch_size, 2)
            for num_labels in [2, 10, 55]:
                for num_triplets in [100, 999, 10000]:
                    fake_embeddings = torch.randn(10000, 2)
                    labels = torch.randint(low=0, high=num_labels, size=(10000,))
                    dataset = c_f.EmbeddingDataset(fake_embeddings, labels)
                    sampler = FixedSetOfTriplets(labels, num_triplets)

                    # Manual iteration: pull batch_size indices at a time.
                    iterator = iter(sampler)
                    for _ in range(1000):
                        indices = []
                        for _ in range(batch_size):
                            iterator, idx = c_f.try_next_on_generator(
                                iterator, sampler)
                            indices.append(idx)
                        curr_labels = labels[indices]
                        a, p, n = miner(batch_of_fake_embeddings, curr_labels)
                        self.assertTrue(len(a) == batch_size // 3)

                    # Same invariant must hold when the sampler feeds a DataLoader.
                    dataloader = torch.utils.data.DataLoader(
                        dataset,
                        batch_size=batch_size,
                        sampler=sampler,
                        drop_last=True,
                    )
                    for _ in range(2):
                        for embeddings, curr_labels in dataloader:
                            a, p, n = miner(batch_of_fake_embeddings, curr_labels)
                            self.assertTrue(len(a) == batch_size // 3)
    def setUpClass(self):
        """Build train/val datasets whose labels are hierarchical: each row of
        the label tensor is (child_label, parent_label)."""
        train_angles = [0, 9, 21, 29, 31, 39, 51, 59]
        train_embeddings = torch.tensor(
            [c_f.angle_to_coord(angle) for angle in train_angles])
        train_parents = torch.LongTensor([0, 0, 0, 0, 1, 1, 1, 1])
        train_children = torch.LongTensor([2, 2, 3, 3, 4, 4, 5, 5])
        # Column 0 = child label, column 1 = parent label.
        train_labels = torch.stack([train_children, train_parents], dim=1)

        val_angles = [2, 11, 23, 32, 33, 41, 53, 89, 90]
        val_embeddings = torch.tensor(
            [c_f.angle_to_coord(angle) for angle in val_angles])
        val_parents = torch.LongTensor([0, 0, 0, 0, 0, 1, 1, 1, 1])
        val_children = torch.LongTensor([2, 2, 4, 3, 4, 4, 4, 5, 5])
        val_labels = torch.stack([val_children, val_parents], dim=1)

        self.dataset_dict = {
            "train": c_f.EmbeddingDataset(train_embeddings, train_labels),
            "val": c_f.EmbeddingDataset(val_embeddings, val_labels),
        }
# Example #4
    def test_mperclass_sampler_with_batch_size(self):
        """Check MPerClassSampler with an explicit batch_size: every emitted
        batch must contain exactly batch_size // m distinct labels with m
        samples each, and invalid parameter combinations must raise
        AssertionError at construction time."""
        for batch_size in [4, 50, 99, 100, 1024]:
            for m in [1, 5, 10, 17, 50]:
                for num_labels in [2, 10, 55]:
                    for length_before_new_iter in [100, 999, 10000]:
                        fake_embeddings = torch.randn(10000, 2)
                        labels = torch.randint(low=0,
                                               high=num_labels,
                                               size=(10000, ))
                        dataset = c_f.EmbeddingDataset(fake_embeddings, labels)
                        sampler_args = [labels, m, batch_size,
                                        length_before_new_iter]
                        # These combinations cannot produce valid batches, so
                        # the sampler is expected to reject them outright.
                        invalid = (length_before_new_iter < batch_size
                                   or m * num_labels < batch_size
                                   or batch_size % m != 0)
                        if invalid:
                            self.assertRaises(AssertionError, MPerClassSampler,
                                              *sampler_args)
                            continue
                        sampler = MPerClassSampler(*sampler_args)

                        # Manual iteration: pull batch_size indices at a time.
                        iterator = iter(sampler)
                        for _ in range(1000):
                            indices = []
                            for _ in range(batch_size):
                                iterator, idx = c_f.try_next_on_generator(
                                    iterator, sampler)
                                indices.append(idx)
                            batch_labels = labels[indices]
                            unique_labels, counts = torch.unique(
                                batch_labels, return_counts=True)
                            self.assertTrue(
                                len(unique_labels) == batch_size // m)
                            self.assertTrue(torch.all(counts == m))

                        # Same invariant via a DataLoader driven by the sampler.
                        dataloader = torch.utils.data.DataLoader(
                            dataset,
                            batch_size=batch_size,
                            sampler=sampler,
                            drop_last=False,
                        )
                        for _ in range(2):
                            for _, batch_labels in dataloader:
                                unique_labels, counts = torch.unique(
                                    batch_labels, return_counts=True)
                                self.assertTrue(
                                    len(unique_labels) == batch_size // m)
                                self.assertTrue(torch.all(counts == m))
    def test_pca(self):
        """Smoke test: running GlobalEmbeddingSpaceTester with pca set should
        not crash, should reduce stored embeddings to pca_size dimensions, and
        should discard embeddings_and_labels when testing finishes."""
        model = c_f.Identity()
        calculator = accuracy_calculator.AccuracyCalculator(
            include=("precision_at_1",))
        embeddings = torch.randn(1024, 512)
        labels = torch.randint(0, 10, size=(1024,))
        dataset_dict = {"train": c_f.EmbeddingDataset(embeddings, labels)}
        pca_size = 16

        def check_pca_dim(tester):
            # While the hook runs, the stored train embeddings should already
            # have been projected down to pca_size columns.
            self.assertTrue(
                tester.embeddings_and_labels["train"][0].shape[1] == pca_size
            )

        tester = GlobalEmbeddingSpaceTester(
            pca=pca_size,
            accuracy_calculator=calculator,
            end_of_testing_hook=check_pca_dim,
        )
        all_accuracies = tester.test(dataset_dict, 0, model)
        # The tester is expected to clean up its cached embeddings afterwards.
        self.assertTrue(not hasattr(tester, "embeddings_and_labels"))