Beispiel #1
0
    def _get_cluster_assignment_for_split(self, task: ClassyTask, split: str):
        """Assign every image of a data split to its highest-scoring prototype.

        The model is put in eval mode, each batch of the split is moved to the
        GPU and run through ``task.model`` under ``torch.no_grad()``, and the
        arg-max over the last dimension of the score tensor found at
        ``[0][1]`` of the model output is recorded per image.

        Args:
            task: Task whose ``model`` is evaluated. Its ``data_iterator``
                attribute is overwritten with a fresh iterator over the split.
            split: Name of the split; lower-cased before the dataloader lookup.

        Returns:
            Dict mapping each image index (taken from the batch's
            ``"data_idx"`` entry) to the index of its best-scoring prototype.

        Raises:
            AssertionError: If a batch is not a dict or lacks ``"data_idx"``.
        """
        task.model.eval()
        logging.info("Model set to eval mode during feature extraction...")

        cluster_assignments = {}
        # NOTE(review): the dataloader is looked up on ``self.task`` rather than
        # on the ``task`` argument - presumably the same object; confirm.
        task.data_iterator = iter(self.task.dataloaders[split.lower()])

        # A plain ``for`` treats only exhaustion of the iterator as the end of
        # the split. A StopIteration escaping from the model or the batch
        # handling now propagates instead of silently truncating the result.
        for sample in task.data_iterator:
            assert isinstance(sample, dict)
            assert "data_idx" in sample, "Indices not passed"

            images = torch.cat(sample["data"]).cuda(non_blocking=True)
            indices = torch.cat(sample["data_idx"]).cpu().numpy()

            with torch.no_grad():
                prototype_score = task.model(images)[0][1]
                prototype_index = prototype_score.argmax(dim=-1)

            # One device-to-host copy per batch instead of one ``.item()``
            # synchronisation per image.
            best_prototypes = prototype_index.tolist()
            for idx in range(indices.shape[0]):
                cluster_assignments[indices[idx]] = best_prototypes[idx]
        return cluster_assignments
Beispiel #2
0
    def _get_cluster_assignment_for_split(
        self, task: ClassyTask, split: str, output_folder: str
    ):
        """Assign every image of a split to a prototype and save soft scores in chunks.

        The model is put in eval mode, each batch of the split is moved to the
        GPU and run through ``task.model`` under ``torch.no_grad()``. For every
        image, the arg-max over the last dimension of the score tensor found
        at ``[0][1]`` of the model output is recorded as its hard assignment,
        and the image's own score row is buffered as its soft assignment.

        Buffered soft assignments are handed to
        ``self._save_extracted_prototypes`` whenever the number of buffered
        images reaches ``self.cfg.EXTRACT_FEATURES.CHUNK_THRESHOLD``; a
        negative threshold disables intermediate flushes. Whatever remains in
        the buffer once the split is exhausted is flushed as a final chunk.

        Args:
            task: Task whose ``model`` is evaluated. Its ``data_iterator``
                attribute is overwritten with a fresh iterator over the split.
            split: Name of the split; lower-cased before the dataloader lookup
                and forwarded unchanged to the save helper.
            output_folder: Forwarded to ``self._save_extracted_prototypes``.

        Returns:
            Dict mapping each image index (taken from the batch's
            ``"data_idx"`` entry) to the index of its best-scoring prototype.
            Unlike the soft-assignment buffer, it is never cleared and so
            covers the whole split.

        Raises:
            AssertionError: If a batch is not a dict or lacks ``"data_idx"``.
        """
        task.model.eval()
        logging.info("Model set to eval mode during feature extraction...")
        dist_rank = torch.distributed.get_rank()

        cluster_assignments = {}
        soft_cluster_assignments = {}
        image_indices = []
        chunk_index, buffer_size = 0, 0
        chunk_threshold = self.cfg.EXTRACT_FEATURES.CHUNK_THRESHOLD

        # NOTE(review): the dataloader is looked up on ``self.task`` rather than
        # on the ``task`` argument - presumably the same object; confirm.
        task.data_iterator = iter(self.task.dataloaders[split.lower()])

        # A plain ``for`` treats only exhaustion of the iterator as the end of
        # the split. A StopIteration escaping from the model or the save
        # helper now propagates instead of being mistaken for end-of-data.
        for sample in task.data_iterator:
            assert isinstance(sample, dict)
            assert "data_idx" in sample, "Indices not passed"

            images = torch.cat(sample["data"]).cuda(non_blocking=True)
            indices = torch.cat(sample["data_idx"]).cpu().numpy()

            with torch.no_grad():
                outputs = task.model(images)
                prototype_score = outputs[0][1]
                prototype_index = prototype_score.argmax(dim=-1)

            # Move the batch results to the host once, not once per image.
            best_prototypes = prototype_index.tolist()
            batch_scores = prototype_score.cpu().numpy()

            num_images = indices.shape[0]
            buffer_size += num_images
            for idx in range(num_images):
                image_index = indices[idx]
                cluster_assignments[image_index] = best_prototypes[idx]
                # Keep only this image's row. The scores are indexed per image
                # on their leading dimension, exactly like the arg-max result;
                # storing the full batch matrix under every key was a bug.
                soft_cluster_assignments[image_index] = batch_scores[idx]
                image_indices.append(image_index)

            # Chained comparison: flush only when the threshold is non-negative
            # and the buffer has reached it.
            if buffer_size >= chunk_threshold >= 0:
                self._save_extracted_prototypes(
                    soft_assignments=soft_cluster_assignments,
                    out_indices=image_indices,
                    dist_rank=dist_rank,
                    chunk_index=chunk_index,
                    split=split,
                    output_folder=output_folder,
                )
                soft_cluster_assignments.clear()
                image_indices.clear()
                chunk_index += 1
                buffer_size = 0

        # Flush the partially filled last chunk, if any.
        if buffer_size:
            self._save_extracted_prototypes(
                soft_assignments=soft_cluster_assignments,
                out_indices=image_indices,
                dist_rank=dist_rank,
                chunk_index=chunk_index,
                split=split,
                output_folder=output_folder,
            )
        return cluster_assignments