Esempio n. 1
0
    def test_merge_features(self):
        # Round-trip check: data written shard by shard must come back
        # unchanged, both through a full load and through a "sample
        # everything" request.
        with in_temporary_directory() as temp_dir:

            # Write the reference shards and keep what was written
            saved_inds, saved_features, saved_targets = self.prepare_data(
                split="train", layer="heads", num_shards=4, feat_shape=(10, 16)
            )

            def assert_matches_saved_data(loaded):
                # 40 samples are expected overall (presumably 4 shards of
                # 10 samples each - prepare_data is not visible here)
                self.assertEqual(loaded["features"].shape[0], 40)
                self.assertTrue(np.array_equal(loaded["inds"], saved_inds))
                self.assertTrue(np.array_equal(loaded["targets"], saved_targets))
                self.assertTrue(np.allclose(loaded["features"], saved_features))

            # Merging all the shards must give back the saved data
            merged = ExtractedFeaturesLoader.load_features(
                input_dir=temp_dir, split="train", layer="heads"
            )
            assert_matches_saved_data(merged)

            # num_samples=-1 asks for all the data (no sampling), so the
            # result must be the saved data as well
            sampled = ExtractedFeaturesLoader.sample_features(
                input_dir=temp_dir,
                split="train",
                layer="heads",
                num_samples=-1,
                seed=0,
            )
            assert_matches_saved_data(sampled)
Esempio n. 2
0
    def test_get_shard_file_names(self):
        # Only the shards of the requested split and layer must be listed,
        # even when other split/layer files live in the same directory.
        with in_temporary_directory() as temp_dir:

            # Populate the directory with every split/layer combination
            split_layer_pairs = [
                (split, layer)
                for split in ["train", "test"]
                for layer in ["heads", "res5"]
            ]
            for split, layer in split_layer_pairs:
                self.prepare_data(
                    split=split, layer=layer, num_shards=2, feat_shape=(10, 16)
                )

            # Ask for train/heads only and keep the bare feature file names
            shard_paths = ExtractedFeaturesLoader.get_shard_file_names(
                input_dir=temp_dir, split="train", layer="heads"
            )
            found_feature_files = set()
            for shard_path in shard_paths:
                found_feature_files.add(os.path.basename(shard_path.feature_file))

            expected_feature_files = {
                "chunk0_train_heads_features.npy",
                "chunk1_train_heads_features.npy",
            }
            self.assertEqual(found_feature_files, expected_feature_files)
Esempio n. 3
0
def train_svm(cfg: AttrDict, output_dir: str, layername: str):
    """
    Train an SVM on the extracted "train" features of one layer, then
    evaluate it on the "test" features of the same layer.

    Args:
        cfg (AttrDict): config; cfg.SVM_FEATURES_PATH gives the directory of
                        the extracted features and cfg["SVM"] is handed to
                        the SVMTrainer
        output_dir (str): forwarded to the SVMTrainer as its output_dir
        layername (str): name of the layer whose features are used
    """
    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=cfg)
    features_dir = cfg.SVM_FEATURES_PATH

    def _load_split(split_name: str):
        # Both splits come from the same directory and layer, flattened
        return ExtractedFeaturesLoader.load_features(
            input_dir=features_dir,
            split=split_name,
            layer=layername,
            flatten_features=True,
        )

    # fit on the train split
    logging.info(f"Training SVM for layer: {layername}")
    trainer = SVMTrainer(cfg["SVM"], layer=layername, output_dir=output_dir)
    train_split = _load_split("train")
    trainer.train(train_split["features"], train_split["targets"])

    # evaluate on the test split
    test_split = _load_split("test")
    trainer.test(test_split["features"], test_split["targets"])
    logging.info("All Done!")
Esempio n. 4
0
def load_and_process_features(cfg, input_dir, split, pca=None):
    """
    Load every feature shard of a split, post-process each feature once and
    return the processed features ordered by sample index.

    Args:
        cfg: config; the layer to load is the first entry of
             cfg.MODEL.FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP
        input_dir: directory containing the extracted feature shards
        split: name of the data split to load
        pca: optional object forwarded as-is to post_process_image

    Returns:
        list of processed features, sorted by increasing sample index, with
        at most one entry per index (the first occurrence wins)
    """
    # Choose only the first layer.
    layer = cfg.MODEL.FEATURE_EVAL_SETTINGS.LINEAR_EVAL_FEAT_POOL_OPS_MAP[0][0]
    shard_file_names = ExtractedFeaturesLoader.get_shard_file_names(
        input_dir, split, layer
    )

    # Processed feature per sample index. A dict gives constant-time
    # duplicate detection, where scanning a growing list of indices was
    # quadratic in the number of samples.
    # Assumes the indices are hashable scalars (e.g. integers) - TODO confirm.
    processed_by_ind = {}

    for shard in shard_file_names:
        # Load the feature shard.
        # NOTE(review): allow_pickle=True is presumably forwarded to the
        # array loader; unpickling untrusted files is unsafe - confirm that
        # the shards always come from a trusted extraction run.
        feature_shard = ExtractedFeaturesLoader.load_feature_shard(
            shard, verbose=False, allow_pickle=True
        )

        # Post-process (rmac | gem | l2) each image from the feature shard.
        for ind, feat in zip(feature_shard.indices, feature_shard.features):
            if ind in processed_by_ind:
                # TODO: Sometimes load_feature_shard returns duplicate features.
                # Feature already processed.
                continue

            processed_by_ind[ind] = post_process_image(
                cfg,
                feat,
                pca=pca,
            )

    # Sort features by index.
    return [processed_by_ind[ind] for ind in sorted(processed_by_ind)]
def get_data_features_for_k_means(cfg: AttrDict):
    """
    Read the extracted feature shards from disk and return a sub-set of
    them, flattened, to be used as input of k-means.

    All the settings come from cfg.CLUSTERFIT: the features location, split
    and layer, the number of samples (DATA_LIMIT) and the sampling seed.
    """
    clusterfit_cfg = cfg.CLUSTERFIT
    features_cfg = clusterfit_cfg.FEATURES

    # The split name is lower-cased before being handed to the loader
    split_name = features_cfg.DATA_PARTITION.lower()

    sampled_features = ExtractedFeaturesLoader.sample_features(
        input_dir=features_cfg.PATH,
        split=split_name,
        layer=features_cfg.LAYER_NAME,
        num_samples=clusterfit_cfg.DATA_LIMIT,
        seed=clusterfit_cfg.DATA_LIMIT_SAMPLING.SEED,
        flatten_features=True,
    )
    return sampled_features
Esempio n. 6
0
    def test_sample_features(self):
        # Sampling must return the requested number of samples, and each
        # sample must match the saved data at the reported index.
        with in_temporary_directory() as temp_dir:
            # Write the shards to sample from (the saved indices are unused)
            _, all_features, all_targets = self.prepare_data(
                split="train", layer="heads", num_shards=4, feat_shape=(10, 16)
            )

            # Draw 10 samples with a fixed seed
            sampled = ExtractedFeaturesLoader.sample_features(
                input_dir=temp_dir,
                split="train",
                layer="heads",
                num_samples=10,
                seed=0,
            )
            sampled_inds = sampled["inds"]

            # Check that the number of samples is valid
            self.assertEqual(10, len(sampled_inds))

            # Check that the samples are a subset of the original dataset
            features_match = np.array_equal(
                sampled["features"], all_features[sampled_inds]
            )
            self.assertTrue(features_match)
            targets_match = np.array_equal(
                sampled["targets"], all_targets[sampled_inds]
            )
            self.assertTrue(targets_match)
Esempio n. 7
0
    def test_knn_fsdp(self):
        # End-to-end check with FSDP: pre-train, extract head and trunk
        # features, merge them back and run both kNN implementations.
        with in_temporary_directory() as pretrain_dir:

            # Run a pre-training to have some weights to begin with
            pretrain_config = self._create_pretraining_config(with_fsdp=True)
            results = run_integration_test(pretrain_config)
            losses = results.get_losses()
            print(losses)

            # Convert checkpoint to sliced checkpoint for easy loading
            CheckpointFormatConverter.sharded_to_sliced_checkpoint(
                "checkpoint.torch", "checkpoint_sliced.torch"
            )
            checkpoint_path = os.path.join(pretrain_dir, "checkpoint_sliced.torch")

            # Create a directory to contain the extracted features
            with in_temporary_directory() as extract_dir:

                # Extract head features
                extract_config_head = self._create_extract_features_config_head(
                    checkpoint_path=checkpoint_path, with_fsdp=True
                )
                extract_config_head.EXTRACT_FEATURES.OUTPUT_DIR = extract_dir
                run_integration_test(
                    extract_config_head, engine_name="extract_features"
                )

                # Extract trunk features
                extract_config_trunk = self._create_extract_features_config_trunk(
                    checkpoint_path=checkpoint_path, with_fsdp=True
                )
                extract_config_trunk.EXTRACT_FEATURES.OUTPUT_DIR = extract_dir
                run_integration_test(
                    extract_config_trunk, engine_name="extract_features"
                )

                # Verify that we can merge the features back, heads first and
                # then trunk, checking the flattened feature dimension
                expected_feature_dims = (("heads", 128), ("res5", 3024 * 2 * 2))
                for layer, feature_dim in expected_feature_dims:
                    merged = ExtractedFeaturesLoader.load_features(
                        extract_dir, "train", layer, flatten_features=True
                    )
                    self.assertEqual(
                        merged["features"].shape, torch.Size([200, feature_dim])
                    )
                    self.assertEqual(merged["targets"].shape, torch.Size([200, 1]))
                    self.assertEqual(merged["inds"].shape, torch.Size([200]))

                # Run KNN on the res5 layer first, then on the head layer.
                # TODO - investigate: both KNN implementation have a bit of
                #  randomness in their accuracies, so the asserts are
                #  inequalities on top-1 only (no equality between the two
                #  implementations, and no check on top-5).
                knn_cases = (
                    (extract_config_trunk, "res5", 29.0, 30.0),
                    (extract_config_head, "heads", 33.0, 35.0),
                )
                for knn_config, layer, min_top_1, max_top_1 in knn_cases:
                    knn_config.NEAREST_NEIGHBOR.FEATURES.PATH = extract_dir
                    top_1_ref, _, total_ref = run_knn_at_layer(
                        knn_config, layer_name=layer
                    )
                    top_1_opt, _, total_opt = run_knn_at_layer_low_memory(
                        knn_config, layer_name=layer
                    )
                    self.assertEqual(total_ref, total_opt)
                    self.assertLessEqual(top_1_ref, max_top_1)
                    self.assertLessEqual(top_1_opt, max_top_1)
                    self.assertGreaterEqual(top_1_ref, min_top_1)
                    self.assertGreaterEqual(top_1_opt, min_top_1)
def _create_dataset_split(
    cfg: AttrDict, data_split: str, features_dim: int, kmeans, pca: Optional[PCA] = None
):
    """
    Go through the feature shards of a dataset split, assign each sample to
    its closest centroid in feature space, and save the resulting
    classification dataset (assignments, labels and image paths) to disk.

    Args:
        cfg (AttrDict): config; everything is read from cfg.CLUSTERFIT
        data_split (str): name of the split; it is lower-cased to find the
                          shards and to name the output files, but used as
                          given to fetch the image paths
        features_dim (int): only used in the name of the output files
        kmeans: object exposing `index.search(features, 1)` and `centroids`
        pca (Optional[PCA]): if provided, applied to the normalized features
                             before searching for the closest centroid
    """
    clusterfit_cfg = cfg.CLUSTERFIT
    features_cfg = clusterfit_cfg.FEATURES
    num_clusters = clusterfit_cfg.NUM_CLUSTERS
    data_name = features_cfg.DATASET_NAME
    layer_name = features_cfg.LAYER_NAME

    logging.info(
        f"Computing cluster label assignment for each sample in {data_split}..."
    )
    sample_indices, sample_distances, hard_labels = [], [], []
    shard_paths = ExtractedFeaturesLoader.get_shard_file_names(
        input_dir=features_cfg.PATH,
        split=data_split.lower(),
        layer=layer_name,
    )
    for shard_path in shard_paths:
        shard = ExtractedFeaturesLoader.load_feature_shard(shard_path)

        # TODO - factorize this with above??? normalization at least???
        # One row per sample, each row scaled by its L2 norm (the small
        # constant avoids dividing by zero)
        flat_features = shard.features.reshape(shard.features.shape[0], -1)
        row_norms = np.linalg.norm(flat_features, axis=1) + 1e-5
        flat_features = flat_features / row_norms[:, np.newaxis]

        if pca is not None:
            flat_features = np.ascontiguousarray(pca.transform(flat_features))

        # Closest centroid (and its distance) for each sample of the shard
        nearest_distances, nearest_clusters = kmeans.index.search(flat_features, 1)
        sample_indices.extend(shard.indices)
        sample_distances.extend(nearest_distances)
        hard_labels.extend(nearest_clusters)

    # Step 5: save clustering data and hard cluster labels for the images
    logging.info("Saving centroids and cluster assignments to file...")
    # The image paths are looked up with the split name as it was given
    dataset_image_paths = get_image_paths(cfg, split=data_split)
    image_paths = [dataset_image_paths[index] for index in sample_indices]
    data_split = data_split.lower()
    clustering_output_dict = {
        "sample_indices": sample_indices,
        "hard_labels": hard_labels,
        "centroids": kmeans.centroids,
        "distances": sample_distances,
        "images": image_paths,
    }

    output_dir = clusterfit_cfg.OUTPUT_DIR
    PathManager.mkdirs(output_dir)
    output_prefix = (
        f"{data_name}_{data_split}_{layer_name}_N{num_clusters}_D{features_dim}"
    )
    cluster_output_filepath = os.path.join(output_dir, f"{output_prefix}.pkl")
    labels_output_filepath = os.path.join(output_dir, f"{output_prefix}_labels.npy")
    image_path_filepath = os.path.join(output_dir, f"{output_prefix}_images.npy")

    # Build both arrays before writing anything to disk
    out_images = np.array(image_paths)
    out_hard_labels = np.array(hard_labels, dtype=np.int64).reshape(-1)
    save_file(clustering_output_dict, cluster_output_filepath)
    save_file(out_images, image_path_filepath)
    save_file(out_hard_labels, labels_output_filepath)
Esempio n. 9
0
def merge_features(input_dir: str, split: str, layer: str):
    """
    Thin wrapper around ExtractedFeaturesLoader.load_features: returns
    whatever it returns for the given directory, split and layer.
    """
    merged = ExtractedFeaturesLoader.load_features(
        input_dir=input_dir, split=split, layer=layer
    )
    return merged
Esempio n. 10
0
    def test_extract_cluster_assignment_ddp(self):
        # End-to-end check: pre-train, extract head then trunk features into
        # a dedicated directory, and verify the files written as well as the
        # shapes obtained when merging or sampling the features.
        with in_temporary_directory() as pretrain_dir:

            # Run a pre-training to have some weights to begin with
            run_integration_test(self._create_pretraining_config())
            checkpoint_path = os.path.join(pretrain_dir, "checkpoint.torch")

            # Create a directory to contain the extracted features
            with in_temporary_directory() as extract_dir:

                def assert_shapes(feats, features_shape, num_samples):
                    # Features, then targets, then indices
                    self.assertEqual(
                        feats["features"].shape, torch.Size(features_shape)
                    )
                    self.assertEqual(
                        feats["targets"].shape, torch.Size([num_samples, 1])
                    )
                    self.assertEqual(feats["inds"].shape, torch.Size([num_samples]))

                # Run the extract engine in a separate directory to check that
                # it is correctly able to output the features in another dir
                with in_temporary_directory():
                    head_config = self._create_extract_features_config_head(
                        checkpoint_path=checkpoint_path
                    )
                    head_config.EXTRACT_FEATURES.OUTPUT_DIR = extract_dir
                    run_integration_test(head_config, engine_name="extract_features")

                # Check the content of the directory containing the extracted
                # features: one features/inds/targets triplet per rank and chunk
                folder_content = os.listdir(extract_dir)
                print(folder_content)
                expected_files = [
                    file_name
                    for rank in [0, 1]
                    for chunk in range(5)
                    for file_name in (
                        f"rank{rank}_chunk{chunk}_train_heads_features.npy",
                        f"rank{rank}_chunk{chunk}_train_heads_inds.npy",
                        f"rank{rank}_chunk{chunk}_train_heads_targets.npy",
                    )
                ]
                for file_name in expected_files:
                    self.assertIn(file_name, folder_content)

                # Verify that we can merge the features back (train split)
                train_feat = merge_features(extract_dir, "train", "heads")
                print(train_feat)
                assert_shapes(train_feat, [40, 128], 40)

                # Verify that we can merge the features back (test split)
                test_feat = merge_features(extract_dir, "test", "heads")
                assert_shapes(test_feat, [20, 128], 20)

                # Run the extract engine this time for the features of the trunk
                with in_temporary_directory():
                    trunk_config = self._create_extract_features_config_trunk(
                        checkpoint_path=checkpoint_path
                    )
                    trunk_config.EXTRACT_FEATURES.OUTPUT_DIR = extract_dir
                    run_integration_test(trunk_config, engine_name="extract_features")

                # Verify that we can merge the features back without
                # flattening them
                trunk_feat = merge_features(extract_dir, "train", "res5")
                assert_shapes(trunk_feat, [40, 2048, 2, 2], 40)

                # Same check through the loader directly (second approach);
                # only the shape of the features is verified here
                trunk_feat = ExtractedFeaturesLoader.load_features(
                    extract_dir, "train", "res5"
                )
                self.assertEqual(
                    trunk_feat["features"].shape, torch.Size([40, 2048, 2, 2])
                )

                # Verify that we can merge the features back but flattened
                trunk_feat = ExtractedFeaturesLoader.load_features(
                    extract_dir, "train", "res5", flatten_features=True
                )
                assert_shapes(trunk_feat, [40, 2048 * 2 * 2], 40)

                # Verify that we can sample the features (unflattened)
                sampled_feat = ExtractedFeaturesLoader.sample_features(
                    input_dir=extract_dir,
                    split="train",
                    layer="res5",
                    num_samples=10,
                    seed=0,
                )
                assert_shapes(sampled_feat, [10, 2048, 2, 2], 10)

                # Verify that we can sample the features (flattened)
                sampled_feat = ExtractedFeaturesLoader.sample_features(
                    input_dir=extract_dir,
                    split="train",
                    layer="res5",
                    num_samples=10,
                    seed=0,
                    flatten_features=True,
                )
                assert_shapes(sampled_feat, [10, 2048 * 2 * 2], 10)
Esempio n. 11
0
def run_knn_at_layer(cfg: AttrDict, layer_name: str = "heads"):
    """
    Run the Nearest Neighbour benchmark at the layer "layer_name"

    The flattened train and test features of the layer are loaded in full
    from cfg.NEAREST_NEIGHBOR.FEATURES.PATH. The test features are then
    processed chunk by chunk: for each chunk, the top-k most similar train
    features (dot product) are retrieved and their labels are turned into
    predictions. Results are saved in the checkpoint folder.

    Args:
        cfg (AttrDict): config; the settings are read from cfg.NEAREST_NEIGHBOR
                        (SIGMA, TOPK, FEATURES.PATH, L2_NORM_FEATS, USE_CUDA)
        layer_name (str): name of the layer whose features are evaluated

    Returns:
        the tuple (top_1, top_5, total) taken from the accumulated accuracies
    """
    temperature = cfg.NEAREST_NEIGHBOR.SIGMA
    num_neighbors = cfg.NEAREST_NEIGHBOR.TOPK
    feature_dir = cfg.NEAREST_NEIGHBOR.FEATURES.PATH
    output_dir = get_checkpoint_folder(cfg)
    logging.info(f"Testing with sigma: {temperature}, topk neighbors: {num_neighbors}")

    ############################################################################
    # Step 1: get train and test features
    train_out = ExtractedFeaturesLoader.load_features(
        feature_dir, "train", layer_name, flatten_features=True
    )
    train_features, train_labels = train_out["features"], train_out["targets"]
    test_out = ExtractedFeaturesLoader.load_features(
        feature_dir, "test", layer_name, flatten_features=True
    )
    test_features, test_labels = test_out["features"], test_out["targets"]
    train_features = torch.from_numpy(train_features).float()
    test_features = torch.from_numpy(test_features).float()
    train_labels = torch.LongTensor(train_labels)
    num_classes = train_labels.max() + 1

    ###########################################################################
    # Step 2: calculate the nearest neighbor and the metrics
    accuracies = Accuracies()
    if cfg.NEAREST_NEIGHBOR.L2_NORM_FEATS:
        train_features = nn.functional.normalize(train_features, dim=1, p=2)
        test_features = nn.functional.normalize(test_features, dim=1, p=2)

    # put train features and labels on gpu and transpose train features
    if cfg.NEAREST_NEIGHBOR.USE_CUDA:
        train_features = train_features.cuda().t()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
    else:
        train_features = train_features.t()

    num_test_images, num_chunks = test_labels.shape[0], 100
    # With fewer test images than chunks the integer division gives 0, and a
    # zero step makes range() raise ValueError: use chunks of at least 1 image
    imgs_per_chunk = max(1, num_test_images // num_chunks)
    output_targets, output_predicted_label, output_inds = [], [], []
    with torch.no_grad():
        for idx in range(0, num_test_images, imgs_per_chunk):
            chunk_end = min(idx + imgs_per_chunk, num_test_images)

            # get the features and targets of the test images of this chunk
            features = test_features[idx:chunk_end, :]
            targets = torch.LongTensor(test_labels[idx:chunk_end, :])
            batch_size = targets.shape[0]
            if cfg.NEAREST_NEIGHBOR.USE_CUDA:
                targets = targets.cuda()

            # calculate the dot product and compute top-k neighbors
            similarity = torch.mm(features, train_features)
            distances, indices = similarity.topk(
                num_neighbors, largest=True, sorted=True
            )
            candidates = train_labels.view(1, -1).expand(batch_size, -1)
            retrieved_neighbors = torch.gather(candidates, 1, indices)

            # one row per (test image, neighbor) pair, with a 1 in the column
            # of the label of the neighbor
            retrieval_one_hot = torch.zeros(batch_size * num_neighbors, num_classes)
            if cfg.NEAREST_NEIGHBOR.USE_CUDA:
                retrieval_one_hot = retrieval_one_hot.cuda()
            retrieval_one_hot.scatter_(1, retrieved_neighbors.view(-1, 1), 1)
            predictions = _get_sorted_predictions(
                batch_size, num_classes, distances, retrieval_one_hot, temperature
            )

            # find the predictions that match the target
            accuracies = accuracies + Accuracies.from_batch(predictions, targets)

            # get the predictions, nearest neighbors, inds to save
            output_inds.extend(range(idx, chunk_end))
            output_predicted_label.append(predictions.data.cpu().numpy())
            output_targets.append(targets.data.cpu().numpy())

    _save_knn_results(
        output_dir, layer_name, output_inds, output_predicted_label, output_targets
    )
    accuracies.log(layer_name)
    return accuracies.top_1, accuracies.top_5, accuracies.total
Esempio n. 12
0
def run_knn_at_layer_low_memory(cfg: AttrDict, layer_name: str = "heads"):
    """
    Alternate implementation of kNN which scales to bigger features
    and bigger "train" splits

    The test features are loaded in full, but the train features are read
    one shard at a time: only the best similarity scores (and the matching
    train labels) are kept for each test sample. Everything runs on CPU.

    Args:
        cfg (AttrDict): config; the settings are read from cfg.NEAREST_NEIGHBOR
                        (SIGMA, TOPK, FEATURES.PATH, L2_NORM_FEATS, USE_CUDA)
        layer_name (str): name of the layer whose features are evaluated

    Returns:
        the tuple (top_1, top_5, total) taken from the accumulated accuracies
    """
    if cfg.NEAREST_NEIGHBOR.USE_CUDA:
        logging.warning(
            "config.NEAREST_NEIGHBOR.USE_CUDA is not available when "
            "config.NEAREST_NEIGHBOR.OPTIMIZE_MEMORY is set to True, "
            "using CPU instead"
        )

    temperature = cfg.NEAREST_NEIGHBOR.SIGMA
    num_neighbors = cfg.NEAREST_NEIGHBOR.TOPK
    feature_dir = cfg.NEAREST_NEIGHBOR.FEATURES.PATH
    output_dir = get_checkpoint_folder(cfg)
    logging.info(f"Testing with sigma: {temperature}, topk neighbors: {num_neighbors}")

    # Step 1: get the test features (the train features might not fit in memory)
    test_out = ExtractedFeaturesLoader.load_features(
        feature_dir, "test", layer_name, flatten_features=True
    )
    test_features, test_labels = test_out["features"], test_out["targets"]
    test_features = torch.from_numpy(test_features).float()
    test_feature_num = test_features.shape[0]

    # Step 2: normalize the features if needed
    if cfg.NEAREST_NEIGHBOR.L2_NORM_FEATS:
        test_features = nn.functional.normalize(test_features, dim=1, p=2)

    # Step 3: collect the similarity score of each test feature
    # to all the train features, making sure:
    # - never to load all the train features at once to avoid OOM
    # - to keep just the 'num_neighbors' best similarity scores
    shard_paths = ExtractedFeaturesLoader.get_shard_file_names(
        input_dir=feature_dir, split="train", layer=layer_name
    )
    similarity_queue = MaxSimilarityPriorityQueue(max_size=num_neighbors)
    num_classes = 0
    for shard_path in shard_paths:
        shard_content = ExtractedFeaturesLoader.load_feature_shard(shard_path)
        train_features = torch.from_numpy(shard_content.features)
        train_features = train_features.float().reshape((train_features.shape[0], -1))
        if cfg.NEAREST_NEIGHBOR.L2_NORM_FEATS:
            train_features = nn.functional.normalize(train_features, dim=1, p=2)
        train_features = train_features.t()

        train_labels = torch.LongTensor(shard_content.targets).squeeze(-1)
        num_classes = max(num_classes, train_labels.max().item() + 1)

        # One row per test sample, one column per train sample of the shard
        similarities = torch.mm(test_features, train_features)

        # topk selects along the last dimension, i.e. among the train samples
        # of the shard: it is only valid when the shard holds more train
        # samples than requested neighbors, so the guard has to look at the
        # number of columns (not at the number of test samples). Smaller
        # shards are simply sorted in full.
        if similarities.shape[1] > num_neighbors:
            distances, indices = similarities.topk(
                num_neighbors, largest=True, sorted=True
            )
        else:
            distances, indices = torch.sort(similarities, descending=True)
        closest_labels = train_labels[indices]
        similarity_queue.push_all(distances, closest_labels)

    # Step 4: collect the samples with the closest similarities
    # for each test sample, and assemble it in a matrix with
    # shape (num_test_samples, num_neighbors)
    topk_distances, topk_labels = similarity_queue.pop_all()

    # Step 5: go through each of the test samples, batch by batch,
    # to compute the label of each test sample based on the top k
    # nearest neighbors and their corresponding labels
    accuracies = Accuracies()
    output_targets, output_predicted_label, output_inds = [], [], []

    batch_size = 100
    num_test_images = test_feature_num
    for idx in range(0, num_test_images, batch_size):
        min_idx = idx
        max_idx = min(idx + batch_size, num_test_images)
        # The last batch can hold fewer than 'batch_size' samples: size the
        # one-hot buffer and the predictions on the real number of samples,
        # as run_knn_at_layer does with 'targets.shape[0]'
        num_in_batch = max_idx - min_idx

        distances = topk_distances[min_idx:max_idx, ...]
        retrieved_neighbors = topk_labels[min_idx:max_idx, ...]
        targets = torch.LongTensor(test_labels[min_idx:max_idx])

        retrieval_one_hot = torch.zeros(num_in_batch * num_neighbors, num_classes)
        retrieval_one_hot.scatter_(1, retrieved_neighbors.view(-1, 1), 1)
        predictions = _get_sorted_predictions(
            num_in_batch, num_classes, distances, retrieval_one_hot, temperature
        )

        # find the predictions that match the target
        accuracies = accuracies + Accuracies.from_batch(predictions, targets)

        # get the predictions, nearest neighbors, inds to save
        output_inds.extend(range(min_idx, max_idx))
        output_predicted_label.append(predictions.data.cpu().numpy())
        output_targets.append(targets.data.cpu().numpy())

    _save_knn_results(
        output_dir, layer_name, output_inds, output_predicted_label, output_targets
    )
    accuracies.log(layer_name)
    return accuracies.top_1, accuracies.top_5, accuracies.total
Esempio n. 13
0
def extract_features_main(
    cfg: AttrDict,
    dist_run_id: str,
    checkpoint_folder: str,
    local_rank: int = 0,
    node_id: int = 0,
):
    """
    Entry point of the feature extraction workflow, run once per process.

    Prepares the process (environment variables, logging, multiprocessing
    method, seeds, CUDA device), runs the feature extraction through a
    SelfSupervisionTrainer and, on distributed rank 0 only and if
    config.EXTRACT_FEATURES.MAP_FEATURES_TO_IMG_NAME is set, maps the
    extracted features to the image file paths of each available split.

    Args:
        cfg (AttrDict): user specified input config that has optimizer, loss, meters etc
                        settings relevant to the training
        dist_run_id (str): rendezvous identifier handed to the trainer. For
                           multi-gpu runs with PyTorch, it tells how the gpus
                           find each other: communication method (file, tcp)
                           plus a run_id unique to the run.
                           Recommended:
                                1) for 1node: use init_method=tcp and run_id=auto
                                2) for multi-node, use init_method=tcp and specify
                                run_id={master_node}:{port}
        checkpoint_folder (str): directory used for the logs, and also for the
                                 extracted features when
                                 config.EXTRACT_FEATURES.OUTPUT_DIR is not set
        local_rank (int): id of the current device on the machine. If using gpus,
                        local_rank = gpu number on the current machine
        node_id (int): id of the current machine. starts from 0. valid for multi-gpu
    """

    # The environment variables have to be set before RANK can be read back
    set_env_vars(local_rank, node_id, cfg)
    dist_rank = int(os.environ["RANK"])

    # Logging goes to the checkpoint folder
    setup_logging(__name__, output_dir=checkpoint_folder, rank=dist_rank)
    logging.info(f"Env set for rank: {local_rank}, dist_rank: {dist_rank}")

    # Only the first local process prints the environment of the node
    if local_rank == 0:
        print_system_env_info(os.environ.copy())

    # setup the multiprocessing to be forkserver.
    # See https://fb.quip.com/CphdAGUaM5Wf
    setup_multiprocessing_method(cfg.MULTI_PROCESSING_METHOD)

    # Seeds are set using the distributed rank
    logging.info("Setting seed....")
    set_seeds(cfg, dist_rank)

    # Select the CUDA device now, so that every later call to
    # `torch.cuda.current_device()` returns the correct device.
    if cfg.MACHINE.DEVICE == "gpu" and torch.cuda.is_available():
        local_rank, _ = get_machine_local_and_dist_rank()
        torch.cuda.set_device(local_rank)

    # Only the first local process prints the config and system settings
    if local_rank == 0:
        print_cfg(cfg)
        logging.info("System config:\n{}".format(collect_env_info()))

    # Hooks of the feature extraction engine
    # TODO - we need to plug this better with the engine registry
    #  - we either need to use the global hooks registry
    #  - or we need to create specific hook registry by engine
    extraction_hooks = extract_features_hook_generator(cfg)

    # Extract the features, in the configured output directory if there is
    # one, in the checkpoint folder otherwise
    trainer = SelfSupervisionTrainer(cfg, dist_run_id, hooks=extraction_hooks)
    output_dir = cfg.EXTRACT_FEATURES.OUTPUT_DIR or checkpoint_folder
    trainer.extract(
        output_folder=output_dir,
        extract_features=True,
        extract_predictions=False,
    )

    # TODO (prigoyal): merge this function with _extract_features
    if dist_rank == 0 and cfg.EXTRACT_FEATURES.MAP_FEATURES_TO_IMG_NAME:
        # Names of the layers that features were extracted for. When the
        # config lists none, fall back to the "heads" output.
        layer_names = get_trunk_output_feature_names(cfg.MODEL)
        if len(layer_names) == 0:
            layer_names = ["heads"]

        split_names = [item.lower() for item in trainer.task.available_splits]
        for split in split_names:
            image_paths = trainer.task.datasets[split].get_image_paths()[0]
            for layer in layer_names:
                ExtractedFeaturesLoader.map_features_to_img_filepath(
                    image_paths=image_paths,
                    input_dir=output_dir,
                    split=split,
                    layer=layer,
                )

    logging.info("All Done!")
    # close the logging streams including the filehandlers
    shutdown_logging()