Code example #1
def train_sample_places_low_shot(
    low_shot_trainer: SVMLowShotTrainer,
    k_values: List[int],
    sample_inds: List[int],
    sample_num: int,
    output_dir: str,
    layername: str,
    cfg: AttrDict,
):
    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=cfg)

    for low_shot_kvalue in k_values:
        checkpoint_dir = f"{output_dir}/sample{sample_num}_k{low_shot_kvalue}"
        train_data = merge_features(checkpoint_dir, "train", layername)
        train_features = train_data["features"]
        train_targets = train_data["targets"]
        checkpoint_dir = f"{output_dir}/sample{sample_inds[0]}_k{k_values[0]}"
        test_data = merge_features(checkpoint_dir, "test", layername)
        test_features = test_data["features"]
        test_targets = test_data["targets"]
        low_shot_trainer.train(train_features, train_targets, sample_num,
                               low_shot_kvalue)
        low_shot_trainer.test(test_features, test_targets, sample_num,
                              low_shot_kvalue)
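The helper above handles a single sample index; a caller presumably builds the trainer once and loops over all independent samples. A minimal driver sketch under that assumption (the loop is illustrative and not taken from the project, though the trainer construction and aggregate_stats call mirror code example #2 below):

# Hypothetical driver; variable names reuse the ones from the example above
low_shot_trainer = SVMLowShotTrainer(
    cfg["SVM"], layer=layername, output_dir=output_dir
)
for sample_num in sample_inds:
    train_sample_places_low_shot(
        low_shot_trainer, k_values, sample_inds, sample_num,
        output_dir, layername, cfg,
    )
results = low_shot_trainer.aggregate_stats(k_values, sample_inds)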
Code example #2
def train_voc07_low_shot(
    k_values: List[int],
    sample_inds: List[int],
    output_dir: str,
    layername: str,
    cfg: AttrDict,
):
    dataset_name = cfg["SVM"]["low_shot"]["dataset_name"]
    low_shot_trainer = SVMLowShotTrainer(cfg["SVM"],
                                         layer=layername,
                                         output_dir=output_dir)
    train_data = merge_features(output_dir, "train", layername)
    train_features, train_targets = train_data["features"], train_data["targets"]
    test_data = merge_features(output_dir, "test", layername)
    test_features, test_targets = test_data["features"], test_data["targets"]
    # now we want to create the low-shot samples based on the kind of dataset.
    # We only create low-shot samples for training. We test on the full dataset.
    generate_low_shot_samples(dataset_name, train_targets, k_values,
                              sample_inds, output_dir, layername)
    # Now, we train and test the low-shot SVM for every sample and k-value.
    for sample_num in sample_inds:
        for low_shot_kvalue in k_values:
            train_targets = load_file(
                f"{output_dir}/{layername}_sample{sample_num}_k{low_shot_kvalue}.npy"
            )
            low_shot_trainer.train(train_features, train_targets, sample_num,
                                   low_shot_kvalue)
            low_shot_trainer.test(test_features, test_targets, sample_num,
                                  low_shot_kvalue)
    # now we aggregate the stats across all independent samples and for each
    # k-value and report mean/min/max/std stats
    results = low_shot_trainer.aggregate_stats(k_values, sample_inds)
    logging.info("All Done!")
    return results
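A hedged invocation sketch for the function above. The k-values and sample indices below are illustrative placeholders (low-shot protocols typically sweep a few k-values over several independent samples), not values prescribed by the project:

# Illustrative call; all argument values below are assumptions
results = train_voc07_low_shot(
    k_values=[1, 2, 4, 8],
    sample_inds=[1, 2, 3, 4, 5],
    output_dir=output_dir,
    layername="res5",
    cfg=cfg,
)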
Code example #3
    def test_extract_cluster_assignment_ddp(self):
        with in_temporary_directory() as pretrain_dir:

            # Run a pre-training to have some weights to begin with
            pretrain_config = self._create_pretraining_config()
            run_integration_test(pretrain_config)

            # Create a directory to contain the extracted features
            with in_temporary_directory() as extract_dir:

                # Run the extract engine in a separate directory to check that
                # it is correctly able to output the features in another dir
                with in_temporary_directory():
                    extract_config = self._create_extract_features_config(
                        checkpoint_path=os.path.join(pretrain_dir,
                                                     "checkpoint.torch"))
                    extract_config.EXTRACT_FEATURES.OUTPUT_DIR = extract_dir
                    run_integration_test(extract_config,
                                         engine_name="extract_features")

                # Check the content of the directory containing the extracted features
                folder_content = os.listdir(extract_dir)
                print(folder_content)
                for rank in [0, 1]:
                    for chunk in range(5):
                        for file in [
                                f"rank{rank}_chunk{chunk}_train_heads_features.npy",
                                f"rank{rank}_chunk{chunk}_train_heads_inds.npy",
                                f"rank{rank}_chunk{chunk}_train_heads_targets.npy",
                        ]:
                            self.assertIn(file, folder_content)

                # Verify that we can merge the features back (train split)
                train_feat = merge_features(extract_dir, "train", "heads")
                print(train_feat)
                self.assertEqual(train_feat["features"].shape,
                                 torch.Size([40, 128]))
                self.assertEqual(train_feat["targets"].shape,
                                 torch.Size([40, 1]))
                self.assertEqual(train_feat["inds"].shape, torch.Size([40]))

                # Verify that we can merge the features back (test split)
                test_feat = merge_features(extract_dir, "test", "heads")
                self.assertEqual(test_feat["features"].shape,
                                 torch.Size([20, 128]))
                self.assertEqual(test_feat["targets"].shape,
                                 torch.Size([20, 1]))
                self.assertEqual(test_feat["inds"].shape, torch.Size([20]))
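The file layout asserted above hints at what merge_features has to do: collect every rank{r}_chunk{c}_{split}_{layer}_{suffix}.npy shard, concatenate them, and presumably restore dataset order using the saved indices. A simplified, self-contained sketch under those assumptions (the real loader also supports flattening and sampling, as code example #7 shows):

import os
import numpy as np

def merge_features_sketch(input_dir: str, split: str, layer: str) -> dict:
    # Gather all per-rank, per-chunk shards for one split/layer and
    # concatenate them; the shard order is identical for every suffix
    # because the filenames differ only in the suffix itself.
    out = {}
    for suffix in ["features", "inds", "targets"]:
        shards = sorted(
            f for f in os.listdir(input_dir)
            if f.endswith(f"_{split}_{layer}_{suffix}.npy")
        )
        out[suffix] = np.concatenate(
            [np.load(os.path.join(input_dir, f)) for f in shards]
        )
    # Assumption: rows are restored to dataset order via the saved indices
    order = np.argsort(out["inds"])
    return {key: value[order] for key, value in out.items()}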
Code example #4
File: train_svm.py Project: ankitshah009/vissl
def train_svm(cfg: AttrDict, output_dir: str, layername: str):
    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=cfg)

    # train the svm
    logging.info(f"Training SVM for layer: {layername}")
    trainer = SVMTrainer(cfg["SVM"], layer=layername, output_dir=output_dir)
    train_data = merge_features(output_dir, "train", layername)
    train_features, train_targets = train_data["features"], train_data["targets"]
    trainer.train(train_features, train_targets)

    # test the svm
    test_data = merge_features(output_dir, "test", layername)
    test_features, test_targets = test_data["features"], test_data["targets"]
    trainer.test(test_features, test_targets)
    logging.info("All Done!")
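A hedged call-site sketch for train_svm, assuming the features for the given layer were already extracted into the checkpoint folder. The layer name is an assumption; get_checkpoint_folder is used the same way in code example #5:

# Illustrative call; "res5" is an assumed layer name
output_dir = get_checkpoint_folder(cfg)
train_svm(cfg, output_dir, layername="res5")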
Code example #5
def get_data_features_and_images(cfg: AttrDict):
    output_dir = get_checkpoint_folder(cfg)
    split = cfg.RANKING.FEATURES.DATA_PARTITION
    logging.info("Merging features...")
    # merge the features across all nodes/gpus into one
    feature_data = merge_features(output_dir, split.lower(),
                                  cfg.RANKING.FEATURES.LAYER_NAME)

    logging.info("Getting the image paths...")
    # get the list of image paths
    dataset = build_dataset(cfg=cfg, split=split)
    feature_image_paths = dataset.get_image_paths()
    # due to multi-modality, we get image_paths as a nested list, one per
    # dataset. Check it's a list and extract the single dataset's paths.
    assert type(feature_image_paths) == list, "Image paths must be a list"
    assert len(feature_image_paths) == 1, "Multi-modality not supported yet!"
    return feature_data, feature_image_paths[0]
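The returned pair couples the merged feature dict with the image paths of the split. A minimal consumption sketch; pairing rows to paths through the saved "inds" is an assumption based on the merged dict seen in the other examples:

feature_data, image_paths = get_data_features_and_images(cfg)
features, inds = feature_data["features"], feature_data["inds"]
# pair each feature row with the path of the image it was extracted from
paired = [(image_paths[i], features[row]) for row, i in enumerate(inds)]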
Code example #6
def nearest_neighbor_test(cfg: AttrDict, layer_name: str = "heads"):
    temperature = cfg.NEAREST_NEIGHBOR.SIGMA
    num_neighbors = cfg.NEAREST_NEIGHBOR.TOPK
    output_dir = get_checkpoint_folder(cfg)
    logging.info(f"Testing with sigma: {temperature}, topk neighbors: {num_neighbors}")

    ############################################################################
    # Step 1: get train and test features
    train_out = merge_features(output_dir, "train", layer_name, cfg)
    train_features, train_labels = train_out["features"], train_out["targets"]
    # put train features and labels on gpu and transpose train features
    train_features = torch.from_numpy(train_features).float().cuda().t()
    train_labels = torch.LongTensor(train_labels).cuda()
    num_classes = train_labels.max() + 1
    if cfg.NEAREST_NEIGHBOR.L2_NORM_FEATS:
        train_features = nn.functional.normalize(train_features, dim=0, p=2)

    test_out = merge_features(output_dir, "test", layer_name, cfg)
    test_features, test_labels = test_out["features"], test_out["targets"]

    ###########################################################################
    # Step 2: calculate the nearest neighbor and the metrics
    top1, top5, total = 0.0, 0.0, 0
    num_test_images, num_chunks = test_labels.shape[0], 100
    imgs_per_chunk = num_test_images // num_chunks
    with torch.no_grad():
        retrieval_one_hot = torch.zeros(num_neighbors, num_classes).cuda()
        for idx in range(0, num_test_images, imgs_per_chunk):
            # get the features for test images and normalize the features if needed
            features = test_features[
                idx : min((idx + imgs_per_chunk), num_test_images), :
            ]
            targets = test_labels[idx : min((idx + imgs_per_chunk), num_test_images), :]
            batch_size = targets.shape[0]
            features = torch.from_numpy(features).float().cuda()
            targets = torch.LongTensor(targets).cuda()
            if cfg.NEAREST_NEIGHBOR.L2_NORM_FEATS:
                features = nn.functional.normalize(features, dim=1, p=2)

            # calculate the dot product and compute top-k neighbors
            similarity = torch.mm(features, train_features)
            distances, indices = similarity.topk(
                num_neighbors, largest=True, sorted=True
            )
            candidates = train_labels.view(1, -1).expand(batch_size, -1)
            retrieved_neighbors = torch.gather(candidates, 1, indices)

            retrieval_one_hot.resize_(batch_size * num_neighbors, num_classes).zero_()
            retrieval_one_hot.scatter_(1, retrieved_neighbors.view(-1, 1), 1)
            distances_transform = distances.clone().div_(temperature).exp_()
            probs = torch.sum(
                torch.mul(
                    retrieval_one_hot.view(batch_size, -1, num_classes),
                    distances_transform.view(batch_size, -1, 1),
                ),
                1,
            )
            _, predictions = probs.sort(1, True)

            # find the predictions that match the target
            correct = predictions.eq(targets.data.view(-1, 1))
            top1 = top1 + correct.narrow(1, 0, 1).sum().item()
            top5 = top5 + correct.narrow(1, 0, 5).sum().item()
            total += targets.size(0)
    top1 = top1 * 100.0 / total
    top5 = top5 * 100.0 / total
    logging.info(f"Total images: {total}, Top1: {top1}, Top5: {top5}")
    return top1, top5
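The chunked torch code above implements a temperature-weighted k-NN vote: each of the top-k training neighbors votes for its label with weight exp(similarity / sigma), and the class with the largest total wins. Restated for a single query vector in plain NumPy (an illustrative re-derivation, not project code):

import numpy as np

def knn_vote(query, train_feats, train_labels, k, sigma, num_classes):
    # train_feats: [N, d] array; train_labels: 1-D int array of length N
    sims = train_feats @ query                   # dot-product similarity
    topk = np.argsort(-sims)[:k]                 # k most similar train rows
    scores = np.zeros(num_classes)
    for i in topk:
        scores[train_labels[i]] += np.exp(sims[i] / sigma)  # weighted vote
    return int(np.argmax(scores))                # predicted class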
Code example #7
    def test_extract_cluster_assignment_ddp(self):
        with in_temporary_directory() as pretrain_dir:

            # Run a pre-training to have some weights to begin with
            pretrain_config = self._create_pretraining_config()
            run_integration_test(pretrain_config)

            # Create a directory to contain the extracted features
            with in_temporary_directory() as extract_dir:

                # Run the extract engine in a separate directory to check that
                # it is correctly able to output the features in another dir
                with in_temporary_directory():
                    extract_config = self._create_extract_features_config_head(
                        checkpoint_path=os.path.join(pretrain_dir, "checkpoint.torch")
                    )
                    extract_config.EXTRACT_FEATURES.OUTPUT_DIR = extract_dir
                    run_integration_test(extract_config, engine_name="extract_features")

                # Check the content of the directory containing the extracted features
                folder_content = os.listdir(extract_dir)
                print(folder_content)
                for rank in [0, 1]:
                    for chunk in range(5):
                        for file in [
                            f"rank{rank}_chunk{chunk}_train_heads_features.npy",
                            f"rank{rank}_chunk{chunk}_train_heads_inds.npy",
                            f"rank{rank}_chunk{chunk}_train_heads_targets.npy",
                        ]:
                            self.assertIn(file, folder_content)

                # Verify that we can merge the features back (train split)
                train_feat = merge_features(extract_dir, "train", "heads")
                print(train_feat)
                self.assertEqual(train_feat["features"].shape, torch.Size([40, 128]))
                self.assertEqual(train_feat["targets"].shape, torch.Size([40, 1]))
                self.assertEqual(train_feat["inds"].shape, torch.Size([40]))

                # Verify that we can merge the features back (test split)
                test_feat = merge_features(extract_dir, "test", "heads")
                self.assertEqual(test_feat["features"].shape, torch.Size([20, 128]))
                self.assertEqual(test_feat["targets"].shape, torch.Size([20, 1]))
                self.assertEqual(test_feat["inds"].shape, torch.Size([20]))

                # Run the extract engine this time for the features of the trunk
                with in_temporary_directory():
                    extract_config = self._create_extract_features_config_trunk(
                        checkpoint_path=os.path.join(pretrain_dir, "checkpoint.torch")
                    )
                    extract_config.EXTRACT_FEATURES.OUTPUT_DIR = extract_dir
                    run_integration_test(extract_config, engine_name="extract_features")

                # Verify that we can merge the features back without flattening them
                train_feat = merge_features(extract_dir, "train", "res5")
                self.assertEqual(
                    train_feat["features"].shape, torch.Size([40, 2048, 2, 2])
                )
                self.assertEqual(train_feat["targets"].shape, torch.Size([40, 1]))
                self.assertEqual(train_feat["inds"].shape, torch.Size([40]))

                # Verify that we can merge the features back without flattening them (second approach)
                train_feat = ExtractedFeaturesLoader.load_features(
                    extract_dir, "train", "res5"
                )
                self.assertEqual(
                    train_feat["features"].shape, torch.Size([40, 2048, 2, 2])
                )

                # Verify that we can merge the features back but flattened
                train_feat = ExtractedFeaturesLoader.load_features(
                    extract_dir, "train", "res5", flatten_features=True
                )
                self.assertEqual(
                    train_feat["features"].shape, torch.Size([40, 2048 * 2 * 2])
                )
                self.assertEqual(train_feat["targets"].shape, torch.Size([40, 1]))
                self.assertEqual(train_feat["inds"].shape, torch.Size([40]))

                # Verify that we can sample the features (unflattened)
                train_feat = ExtractedFeaturesLoader.sample_features(
                    input_dir=extract_dir,
                    split="train",
                    layer="res5",
                    num_samples=10,
                    seed=0,
                )
                self.assertEqual(
                    train_feat["features"].shape, torch.Size([10, 2048, 2, 2])
                )
                self.assertEqual(train_feat["targets"].shape, torch.Size([10, 1]))
                self.assertEqual(train_feat["inds"].shape, torch.Size([10]))

                # Verify that we can sample the features (flattened)
                train_feat = ExtractedFeaturesLoader.sample_features(
                    input_dir=extract_dir,
                    split="train",
                    layer="res5",
                    num_samples=10,
                    seed=0,
                    flatten_features=True,
                )
                self.assertEqual(
                    train_feat["features"].shape, torch.Size([10, 2048 * 2 * 2])
                )
                self.assertEqual(train_feat["targets"].shape, torch.Size([10, 1]))
                self.assertEqual(train_feat["inds"].shape, torch.Size([10]))
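As the shape assertions show, flatten_features=True collapses the trailing feature dimensions, e.g. turning the [40, 2048, 2, 2] res5 block above into [40, 2048 * 2 * 2]. The presumed equivalent operation on one merged tensor (illustrative; "feats" is a hypothetical name for the unflattened res5 features):

flat = feats.reshape(feats.shape[0], -1)  # [40, 2048, 2, 2] -> [40, 8192]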