Example #1
def extract_low_shot_features(args: Namespace, cfg: AttrDict, output_dir: str):
    dataset_name = cfg["SVM"]["low_shot"]["dataset_name"]
    k_values = cfg["SVM"]["low_shot"]["k_values"]
    sample_inds = cfg["SVM"]["low_shot"]["sample_inds"]
    if "voc" in dataset_name:
        # extract the features. In case of voc07 low-shot, we extract the
        # features on the full train and test sets. Both sets have about 5K
        # images each that we extract features for.
        launch_distributed(
            cfg,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
    elif "places" in dataset_name:
        # in case of places, since the feature size could become large, we
        # extract features on smaller subsamples
        data_paths, label_paths = dataset_catalog.get_data_files(
            split="TRAIN", dataset_config=cfg["DATA"])
        targets = load_file(label_paths[0])
        logging.info("Generating low-shot samples for Places205...")
        generate_places_low_shot_samples(targets, k_values, sample_inds,
                                         output_dir, data_paths[0])

        test_features_extracted = False
        for idx in sample_inds:
            for k in k_values:
                out_img_file = f"{output_dir}/train_images_sample{idx}_k{k}.npy"
                out_lbls_file = f"{output_dir}/train_labels_sample{idx}_k{k}.npy"
                cfg.DATA.TRAIN.DATA_PATHS = [out_img_file]
                cfg.DATA.TRAIN.LABEL_PATHS = [out_lbls_file]
                cfg.CHECKPOINT.DIR = f"{output_dir}/sample{idx}_k{k}"
                logging.info(
                    f"Extracting features for places low shot: sample{idx}_k{k}"
                )
                # we want to extract the test features only once since the test
                # features are shared across all low-shot setups.
                if test_features_extracted:
                    cfg.TEST_MODEL = False
                launch_distributed(
                    cfg,
                    args.node_id,
                    engine_name="extract_features",
                    hook_generator=default_hook_generator,
                )
                test_features_extracted = True
        # set the test model to true again after feature extraction is done
        cfg.TEST_MODEL = True
    else:
        raise RuntimeError(f"Dataset not recognised: {dataset_name}")
Example #2
    def __init__(self, loss_config: AttrDict):
        """
        Intializer for the sum cross-entropy loss. For a single
        tensor, this is equivalent to the cross-entropy loss. For a
        list of tensors, this computes the sum of the cross-entropy
        losses for each tensor in the list against the target.

        Config params:
            reduction: specifies reduction to apply to the output, optional
            normalize_output: Whether to L2 normalize the outputs
            world_size: total number of gpus in training. automatically inferred by vissl
        """
        super(BCELogitsMultipleOutputSingleTargetLoss, self).__init__()
        self.loss_config = loss_config
        self._losses = torch.nn.modules.ModuleList([])
        self._reduction = loss_config.get("reduction", "none")
        self._normalize_output = loss_config.get("normalize_output", False)
        self._world_size = loss_config["world_size"]
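A hedged usage sketch of the loss config this initializer expects; the plain-dict form and the concrete values are assumptions for illustration, not VISSL defaults:

# Illustrative only: world_size is required (normally inferred by VISSL);
# reduction and normalize_output fall back to "none" and False when omitted.
loss_config_sketch = {
    "reduction": "mean",
    "normalize_output": False,
    "world_size": 1,
}
# In VISSL this dict would be wrapped in an AttrDict and passed as
# BCELogitsMultipleOutputSingleTargetLoss(loss_config).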
Example #3
class TestMLP(unittest.TestCase):
    """
    Unit test to verify that correct construction of MLP layers
    and linear evaluation MLP layers
    """

    MODEL_CONFIG = AttrDict(
        {
            "HEAD": {
                "BATCHNORM_EPS": 1e-6,
                "BATCHNORM_MOMENTUM": 0.99,
                "PARAMS_MULTIPLIER": 1.0,
            }
        }
    )

    def test_mlp(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(4, 2048))
        out = mlp(x)
        assert out.shape == torch.Size([4, 100])

        x = torch.randn(size=(1, 2048))
        out = mlp(x)
        assert out.shape == torch.Size([1, 100])

    def test_mlp_reshaping(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(1, 2048, 1, 1))
        out = mlp(x)
        assert out.shape == torch.Size([1, 100])

    def test_mlp_catch_bad_shapes(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(1, 2048, 2, 1))
        with self.assertRaises(AssertionError) as context:
            mlp(x)
        assert context.exception is not None

    def test_eval_mlp_shape(self):
        eval_mlp = LinearEvalMLP(
            self.MODEL_CONFIG,
            in_channels=2048,
            dims=[2048 * 2 * 2, 1000],
        )

        resnet_feature_map = torch.randn(size=(4, 2048, 2, 2))
        out = eval_mlp(resnet_feature_map)
        assert out.shape == torch.Size([4, 1000])

        resnet_feature_map = torch.randn(size=(1, 2048, 2, 2))
        out = eval_mlp(resnet_feature_map)
        assert out.shape == torch.Size([1, 1000])
Example #4
def _copy_to_local(cfg: AttrDict):
    available_splits = _get_available_splits(cfg)
    for split in available_splits:
        if cfg.DATA[split].COPY_TO_LOCAL_DISK:
            dest_dir = cfg.DATA[split]["COPY_DESTINATION_DIR"]
            tmp_dest_dir = tempfile.mkdtemp()
            data_files, label_files = get_data_files(split, cfg.DATA)
            data_files.extend(label_files)
            _, output_dir = copy_data_to_local(
                data_files, dest_dir, tmp_destination_dir=tmp_dest_dir)
            cfg.DATA[split]["COPY_DESTINATION_DIR"] = output_dir
Example #5
def _get_data_limit_sampling(cfg: AttrDict, split: str) -> AttrDict:
    default_sampling = AttrDict(
        {"SEED": 0, "IS_BALANCED": False, "SKIP_NUM_SAMPLES": 0}
    )
    return cfg["DATA"][split].get("DATA_LIMIT_SAMPLING", default_sampling)
Example #6
    def __init__(self, meters_config: AttrDict):
        self.num_classes = meters_config.get("num_classes")
        self._total_sample_count = None
        self._curr_sample_count = None
        self.reset()
Example #7
    def __init__(self, model_config: AttrDict, model_name: str):
        super().__init__()

        assert model_config.INPUT_TYPE in ["rgb", "bgr"], "Input type not supported"
        trunk_config = copy.deepcopy(
            model_config.TRUNK.TRUNK_PARAMS.VISION_TRANSFORMERS)

        logging.info("Building model: Vision Transformer from yaml config")
        # Hacky workaround
        trunk_config = AttrDict({k.lower(): v for k, v in trunk_config.items()})

        img_size = trunk_config.image_size
        patch_size = trunk_config.patch_size
        in_chans = 3
        embed_dim = trunk_config.hidden_dim
        depth = trunk_config.num_layers
        num_heads = trunk_config.num_heads
        mlp_ratio = 4.0
        qkv_bias = trunk_config.qkv_bias
        qk_scale = trunk_config.qk_scale
        drop_rate = trunk_config.dropout_rate
        attn_drop_rate = trunk_config.attention_dropout_rate
        drop_path_rate = trunk_config.drop_path_rate
        hybrid_backbone_string = None
        # TODO Implement hybrid backbones
        # NOTE: keys were lower-cased above, so look up "hybrid" (not "HYBRID")
        if "hybrid" in trunk_config.keys():
            hybrid_backbone_string = trunk_config.hybrid
        norm_layer = nn.LayerNorm

        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        # TODO : Enable Hybrid Backbones
        if hybrid_backbone_string:
            self.patch_embed = globals()[hybrid_backbone_string](
                out_dim=embed_dim, img_size=img_size)
        # if hybrid_backbone is not None:
        #     self.patch_embed = HybridEmbed(
        #         hybrid_backbone,
        #         img_size=img_size,
        #         in_chans=in_chans,
        #         embed_dim=embed_dim,
        #     )
        else:
            self.patch_embed = PatchEmbed(
                img_size=img_size,
                patch_size=patch_size,
                in_chans=in_chans,
                embed_dim=embed_dim,
            )
        num_patches = self.patch_embed.num_patches

        self.class_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embedding = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[i],
                norm_layer=norm_layer,
            ) for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # NOTE as per official impl, we could have a pre-logits
        # representation dense layer + tanh here
        # self.repr = nn.Linear(embed_dim, representation_size)
        # self.repr_act = nn.Tanh()

        trunc_normal_(self.pos_embedding, std=0.02)
        trunc_normal_(self.class_token, std=0.02)
        self.apply(self._init_weights)
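For orientation, a hedged sketch of the VISION_TRANSFORMERS trunk params this constructor reads; the key names are taken from the attribute accesses above, while the values are roughly ViT-B/16-like and purely illustrative:

# Illustrative only: keys are upper-case in the YAML config and are
# lower-cased by the constructor before use.
vit_trunk_params_sketch = {
    "IMAGE_SIZE": 224,
    "PATCH_SIZE": 16,
    "HIDDEN_DIM": 768,
    "NUM_LAYERS": 12,
    "NUM_HEADS": 12,
    "QKV_BIAS": True,
    "QK_SCALE": None,
    "DROPOUT_RATE": 0.0,
    "ATTENTION_DROPOUT_RATE": 0.0,
    "DROP_PATH_RATE": 0.0,
    # "HYBRID": "<hybrid backbone builder>",  # optional; hybrid backbones are still a TODO
}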
Example #8
    def __init__(self, model_config, model_name):
        super().__init__()
        trunk_config = copy.deepcopy(model_config.TRUNK.TRUNK_PARAMS.CONVIT)
        trunk_config.update(model_config.TRUNK.TRUNK_PARAMS.VISION_TRANSFORMERS)

        logging.info("Building model: ConViT from yaml config")
        # Hacky workaround
        trunk_config = AttrDict({k.lower(): v for k, v in trunk_config.items()})

        image_size = trunk_config.image_size
        patch_size = trunk_config.patch_size
        classifier = trunk_config.classifier
        assert image_size % patch_size == 0, "Input shape indivisible by patch size"
        assert classifier in ["token", "gap"], "Unexpected classifier mode"
        n_gpsa_layers = trunk_config.n_gpsa_layers
        class_token_in_local_layers = trunk_config.class_token_in_local_layers
        mlp_dim = trunk_config.mlp_dim
        embed_dim = trunk_config.hidden_dim
        locality_dim = trunk_config.locality_dim
        attention_dropout_rate = trunk_config.attention_dropout_rate
        dropout_rate = trunk_config.dropout_rate
        drop_path_rate = trunk_config.drop_path_rate
        num_layers = trunk_config.num_layers
        locality_strength = trunk_config.locality_strength
        num_heads = trunk_config.num_heads
        qkv_bias = trunk_config.qkv_bias
        qk_scale = trunk_config.qk_scale
        use_local_init = trunk_config.use_local_init

        hybrid_backbone = None
        if "hybrid" in trunk_config.keys():
            hybrid_backbone = trunk_config.hybrid

        in_chans = 3
        # TODO: Make this configurable
        norm_layer = nn.LayerNorm

        self.classifier = classifier
        self.n_gpsa_layers = n_gpsa_layers
        self.class_token_in_local_layers = class_token_in_local_layers
        # For consistency with other models
        self.num_features = self.embed_dim = self.hidden_dim = embed_dim
        self.locality_dim = locality_dim

        # Hybrid backbones not tested
        if hybrid_backbone is not None:
            self.patch_embed = HybridEmbed(
                hybrid_backbone,
                img_size=image_size,
                in_chans=in_chans,
                embed_dim=embed_dim,
            )
        else:
            self.patch_embed = PatchEmbed(
                img_size=image_size,
                patch_size=patch_size,
                in_chans=in_chans,
                embed_dim=embed_dim,
            )

        seq_length = (image_size // patch_size) ** 2
        self.seq_length = seq_length

        self.class_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embedding = nn.Parameter(torch.zeros(1, seq_length, embed_dim))
        self.pos_drop = nn.Dropout(p=dropout_rate)

        if class_token_in_local_layers:
            seq_length += 1

        # stochastic depth decay rule
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]

        layers = []
        for i in range(num_layers):
            if i < self.n_gpsa_layers:
                if locality_strength > 0:
                    layer_locality_strength = locality_strength
                else:
                    layer_locality_strength = 1 / (i + 1)
                layers.append(
                    AttentionBlock(
                        attention_module=GPSA,
                        embed_dim=embed_dim,
                        num_heads=num_heads,
                        mlp_dim=mlp_dim,
                        qkv_bias=qkv_bias,
                        qk_scale=qk_scale,
                        dropout_rate=dropout_rate,
                        attention_dropout_rate=attention_dropout_rate,
                        drop_path_rate=dpr[i],
                        norm_layer=norm_layer,
                        locality_strength=layer_locality_strength,
                        locality_dim=self.locality_dim,
                        use_local_init=use_local_init,
                    )
                )
            else:
                layers.append(
                    AttentionBlock(
                        attention_module=SelfAttention,
                        embed_dim=embed_dim,
                        num_heads=num_heads,
                        mlp_dim=mlp_dim,
                        qkv_bias=qkv_bias,
                        qk_scale=qk_scale,
                        dropout_rate=dropout_rate,
                        attention_dropout_rate=attention_dropout_rate,
                        drop_path_rate=dpr[i],
                        norm_layer=norm_layer,
                    )
                )
        self.blocks = nn.ModuleList(layers)
        self.norm = norm_layer(embed_dim)

        trunc_normal_(self.pos_embedding, std=0.02)
        trunc_normal_(self.class_token, std=0.02)
        self.apply(self._init_weights)
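Similarly, a hedged sketch of the fields the ConViT constructor reads after merging the CONVIT and VISION_TRANSFORMERS trunk params; the key names come from the attribute accesses above and the values are purely illustrative:

# Illustrative only: the real values come from the VISSL YAML config.
convit_trunk_params_sketch = {
    "IMAGE_SIZE": 224,
    "PATCH_SIZE": 16,
    "CLASSIFIER": "token",             # "token" or "gap"
    "N_GPSA_LAYERS": 10,               # leading layers use GPSA, the rest plain self-attention
    "CLASS_TOKEN_IN_LOCAL_LAYERS": False,
    "MLP_DIM": 3072,
    "HIDDEN_DIM": 768,
    "LOCALITY_DIM": 2,
    "ATTENTION_DROPOUT_RATE": 0.0,
    "DROPOUT_RATE": 0.0,
    "DROP_PATH_RATE": 0.1,
    "NUM_LAYERS": 12,
    "LOCALITY_STRENGTH": 1.0,          # values <= 0 fall back to 1 / (layer_index + 1)
    "NUM_HEADS": 12,
    "QKV_BIAS": False,
    "QK_SCALE": None,
    "USE_LOCAL_INIT": True,
    # "HYBRID": ...,                   # optional hybrid backbone (untested)
}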