def extract_low_shot_features(args: Namespace, cfg: AttrDict, output_dir: str):
    dataset_name = cfg["SVM"]["low_shot"]["dataset_name"]
    k_values = cfg["SVM"]["low_shot"]["k_values"]
    sample_inds = cfg["SVM"]["low_shot"]["sample_inds"]
    if "voc" in dataset_name:
        # extract the features. In case of voc07 low-shot, we extract the
        # features on the full train and test sets. Both sets have about 5K
        # images that we extract features for.
        launch_distributed(
            cfg,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
    elif "places" in dataset_name:
        # in case of places, since the feature size could become large, we
        # need to extract features on smaller subsamples
        data_paths, label_paths = dataset_catalog.get_data_files(
            split="TRAIN", dataset_config=cfg["DATA"]
        )
        targets = load_file(label_paths[0])
        logging.info("Generating low-shot samples for Places205...")
        generate_places_low_shot_samples(
            targets, k_values, sample_inds, output_dir, data_paths[0]
        )
        test_features_extracted = False
        for idx in sample_inds:
            for k in k_values:
                out_img_file = f"{output_dir}/train_images_sample{idx}_k{k}.npy"
                out_lbls_file = f"{output_dir}/train_labels_sample{idx}_k{k}.npy"
                cfg.DATA.TRAIN.DATA_PATHS = [out_img_file]
                cfg.DATA.TRAIN.LABEL_PATHS = [out_lbls_file]
                cfg.CHECKPOINT.DIR = f"{output_dir}/sample{idx}_k{k}"
                logging.info(
                    f"Extracting features for places low shot: sample{idx}_k{k}"
                )
                # we want to extract the test features only once since the test
                # features are commonly used for testing all low-shot setups.
                if test_features_extracted:
                    cfg.TEST_MODEL = False
                launch_distributed(
                    cfg,
                    args.node_id,
                    engine_name="extract_features",
                    hook_generator=default_hook_generator,
                )
                test_features_extracted = True
        # set the test model to true again after feature extraction is done
        cfg.TEST_MODEL = True
    else:
        raise RuntimeError(f"Dataset not recognised: {dataset_name}")
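# Illustrative shape of the config that extract_low_shot_features reads
# (key names taken from the accesses above; the concrete values are just
# examples, not defaults shipped with VISSL).
low_shot_cfg_sketch = {
    "SVM": {
        "low_shot": {
            "dataset_name": "places205",  # or e.g. "voc07"
            "k_values": [1, 2, 4, 8],     # low-shot samples per class
            "sample_inds": [1, 2, 3],     # independent draws per k
        }
    }
}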
def __init__(self, loss_config: AttrDict):
    """
    Initializer for the sum binary cross-entropy loss. For a single
    tensor, this is equivalent to the binary cross-entropy loss. For a
    list of tensors, this computes the sum of the binary cross-entropy
    losses for each tensor in the list against the target.

    Config params:
        reduction: specifies the reduction to apply to the output (optional)
        normalize_output: whether to L2-normalize the outputs
        world_size: total number of gpus in training, automatically
                    inferred by vissl
    """
    super(BCELogitsMultipleOutputSingleTargetLoss, self).__init__()
    self.loss_config = loss_config
    self._losses = torch.nn.modules.ModuleList([])
    self._reduction = loss_config.get("reduction", "none")
    self._normalize_output = loss_config.get("normalize_output", False)
    self._world_size = loss_config["world_size"]
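# A minimal, self-contained sketch (not the VISSL class itself) of the
# behaviour described in the docstring above: each output in a list is
# scored against the same target with BCE-with-logits and the per-output
# losses are summed.
import torch
import torch.nn.functional as F


def sum_bce_with_logits(outputs, target, normalize_output=False):
    if isinstance(outputs, torch.Tensor):
        outputs = [outputs]
    total = 0.0
    for out in outputs:
        if normalize_output:
            out = F.normalize(out, p=2, dim=-1)
        total = total + F.binary_cross_entropy_with_logits(out, target)
    return total


logits_a, logits_b = torch.randn(4, 10), torch.randn(4, 10)
target = torch.randint(0, 2, (4, 10)).float()
print(sum_bce_with_logits([logits_a, logits_b], target))  # scalar loss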
class TestMLP(unittest.TestCase):
    """
    Unit tests to verify the correct construction of MLP layers
    and linear evaluation MLP layers
    """

    MODEL_CONFIG = AttrDict(
        {
            "HEAD": {
                "BATCHNORM_EPS": 1e-6,
                "BATCHNORM_MOMENTUM": 0.99,
                "PARAMS_MULTIPLIER": 1.0,
            }
        }
    )

    def test_mlp(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(4, 2048))
        out = mlp(x)
        assert out.shape == torch.Size([4, 100])

        x = torch.randn(size=(1, 2048))
        out = mlp(x)
        assert out.shape == torch.Size([1, 100])

    def test_mlp_reshaping(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(1, 2048, 1, 1))
        out = mlp(x)
        assert out.shape == torch.Size([1, 100])

    def test_mlp_catch_bad_shapes(self):
        mlp = MLP(self.MODEL_CONFIG, dims=[2048, 100])

        x = torch.randn(size=(1, 2048, 2, 1))
        with self.assertRaises(AssertionError) as context:
            mlp(x)
        assert context.exception is not None

    def test_eval_mlp_shape(self):
        eval_mlp = LinearEvalMLP(
            self.MODEL_CONFIG,
            in_channels=2048,
            dims=[2048 * 2 * 2, 1000],
        )

        resnet_feature_map = torch.randn(size=(4, 2048, 2, 2))
        out = eval_mlp(resnet_feature_map)
        assert out.shape == torch.Size([4, 1000])

        resnet_feature_map = torch.randn(size=(1, 2048, 2, 2))
        out = eval_mlp(resnet_feature_map)
        assert out.shape == torch.Size([1, 1000])
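# A toy MLP (hypothetical, for illustration only) that satisfies the same
# shape contract the tests above exercise: (N, C) or (N, C, 1, 1) inputs
# are accepted, while other 4D shapes trip an AssertionError.
import torch
import torch.nn as nn


class ToyMLP(nn.Module):
    def __init__(self, dims):
        super().__init__()
        self.clf = nn.Sequential(
            *[nn.Linear(dims[i], dims[i + 1]) for i in range(len(dims) - 1)]
        )

    def forward(self, x):
        if x.ndim > 2:
            # only trailing singleton dims may be flattened away
            assert all(d == 1 for d in x.shape[2:]), f"bad input shape {x.shape}"
            x = x.flatten(start_dim=1)
        return self.clf(x)


out = ToyMLP([2048, 100])(torch.randn(4, 2048, 1, 1))
assert out.shape == torch.Size([4, 100])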
def _copy_to_local(cfg: AttrDict):
    available_splits = _get_available_splits(cfg)
    for split in available_splits:
        if cfg.DATA[split].COPY_TO_LOCAL_DISK:
            dest_dir = cfg.DATA[split]["COPY_DESTINATION_DIR"]
            tmp_dest_dir = tempfile.mkdtemp()
            data_files, label_files = get_data_files(split, cfg.DATA)
            data_files.extend(label_files)
            _, output_dir = copy_data_to_local(
                data_files, dest_dir, tmp_destination_dir=tmp_dest_dir
            )
            cfg.DATA[split]["COPY_DESTINATION_DIR"] = output_dir
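# A stand-in sketch of the contract `_copy_to_local` assumes from the
# `copy_data_to_local` helper (the name and return signature follow the
# call above; this body is illustrative, not the real implementation):
# stage files into a scratch directory and return it so the config can
# be re-pointed at the staged copies.
import os
import shutil


def copy_data_to_local_sketch(input_files, dest_dir, tmp_destination_dir=None):
    output_dir = tmp_destination_dir or dest_dir
    os.makedirs(output_dir, exist_ok=True)
    copied = [shutil.copy(f, output_dir) for f in input_files]
    return copied, output_dir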
def _get_data_limit_sampling(cfg: AttrDict, split: str) -> AttrDict:
    default_sampling = AttrDict(
        {"SEED": 0, "IS_BALANCED": False, "SKIP_NUM_SAMPLES": 0}
    )
    return cfg["DATA"][split].get("DATA_LIMIT_SAMPLING", default_sampling)
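# The fallback behaviour in isolation, using a tiny AttrDict stand-in
# (the real AttrDict lives in VISSL): a split with no DATA_LIMIT_SAMPLING
# key gets the seeded, unbalanced, skip-nothing default.
class _AttrDict(dict):
    __getattr__ = dict.__getitem__


cfg_sketch = _AttrDict({"DATA": _AttrDict({"TRAIN": _AttrDict({})})})
sampling = cfg_sketch["DATA"]["TRAIN"].get(
    "DATA_LIMIT_SAMPLING",
    _AttrDict({"SEED": 0, "IS_BALANCED": False, "SKIP_NUM_SAMPLES": 0}),
)
print(sampling.SEED, sampling.IS_BALANCED)  # -> 0 False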
def __init__(self, meters_config: AttrDict):
    self.num_classes = meters_config.get("num_classes")
    self._total_sample_count = None
    self._curr_sample_count = None
    self.reset()
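# One plausible companion `reset` for the meter above (a sketch: the real
# method is defined elsewhere in the class and may shape its counters
# differently). __init__ defers counter initialization to reset() so the
# same code path serves both construction and mid-training resets.
import torch


def reset(self):
    self._total_sample_count = torch.zeros(1)  # samples seen overall
    self._curr_sample_count = torch.zeros(1)   # samples in current window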
def __init__(self, model_config: AttrDict, model_name: str):
    super().__init__()

    assert model_config.INPUT_TYPE in ["rgb", "bgr"], "Input type not supported"
    trunk_config = copy.deepcopy(
        model_config.TRUNK.TRUNK_PARAMS.VISION_TRANSFORMERS
    )

    logging.info("Building model: Vision Transformer from yaml config")
    # Hacky workaround
    trunk_config = AttrDict({k.lower(): v for k, v in trunk_config.items()})

    img_size = trunk_config.image_size
    patch_size = trunk_config.patch_size
    in_chans = 3
    embed_dim = trunk_config.hidden_dim
    depth = trunk_config.num_layers
    num_heads = trunk_config.num_heads
    mlp_ratio = 4.0
    qkv_bias = trunk_config.qkv_bias
    qk_scale = trunk_config.qk_scale
    drop_rate = trunk_config.dropout_rate
    attn_drop_rate = trunk_config.attention_dropout_rate
    drop_path_rate = trunk_config.drop_path_rate
    hybrid_backbone_string = None
    # TODO Implement hybrid backbones
    # NOTE: the config keys were lower-cased above, so we must look up
    # "hybrid" here; checking for "HYBRID" would never match.
    if "hybrid" in trunk_config.keys():
        hybrid_backbone_string = trunk_config.hybrid
    norm_layer = nn.LayerNorm

    self.num_features = (
        self.embed_dim
    ) = embed_dim  # num_features for consistency with other models

    # TODO : Enable Hybrid Backbones
    if hybrid_backbone_string:
        self.patch_embed = globals()[hybrid_backbone_string](
            out_dim=embed_dim, img_size=img_size
        )
    # if hybrid_backbone is not None:
    #     self.patch_embed = HybridEmbed(
    #         hybrid_backbone,
    #         img_size=img_size,
    #         in_chans=in_chans,
    #         embed_dim=embed_dim,
    #     )
    else:
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
    num_patches = self.patch_embed.num_patches

    self.class_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
    self.pos_embedding = nn.Parameter(
        torch.zeros(1, num_patches + 1, embed_dim)
    )
    self.pos_drop = nn.Dropout(p=drop_rate)

    dpr = [
        x.item() for x in torch.linspace(0, drop_path_rate, depth)
    ]  # stochastic depth decay rule
    self.blocks = nn.ModuleList(
        [
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[i],
                norm_layer=norm_layer,
            )
            for i in range(depth)
        ]
    )
    self.norm = norm_layer(embed_dim)

    # NOTE as per official impl, we could have a pre-logits
    # representation dense layer + tanh here
    # self.repr = nn.Linear(embed_dim, representation_size)
    # self.repr_act = nn.Tanh()

    trunc_normal_(self.pos_embedding, std=0.02)
    trunc_normal_(self.class_token, std=0.02)
    self.apply(self._init_weights)
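# Sanity check of the shapes set up above under example settings
# (image_size=224, patch_size=16, hidden_dim=768; values illustrative):
# the position embedding covers every patch plus the class token.
img_size, patch_size, embed_dim = 224, 16, 768
num_patches = (img_size // patch_size) ** 2  # 14 * 14 = 196
pos_embedding_shape = (1, num_patches + 1, embed_dim)
print(pos_embedding_shape)  # (1, 197, 768)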
def __init__(self, model_config, model_name):
    super().__init__()

    trunk_config = copy.deepcopy(model_config.TRUNK.TRUNK_PARAMS.CONVIT)
    trunk_config.update(model_config.TRUNK.TRUNK_PARAMS.VISION_TRANSFORMERS)

    logging.info("Building model: ConViT from yaml config")
    # Hacky workaround
    trunk_config = AttrDict({k.lower(): v for k, v in trunk_config.items()})

    image_size = trunk_config.image_size
    patch_size = trunk_config.patch_size
    classifier = trunk_config.classifier
    assert image_size % patch_size == 0, "Input shape indivisible by patch size"
    assert classifier in ["token", "gap"], "Unexpected classifier mode"
    n_gpsa_layers = trunk_config.n_gpsa_layers
    class_token_in_local_layers = trunk_config.class_token_in_local_layers
    mlp_dim = trunk_config.mlp_dim
    embed_dim = trunk_config.hidden_dim
    locality_dim = trunk_config.locality_dim
    attention_dropout_rate = trunk_config.attention_dropout_rate
    dropout_rate = trunk_config.dropout_rate
    drop_path_rate = trunk_config.drop_path_rate
    num_layers = trunk_config.num_layers
    locality_strength = trunk_config.locality_strength
    num_heads = trunk_config.num_heads
    qkv_bias = trunk_config.qkv_bias
    qk_scale = trunk_config.qk_scale
    use_local_init = trunk_config.use_local_init
    hybrid_backbone = None
    if "hybrid" in trunk_config.keys():
        hybrid_backbone = trunk_config.hybrid
    in_chans = 3  # TODO: Make this configurable
    norm_layer = nn.LayerNorm

    self.classifier = classifier
    self.n_gpsa_layers = n_gpsa_layers
    self.class_token_in_local_layers = class_token_in_local_layers
    # For consistency with other models
    self.num_features = self.embed_dim = self.hidden_dim = embed_dim
    self.locality_dim = locality_dim

    # Hybrid backbones not tested
    if hybrid_backbone is not None:
        self.patch_embed = HybridEmbed(
            hybrid_backbone,
            img_size=image_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
    else:
        self.patch_embed = PatchEmbed(
            img_size=image_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
    seq_length = (image_size // patch_size) ** 2
    self.seq_length = seq_length

    self.class_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
    self.pos_embedding = nn.Parameter(torch.zeros(1, seq_length, embed_dim))
    self.pos_drop = nn.Dropout(p=dropout_rate)

    if class_token_in_local_layers:
        seq_length += 1

    # stochastic depth decay rule
    dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)]
    layers = []
    for i in range(num_layers):
        if i < self.n_gpsa_layers:
            if locality_strength > 0:
                layer_locality_strength = locality_strength
            else:
                layer_locality_strength = 1 / (i + 1)
            layers.append(
                AttentionBlock(
                    attention_module=GPSA,
                    embed_dim=embed_dim,
                    num_heads=num_heads,
                    mlp_dim=mlp_dim,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    dropout_rate=dropout_rate,
                    attention_dropout_rate=attention_dropout_rate,
                    drop_path_rate=dpr[i],
                    norm_layer=norm_layer,
                    locality_strength=layer_locality_strength,
                    locality_dim=self.locality_dim,
                    use_local_init=use_local_init,
                )
            )
        else:
            layers.append(
                AttentionBlock(
                    attention_module=SelfAttention,
                    embed_dim=embed_dim,
                    num_heads=num_heads,
                    mlp_dim=mlp_dim,
                    qkv_bias=qkv_bias,
                    qk_scale=qk_scale,
                    dropout_rate=dropout_rate,
                    attention_dropout_rate=attention_dropout_rate,
                    drop_path_rate=dpr[i],
                    norm_layer=norm_layer,
                )
            )
    self.blocks = nn.ModuleList(layers)
    self.norm = norm_layer(embed_dim)

    trunc_normal_(self.pos_embedding, std=0.02)
    trunc_normal_(self.class_token, std=0.02)
    self.apply(self._init_weights)
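# Illustration of the per-layer locality strength chosen above: when the
# configured value is not positive, GPSA layer i falls back to 1 / (i + 1),
# so locality decays with depth (the numbers below are just an example).
n_gpsa_layers, locality_strength = 10, 0.0
schedule = [
    locality_strength if locality_strength > 0 else 1 / (i + 1)
    for i in range(n_gpsa_layers)
]
print(schedule[:4])  # [1.0, 0.5, 0.3333..., 0.25]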