def get_swin_config(swin_name):
    """Build a ``SwinConfig`` from the fields encoded in a checkpoint name.

    ``swin_name`` is split on ``"_"`` and read positionally: field 1 is the
    model size, field 3 ends with the window size and field 4 is the image
    size (presumably timm-style names such as
    ``swin_base_patch4_window12_384`` -- confirm against the callers).

    Args:
        swin_name: Underscore-separated checkpoint name.

    Returns:
        A ``SwinConfig`` with ``image_size``, ``num_labels``, ``embed_dim``,
        ``depths``, ``num_heads`` and ``window_size`` set. Unless the name
        contains ``"in22k"``, ``id2label`` / ``label2id`` are also filled from
        the ``imagenet-1k-id2label.json`` label file.

    Raises:
        IndexError: If the name has fewer than five ``"_"``-separated fields.
        ValueError: If the image-size field is not an integer or the window
            field does not end with digits.
    """
    config = SwinConfig()
    name_split = swin_name.split("_")

    model_size = name_split[1]
    img_size = int(name_split[4])

    # Parse every trailing digit of the window field. Taking only the last
    # character would turn a two-digit size such as "window12" into 2.
    window_field = name_split[3]
    window_digits = window_field[len(window_field.rstrip("0123456789")):]
    window_size = int(window_digits)

    # (embed_dim, depths, num_heads) per model size; any size not listed
    # falls back to the largest preset, as the original else-branch did.
    size_presets = {
        "tiny": (96, (2, 2, 6, 2), (3, 6, 12, 24)),
        "small": (96, (2, 2, 18, 2), (3, 6, 12, 24)),
        "base": (128, (2, 2, 18, 2), (4, 8, 16, 32)),
    }
    fallback_preset = (192, (2, 2, 18, 2), (6, 12, 24, 48))
    embed_dim, depths, num_heads = size_presets.get(model_size, fallback_preset)

    if "in22k" in swin_name:
        # No label mapping is attached for these checkpoints.
        num_classes = 21841
    else:
        num_classes = 1000
        repo_id = "datasets/huggingface/label-files"
        filename = "imagenet-1k-id2label.json"
        label_path = cached_download(hf_hub_url(repo_id, filename))
        # Context manager so the file handle is closed deterministically.
        with open(label_path, "r", encoding="utf-8") as label_file:
            raw_labels = json.load(label_file)
        # JSON object keys are strings; the config is given integer ids.
        id2label = {int(k): v for k, v in raw_labels.items()}
        config.id2label = id2label
        config.label2id = {v: k for k, v in id2label.items()}

    config.image_size = img_size
    config.num_labels = num_classes
    config.embed_dim = embed_dim
    config.depths = depths
    config.num_heads = num_heads
    config.window_size = window_size

    return config
 def get_config(self):
     """Assemble a ``MaskFormerConfig`` from a Swin backbone and a DETR decoder.

     The backbone is a ``SwinConfig`` with ``depths=[1, 1, 1, 1]``; the decoder
     is a ``DetrConfig`` sized from ``self.num_queries`` and
     ``self.mask_feature_size``. Both are combined through
     ``MaskFormerConfig.from_backbone_and_decoder_configs`` together with the
     feature sizes, channel count and label count read from ``self``.
     """
     swin_backbone = SwinConfig(depths=[1, 1, 1, 1])
     detr_decoder = DetrConfig(
         decoder_ffn_dim=128,
         num_queries=self.num_queries,
         decoder_attention_heads=2,
         d_model=self.mask_feature_size,
     )
     # The mask and FPN feature sizes both use the same attribute.
     maskformer_config = MaskFormerConfig.from_backbone_and_decoder_configs(
         backbone_config=swin_backbone,
         decoder_config=detr_decoder,
         mask_feature_size=self.mask_feature_size,
         fpn_feature_size=self.mask_feature_size,
         num_channels=self.num_channels,
         num_labels=self.num_labels,
     )
     return maskformer_config
Example #3
0
 def get_config(self):
     """Build a ``SwinConfig`` from the hyperparameters stored on ``self``.

     Each keyword is forwarded from the attribute of the same name.

     Returns:
         The ``SwinConfig`` constructed from those attributes.
     """
     return SwinConfig(
         image_size=self.image_size,
         patch_size=self.patch_size,
         num_channels=self.num_channels,
         embed_dim=self.embed_dim,
         depths=self.depths,
         num_heads=self.num_heads,
         window_size=self.window_size,
         mlp_ratio=self.mlp_ratio,
         qkv_bias=self.qkv_bias,
         hidden_dropout_prob=self.hidden_dropout_prob,
         attention_probs_dropout_prob=self.attention_probs_dropout_prob,
         drop_path_rate=self.drop_path_rate,
         hidden_act=self.hidden_act,
         use_absolute_embeddings=self.use_absolute_embeddings,
         # Spelled `patch_norm` (was `path_norm`) so the value is passed under
         # the same name as the attribute it is read from.
         patch_norm=self.patch_norm,
         layer_norm_eps=self.layer_norm_eps,
         initializer_range=self.initializer_range,
     )