def get_swin_config(swin_name): config = SwinConfig() name_split = swin_name.split("_") model_size = name_split[1] img_size = int(name_split[4]) window_size = int(name_split[3][-1]) if model_size == "tiny": embed_dim = 96 depths = (2, 2, 6, 2) num_heads = (3, 6, 12, 24) elif model_size == "small": embed_dim = 96 depths = (2, 2, 18, 2) num_heads = (3, 6, 12, 24) elif model_size == "base": embed_dim = 128 depths = (2, 2, 18, 2) num_heads = (4, 8, 16, 32) else: embed_dim = 192 depths = (2, 2, 18, 2) num_heads = (6, 12, 24, 48) if "in22k" in swin_name: num_classes = 21841 else: num_classes = 1000 repo_id = "datasets/huggingface/label-files" filename = "imagenet-1k-id2label.json" id2label = json.load( open(cached_download(hf_hub_url(repo_id, filename)), "r")) id2label = {int(k): v for k, v in id2label.items()} config.id2label = id2label config.label2id = {v: k for k, v in id2label.items()} config.image_size = img_size config.num_labels = num_classes config.embed_dim = embed_dim config.depths = depths config.num_heads = num_heads config.window_size = window_size return config
def get_config(self): return MaskFormerConfig.from_backbone_and_decoder_configs( backbone_config=SwinConfig(depths=[1, 1, 1, 1], ), decoder_config=DetrConfig( decoder_ffn_dim=128, num_queries=self.num_queries, decoder_attention_heads=2, d_model=self.mask_feature_size, ), mask_feature_size=self.mask_feature_size, fpn_feature_size=self.mask_feature_size, num_channels=self.num_channels, num_labels=self.num_labels, )
def get_config(self): return SwinConfig( image_size=self.image_size, patch_size=self.patch_size, num_channels=self.num_channels, embed_dim=self.embed_dim, depths=self.depths, num_heads=self.num_heads, window_size=self.window_size, mlp_ratio=self.mlp_ratio, qkv_bias=self.qkv_bias, hidden_dropout_prob=self.hidden_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob, drop_path_rate=self.drop_path_rate, hidden_act=self.hidden_act, use_absolute_embeddings=self.use_absolute_embeddings, path_norm=self.patch_norm, layer_norm_eps=self.layer_norm_eps, initializer_range=self.initializer_range, )