from transformers import SEWDConfig


def get_config(self):
    return SEWDConfig(
        hidden_size=self.hidden_size,
        feat_extract_norm=self.feat_extract_norm,
        feat_extract_dropout=self.feat_extract_dropout,
        feat_extract_activation=self.feat_extract_activation,
        conv_dim=self.conv_dim,
        conv_stride=self.conv_stride,
        conv_kernel=self.conv_kernel,
        conv_bias=self.conv_bias,
        num_conv_pos_embeddings=self.num_conv_pos_embeddings,
        num_conv_pos_embedding_groups=self.num_conv_pos_embedding_groups,
        squeeze_factor=self.squeeze_factor,
        max_position_embeddings=self.max_position_embeddings,
        position_buckets=self.position_buckets,
        share_att_key=self.share_att_key,
        relative_attention=self.relative_attention,
        position_biased_input=self.position_biased_input,
        pos_att_type=self.pos_att_type,
        norm_rel_ebd=self.norm_rel_ebd,
        num_hidden_layers=self.num_hidden_layers,
        num_attention_heads=self.num_attention_heads,
        hidden_dropout=self.hidden_dropout,
        intermediate_size=self.intermediate_size,
        layer_norm_eps=self.layer_norm_eps,
        hidden_act=self.hidden_act,
        initializer_range=self.initializer_range,
        vocab_size=self.vocab_size,
    )


def convert_config(model, is_finetuned):
    """Map a fairseq SEW-D model config onto a transformers SEWDConfig."""
    config = SEWDConfig()
    # fine-tuned (CTC) checkpoints nest the pretrained encoder config under w2v_encoder
    if is_finetuned:
        fs_config = model.w2v_encoder.w2v_model.cfg
    else:
        fs_config = model.cfg

    config.conv_bias = fs_config.conv_bias
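    # fs_config.conv_feature_layers is a string literal such as
    # "[(512, 10, 5), (512, 3, 2), ...]" listing one (dim, kernel, stride) tuple per conv layer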
    conv_layers = eval(fs_config.conv_feature_layers)
    config.conv_dim = [x[0] for x in conv_layers]
    config.conv_kernel = [x[1] for x in conv_layers]
    config.conv_stride = [x[2] for x in conv_layers]
    config.feat_extract_activation = "gelu"
    config.feat_extract_norm = "layer" if fs_config.extractor_mode == "layer_norm" else "group"
    config.final_dropout = 0.0
    config.hidden_act = fs_config.activation_fn.name
    config.hidden_size = fs_config.encoder_embed_dim
    config.initializer_range = 0.02
    config.intermediate_size = fs_config.encoder_ffn_embed_dim
    config.layer_norm_eps = 1e-5
    config.layerdrop = fs_config.encoder_layerdrop
    config.num_attention_heads = fs_config.encoder_attention_heads
    config.num_conv_pos_embedding_groups = fs_config.conv_pos_groups
    config.num_conv_pos_embeddings = fs_config.conv_pos
    config.num_feat_extract_layers = len(conv_layers)
    config.num_hidden_layers = fs_config.encoder_layers
    config.squeeze_factor = fs_config.squeeze_factor
    # DeBERTa-specific parameters:
    config.max_position_embeddings = fs_config.max_position_embeddings
    config.position_buckets = fs_config.position_buckets
    config.share_att_key = fs_config.share_att_key
    config.relative_attention = fs_config.relative_attention
    config.position_biased_input = fs_config.position_biased_input
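    # fairseq stores pos_att_type as a "|"-separated string, e.g. "p2c|c2p" -> ("p2c", "c2p")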
    config.pos_att_type = tuple(fs_config.pos_att_type.split("|"))
    config.norm_rel_ebd = fs_config.norm_rel_ebd

    # take care of any params that are overridden by the Wav2VecCtc model
    if is_finetuned:
        fs_config = model.cfg
        config.final_dropout = fs_config.final_dropout
        config.layerdrop = fs_config.layerdrop
    config.activation_dropout = fs_config.activation_dropout
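    # SpecAugment is applied whenever time or feature masking has a non-zero probability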
    config.apply_spec_augment = fs_config.mask_prob > 0 or fs_config.mask_channel_prob > 0
    config.attention_dropout = fs_config.attention_dropout
    config.feat_proj_dropout = fs_config.dropout_input
    config.hidden_dropout = fs_config.dropout
    config.mask_feature_length = fs_config.mask_channel_length
    config.mask_feature_prob = fs_config.mask_channel_prob
    config.mask_time_length = fs_config.mask_length
    config.mask_time_prob = fs_config.mask_prob

    config.feature_extractor_type = "Wav2Vec2FeatureExtractor"
    config.tokenizer_class = "Wav2Vec2CTCTokenizer"

    return config
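

if __name__ == "__main__":
    # Minimal usage sketch, assuming a local fairseq SEW-D checkpoint; the exact
    # loading call can vary by fairseq version, and the SEW-D fairseq model code
    # must be importable for the checkpoint to deserialize.
    import fairseq

    models, _, _ = fairseq.checkpoint_utils.load_model_ensemble_and_task(
        ["/path/to/sew_d_checkpoint.pt"]
    )
    hf_config = convert_config(models[0], is_finetuned=False)
    print(hf_config)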