import json

# Model definitions are assumed to come from GluonNLP's model zoo.
from gluonnlp.models.bert import BertModel
from gluonnlp.models.albert import AlbertModel


def convert_tf_config(json_cfg_path, vocab_size, model_type):
    """Convert a TF json config file into a GluonNLP config."""
    with open(json_cfg_path, encoding='utf-8') as f:
        json_cfg = json.load(f)
    if model_type == 'bert':
        # For the bert model, the config file is copied from a local
        # configuration file, so the vocab_size cannot be verified here.
        # It is instead verified when the embedding weights are converted.
        cfg = BertModel.get_cfg().clone()
    elif model_type == 'albert':
        assert vocab_size == json_cfg['vocab_size']
        cfg = AlbertModel.get_cfg().clone()
    else:
        raise NotImplementedError
    cfg.defrost()
    cfg.MODEL.vocab_size = vocab_size
    cfg.MODEL.units = json_cfg['hidden_size']
    cfg.MODEL.hidden_size = json_cfg['intermediate_size']
    cfg.MODEL.max_length = json_cfg['max_position_embeddings']
    cfg.MODEL.num_heads = json_cfg['num_attention_heads']
    cfg.MODEL.num_layers = json_cfg['num_hidden_layers']
    cfg.MODEL.pos_embed_type = 'learned'
    if json_cfg['hidden_act'] == 'gelu':
        cfg.MODEL.activation = 'gelu(tanh)'
    else:
        cfg.MODEL.activation = json_cfg['hidden_act']
    cfg.MODEL.layer_norm_eps = 1E-12
    cfg.MODEL.num_token_types = json_cfg['type_vocab_size']
    cfg.MODEL.hidden_dropout_prob = float(json_cfg['hidden_dropout_prob'])
    cfg.MODEL.attention_dropout_prob = float(
        json_cfg['attention_probs_dropout_prob'])
    cfg.MODEL.dtype = 'float32'
    cfg.INITIALIZER.weight = ['truncnorm', 0,
                              json_cfg['initializer_range']]  # TruncNorm(0, 0.02)
    cfg.INITIALIZER.bias = ['zeros']
    cfg.VERSION = 1
    if model_type == 'albert':
        # The configurations below are not supported in bert
        cfg.MODEL.embed_size = json_cfg['embedding_size']
        cfg.MODEL.num_groups = json_cfg['num_hidden_groups']
    cfg.freeze()
    return cfg
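
# Minimal usage sketch for convert_tf_config: write a tiny ALBERT-style json
# config to a temporary file and convert it. The field values below are
# illustrative only and do not correspond to any released checkpoint.
def _demo_convert_tf_config():
    import tempfile
    demo_cfg = {
        'vocab_size': 30000,
        'hidden_size': 768,
        'intermediate_size': 3072,
        'max_position_embeddings': 512,
        'num_attention_heads': 12,
        'num_hidden_layers': 12,
        'hidden_act': 'gelu',
        'type_vocab_size': 2,
        'hidden_dropout_prob': 0.1,
        'attention_probs_dropout_prob': 0.1,
        'initializer_range': 0.02,
        'embedding_size': 128,
        'num_hidden_groups': 1,
    }
    # Persist the demo config so convert_tf_config can read it from disk.
    with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as f:
        json.dump(demo_cfg, f)
        path = f.name
    return convert_tf_config(path, vocab_size=30000, model_type='albert')
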
def get_test_cfg():
    """Build a small ALBERT config for unit testing.

    The returned config is left unfrozen so callers can still override
    fields before freezing it.
    """
    vocab_size = 500
    num_token_types = 3
    num_layers = 3
    num_heads = 2
    units = 64
    hidden_size = 96
    hidden_dropout_prob = 0.0
    attention_dropout_prob = 0.0
    cfg = AlbertModel.get_cfg().clone()
    cfg.defrost()
    cfg.MODEL.vocab_size = vocab_size
    cfg.MODEL.num_token_types = num_token_types
    cfg.MODEL.units = units
    cfg.MODEL.hidden_size = hidden_size
    cfg.MODEL.num_heads = num_heads
    cfg.MODEL.num_layers = num_layers
    cfg.MODEL.hidden_dropout_prob = hidden_dropout_prob
    cfg.MODEL.attention_dropout_prob = attention_dropout_prob
    return cfg
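
# Usage sketch for get_test_cfg: instantiate a small model from the test
# config. This assumes GluonNLP models expose a ``from_cfg`` constructor and
# the standard Gluon ``initialize`` method; treat it as a sketch rather than
# part of this script's required flow.
def _demo_test_model():
    cfg = get_test_cfg()
    cfg.MODEL.dtype = 'float32'  # illustrative override while cfg is unfrozen
    cfg.freeze()
    model = AlbertModel.from_cfg(cfg)  # from_cfg is assumed available
    model.initialize()
    return model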