def transform_gpt2_to_texar_config(input_json_path):
    """Remap the config file"""
    config_gpt = json.loads(open(input_json_path).read())
    configs = dict()
    configs["vocab_size"] = config_gpt["n_vocab"]
    configs["context_size"] = config_gpt["n_ctx"]
    configs["embedding_size"] = config_gpt["n_embd"]
    hidden_dim = config_gpt["n_embd"]
    configs["embed"] = {
        "dim": hidden_dim,
    }
    configs["position_size"] = config_gpt["n_ctx"]
    configs["pos_embed"] = {"dim": hidden_dim}
    configs["decoder"] = {
        "dim": hidden_dim,
        "num_blocks": config_gpt["n_layer"],
        "use_gpt_config": True,
        "embedding_dropout": 0,
        "residual_dropout": 0,
        "multihead_attention": {
            "use_bias": True,
            "num_units": hidden_dim,
            "num_heads": config_gpt["n_head"],
            "output_dim": hidden_dim,
        },
        "initializer": {
            "type": "variance_scaling_initializer",
            "kwargs": {
                "scale": 1.0,
                "mode": "FAN_AVG",
                "uniform": True,
            },
        },
        "poswise_feedforward": {
            "layers": [{
                "type": "Linear",
                "kwargs": {
                    "in_features": hidden_dim,
                    "out_features": hidden_dim * 4,
                    "bias": True,
                }
            }, {
                "type": "GPTGELU",
                "kwargs": {}
            }, {
                "type": "Linear",
                "kwargs": {
                    "in_features": hidden_dim * 4,
                    "out_features": hidden_dim,
                    "bias": True,
                }
            }],
            "name": "ffn",
        },
    }
    return HParams(configs, default_hparams=None)
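
# Usage sketch for the texar-pytorch variant above (Linear / GPTGELU layers).
# The file path is hypothetical; the field names and values mirror the
# hparams.json shipped with the released GPT-2 124M model. The HParams
# import path below is an assumption for texar-pytorch.
import json

from texar.torch import HParams  # assumed import; the TF version imports it from `texar`

gpt2_hparams = {
    "n_vocab": 50257,
    "n_ctx": 1024,
    "n_embd": 768,
    "n_head": 12,
    "n_layer": 12,
}
with open("gpt2_hparams.json", "w") as f:   # illustrative path
    json.dump(gpt2_hparams, f)

texar_config = transform_gpt2_to_texar_config("gpt2_hparams.json")
# HParams supports both attribute- and dict-style access.
print(texar_config.vocab_size)       # 50257
print(texar_config.decoder["dim"])   # 768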
def transform_bert_to_texar_config(input_json):
    """Remap a BERT config file into Texar encoder hyperparameters."""
    config_ckpt = json.loads(open(input_json).read())
    configs = {}
    configs['random_seed'] = 123
    configs['hidden_size'] = config_ckpt['hidden_size']
    hidden_dim = config_ckpt['hidden_size']
    configs['embed'] = {'name': 'word_embeddings', 'dim': hidden_dim}
    configs['vocab_size'] = config_ckpt['vocab_size']
    configs['segment_embed'] = {
        'name': 'token_type_embeddings',
        'dim': hidden_dim
    }
    configs['type_vocab_size'] = config_ckpt['type_vocab_size']
    configs['encoder'] = {
        'name': 'encoder',
        'position_embedder_type': 'variables',
        'position_size': config_ckpt['max_position_embeddings'],
        'position_embedder_hparams': {
            'dim': hidden_dim,
        },
        'embedding_dropout': config_ckpt['hidden_dropout_prob'],
        'num_blocks': config_ckpt['num_hidden_layers'],
        'multihead_attention': {
            'use_bias': True,
            'num_units': hidden_dim,
            'num_heads': config_ckpt['num_attention_heads'],
            'output_dim': hidden_dim,
            'dropout_rate': config_ckpt['attention_probs_dropout_prob'],
            'name': 'self'
        },
        'residual_dropout': config_ckpt['hidden_dropout_prob'],
        'dim': hidden_dim,
        'use_bert_config': True,
        'poswise_feedforward': {
            'layers': [
                {
                    'type': 'Dense',
                    'kwargs': {
                        'name': 'intermediate',
                        'units': config_ckpt['intermediate_size'],
                        'activation': config_ckpt['hidden_act'],
                        'use_bias': True,
                    }
                },
                {
                    'type': 'Dense',
                    'kwargs': {
                        'name': 'output',
                        'units': hidden_dim,
                        'activation': None,
                        'use_bias': True,
                    }
                },
            ],
        },
    }
    return HParams(configs, default_hparams=None)
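
# Usage sketch for the BERT conversion above. The file path is hypothetical;
# the values are the standard BERT-Base (uncased) configuration, and only the
# fields actually read by the function are shown.
import json

bert_config = {
    "hidden_size": 768,
    "vocab_size": 30522,
    "type_vocab_size": 2,
    "max_position_embeddings": 512,
    "num_hidden_layers": 12,
    "num_attention_heads": 12,
    "intermediate_size": 3072,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "attention_probs_dropout_prob": 0.1,
}
with open("bert_config.json", "w") as f:   # illustrative path
    json.dump(bert_config, f)

texar_config = transform_bert_to_texar_config("bert_config.json")
print(texar_config.encoder["num_blocks"])           # 12
print(texar_config.encoder["poswise_feedforward"])  # nested HParams with the two Dense layers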
def transform_gpt2_to_texar_config(input_json_path):
    """Remap the config file"""
    config_gpt = json.loads(open(input_json_path).read())
    configs = dict()
    configs["vocab_size"] = config_gpt["n_vocab"]
    configs["context_size"] = config_gpt["n_ctx"]
    configs["embedding_size"] = config_gpt["n_embd"]
    hidden_dim = config_gpt["n_embd"]
    configs["embed"] = {
        "dim": hidden_dim,
    }
    configs["position_size"] = config_gpt["n_ctx"]
    configs["pos_embed"] = {"dim": hidden_dim}
    configs["decoder"] = {
        "dim": hidden_dim,
        "num_blocks": config_gpt["n_layer"],
        "multihead_attention": {
            "use_bias": True,
            "num_units": hidden_dim,
            "num_heads": config_gpt["n_head"],
            "output_dim": hidden_dim,
        },
        "initializer": {
            "type": "variance_scaling_initializer",
            "kwargs": {
                "scale": 1.0,
                "mode": "fan_avg",
                "distribution": "uniform",
            },
        },
        "poswise_feedforward": {
            "layers": [{
                "type": "Dense",
                "kwargs": {
                    "name": "conv1",
                    "units": hidden_dim * 4,
                    "activation": "gelu",
                    "use_bias": True,
                }
            }, {
                "type": "Dense",
                "kwargs": {
                    "name": "conv2",
                    "units": hidden_dim,
                    "use_bias": True,
                }
            }],
            "name": "ffn",
        },
    }
    return HParams(configs, default_hparams=None)
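
# The TF-Texar variant above folds the GELU activation into the first Dense
# layer, while the texar-pytorch variant earlier spells out the same block as
# Linear -> GPTGELU -> Linear. A rough, hypothetical equivalence sketch in
# plain PyTorch of what either 'poswise_feedforward' spec describes, assuming
# hidden_dim = 768 (GPT-2 124M):
import torch.nn as nn

hidden_dim = 768

ffn_equivalent = nn.Sequential(
    nn.Linear(hidden_dim, hidden_dim * 4, bias=True),
    # GPTGELU is GPT-2's tanh-approximate GELU; nn.GELU() (or
    # nn.GELU(approximate="tanh") on newer PyTorch) is a close stand-in.
    nn.GELU(),
    nn.Linear(hidden_dim * 4, hidden_dim, bias=True),
)
# Both specs describe the same 768 -> 3072 -> 768 position-wise feed-forward
# block; the TF spec simply attaches the activation to the first Dense layer.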
def __init__(self, data_hparams, hparams=None):
    ModelBase.__init__(self, hparams)
    self._data_hparams = HParams(data_hparams,
                                 PairedTextData.default_hparams())
    # Sub-components are initialized to None and built later.
    self._src_vocab = None
    self._tgt_vocab = None
    self._src_embedder = None
    self._tgt_embedder = None
    self._connector = None
    self._encoder = None
    self._decoder = None
def __init__(self, hparams=None):
    # Merge user-provided hyperparameters over the class defaults;
    # allow_new_hparam=True permits keys absent from default_hparams().
    self._hparams = HParams(hparams, self.default_hparams(),
                            allow_new_hparam=True)
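
# Minimal sketch of the HParams merge behaviour this constructor relies on.
# The class and default values below are hypothetical; the import path
# assumes texar-pytorch (plain `texar` for the TF version).
from texar.torch import HParams


class ToyModule:
    """Hypothetical module following the constructor pattern above."""

    def __init__(self, hparams=None):
        self._hparams = HParams(hparams, self.default_hparams(),
                                allow_new_hparam=True)

    @staticmethod
    def default_hparams():
        return {
            "name": "toy_module",
            "dim": 256,
        }


m = ToyModule(hparams={"dim": 512, "extra_flag": True})
print(m._hparams.name)        # "toy_module"  (taken from the defaults)
print(m._hparams.dim)         # 512           (user override wins)
print(m._hparams.extra_flag)  # True          (accepted because allow_new_hparam=True)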