def __get_kobert_model(self):
    # Pooler only: the masked-LM decoder and next-sentence classifier heads
    # are not needed here.
    use_pooler = True
    use_decoder = False
    use_classifier = False

    # Build the BERT vocabulary from the SentencePiece tokenizer model.
    vocab_path = con.SENTIMENT_UTIL_PATH["tokenizer"]
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(
        vocab_path, padding_token="[PAD]")

    predefined_args = con.SENTIMENT_BERT_CONFIG
    # attention_cell, scaled and use_residual are not passed; newer gluonnlp
    # BERTEncoder versions no longer accept them.
    encoder = BERTEncoder(
        num_layers=predefined_args["num_layers"],
        units=predefined_args["units"],
        hidden_size=predefined_args["hidden_size"],
        max_length=predefined_args["max_length"],
        num_heads=predefined_args["num_heads"],
        dropout=predefined_args["dropout"],
        output_attention=True,
        output_all_encodings=False)

    # embed_dropout is likewise omitted for compatibility with newer gluonnlp.
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args["token_type_vocab_size"],
        units=predefined_args["units"],
        embed_size=predefined_args["embed_size"],
        word_embed=predefined_args["word_embed"],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier)

    # Initialize on the target context, then load the pretrained KoBERT weights.
    net.initialize(ctx=self.ctx)
    net.load_parameters(self.kobert_path, self.ctx, ignore_extra=True)
    return (net, vocab_b_obj)
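The method above reads its hyperparameters from con.SENTIMENT_BERT_CONFIG. A plausible layout for that constant, mirroring the predefined_args dicts in the stand-alone examples below, is sketched here; this is an assumption for illustration, not the project's actual configuration.

# Assumed layout of con.SENTIMENT_BERT_CONFIG (illustrative values only,
# matching the KoBERT/BERT-base settings used in the other examples).
SENTIMENT_BERT_CONFIG = {
    "num_layers": 12,
    "units": 768,
    "hidden_size": 3072,
    "max_length": 512,
    "num_heads": 12,
    "dropout": 0.1,
    "embed_size": 768,
    "token_type_vocab_size": 2,
    "word_embed": None,
}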
Example #2
import mxnet as mx
import gluonnlp as nlp
from gluonnlp.model import BERTEncoder, BERTModel


def get_kobert_model(
        model_file,
        vocab_file,
        use_pooler=True,
        use_decoder=True,
        use_classifier=True,
        ctx=mx.cpu(0),
):
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(
        vocab_file, padding_token="[PAD]")

    # KoBERT (BERT-base) hyperparameters. attention_cell, scaled, use_residual
    # and embed_dropout are kept for reference but not passed below.
    predefined_args = {
        "attention_cell": "multi_head",
        "num_layers": 12,
        "units": 768,
        "hidden_size": 3072,
        "max_length": 512,
        "num_heads": 12,
        "scaled": True,
        "dropout": 0.1,
        "use_residual": True,
        "embed_size": 768,
        "embed_dropout": 0.1,
        "token_type_vocab_size": 2,
        "word_embed": None,
    }

    encoder = BERTEncoder(
        num_layers=predefined_args["num_layers"],
        units=predefined_args["units"],
        hidden_size=predefined_args["hidden_size"],
        max_length=predefined_args["max_length"],
        num_heads=predefined_args["num_heads"],
        dropout=predefined_args["dropout"],
        output_attention=False,
        output_all_encodings=False,
    )

    # BERT model with optional pooler / decoder / classifier heads.
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args["token_type_vocab_size"],
        units=predefined_args["units"],
        embed_size=predefined_args["embed_size"],
        word_embed=predefined_args["word_embed"],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier,
    )
    net.initialize(ctx=ctx)
    net.load_parameters(model_file, ctx, ignore_extra=True)
    return (net, vocab_b_obj)
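A minimal usage sketch for the function above. The file paths and the input sentence are placeholders, and BERTSPTokenizer / BERTSentenceTransform are gluonnlp data utilities; this only illustrates the intended call pattern, not code from the original project.

# Hypothetical usage (placeholder paths): encode one sentence and run the encoder.
import mxnet as mx
import gluonnlp as nlp

net, vocab = get_kobert_model("kobert.params", "kobert.spiece",
                              use_decoder=False, use_classifier=False)
tok = nlp.data.BERTSPTokenizer("kobert.spiece", vocab, lower=False)
transform = nlp.data.BERTSentenceTransform(tok, max_seq_length=64,
                                           pad=True, pair=False)
token_ids, valid_length, segment_ids = transform(["안녕하세요"])
seq_out, pooled_out = net(mx.nd.array([token_ids]),
                          mx.nd.array([segment_ids]),
                          mx.nd.array([valid_length]))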
Example #3
def get_kobert_model(model_file,
                     vocab_file,
                     use_pooler=True,
                     use_decoder=True,
                     use_classifier=True,
                     ctx=mx.cpu(0)):
    with open(vocab_file, 'rt', encoding='utf-8') as f:
        vocab_b_obj = nlp.vocab.BERTVocab.from_json(f.read())

    predefined_args = {
        'attention_cell': 'multi_head',
        'num_layers': 12,
        'units': 768,
        'hidden_size': 3072,
        'max_length': 512,
        'num_heads': 12,
        'scaled': True,
        'dropout': 0.1,
        'use_residual': True,
        'embed_size': 768,
        'embed_dropout': 0.1,
        'token_type_vocab_size': 2,
        'word_embed': None,
    }

    encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                          num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          scaled=predefined_args['scaled'],
                          dropout=predefined_args['dropout'],
                          output_attention=False,
                          output_all_encodings=False,
                          use_residual=predefined_args['use_residual'])

    # BERT
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args['token_type_vocab_size'],
        units=predefined_args['units'],
        embed_size=predefined_args['embed_size'],
        embed_dropout=predefined_args['embed_dropout'],
        word_embed=predefined_args['word_embed'],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier)
    net.initialize(ctx=ctx)
    net.load_parameters(model_file, ctx, ignore_extra=True)
    return (net, vocab_b_obj)
def initialize_model(vocab_file,
                     use_pooler,
                     use_decoder,
                     use_classifier,
                     ctx=mx.cpu(0)):

    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(vocab_file,
                                                         padding_token='[PAD]')
    predefined_args = {
        'num_layers': 12,
        'units': 768,
        'hidden_size': 3072,
        'max_length': 512,
        'num_heads': 12,
        'dropout': 0.1,
        'embed_size': 768,
        'token_type_vocab_size': 2,
        'word_embed': None,
    }

    encoder = BERTEncoder(num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          dropout=predefined_args['dropout'],
                          output_attention=False,
                          output_all_encodings=False)
    # BERT
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args['token_type_vocab_size'],
        units=predefined_args['units'],
        embed_size=predefined_args['embed_size'],
        word_embed=predefined_args['word_embed'],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier)

    net.initialize(ctx=ctx)
    return vocab_b_obj, net
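initialize_model only builds and randomly initializes the network; a caller would typically load pretrained weights afterwards. A short sketch (placeholder file names):

# Hypothetical follow-up: load pretrained KoBERT weights into the new network.
vocab, net = initialize_model('kobert.spiece',
                              use_pooler=True,
                              use_decoder=False,
                              use_classifier=False)
net.load_parameters('kobert.params', ctx=mx.cpu(0), ignore_extra=True)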
    embedding[source_idx][:] = dst
    embedding[dst_idx][:] = source
logging.info('total number of tf parameters = %d', len(tf_tensors))
logging.info(
    'total number of mx parameters = %d (including decoder param for weight tying)',
    len(mx_tensors))

# XXX assume no changes in BERT configs
predefined_args = bert_hparams[args.model]

# BERT encoder
encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                      num_layers=predefined_args['num_layers'],
                      units=predefined_args['units'],
                      hidden_size=predefined_args['hidden_size'],
                      max_length=predefined_args['max_length'],
                      num_heads=predefined_args['num_heads'],
                      scaled=predefined_args['scaled'],
                      dropout=predefined_args['dropout'],
                      use_residual=predefined_args['use_residual'])

# BERT model
bert = BERTModel(
    encoder,
    len(vocab),
    token_type_vocab_size=predefined_args['token_type_vocab_size'],
    units=predefined_args['units'],
    embed_size=predefined_args['embed_size'],
    embed_dropout=predefined_args['embed_dropout'],
    word_embed=predefined_args['word_embed'],
    use_pooler=True,
    # Assumption: pooler-only heads, as in the save_model conversion below.
    use_decoder=False,
    use_classifier=False)
def save_model(new_gluon_parameters, output_dir):
    print('save model start'.center(60, '='))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # save model
    # load vocab
    # Keep only the first tab-separated field of each ERNIE vocab line and
    # write a plain token-per-line vocab.txt for load_text_vocab below.
    with open(os.path.join(output_dir, "vocab.txt"), "wt",
              encoding='utf-8') as vocab_f, \
            open(args.ernie_vocab_path, "rt", encoding='utf-8') as f:
        for line in f:
            data = line.strip().split("\t")
            vocab_f.write(data[0] + "\n")
    vocab = tf_vocab_to_gluon_vocab(
        load_text_vocab(os.path.join(output_dir, "vocab.txt")))
    # vocab serialization
    tmp_file_path = os.path.expanduser(os.path.join(output_dir, 'tmp'))
    if not os.path.exists(os.path.join(args.out_dir)):
        os.makedirs(os.path.join(args.out_dir))
    with open(tmp_file_path, 'w') as f:
        f.write(vocab.to_json())
    hash_full, hash_short = get_hash(tmp_file_path)
    gluon_vocab_path = os.path.expanduser(
        os.path.join(output_dir, hash_short + '.vocab'))
    with open(gluon_vocab_path, 'w') as f:
        f.write(vocab.to_json())
        logging.info('vocab file saved to %s. hash = %s', gluon_vocab_path,
                     hash_full)

    # BERT config
    tf_config_names_to_gluon_config_names = {
        'attention_probs_dropout_prob': 'dropout',
        'hidden_act': None,
        'hidden_dropout_prob': 'dropout',
        'hidden_size': 'units',
        'initializer_range': None,
        # 'intermediate_size': 'hidden_size',
        'max_position_embeddings': 'max_length',
        'num_attention_heads': 'num_heads',
        'num_hidden_layers': 'num_layers',
        'type_vocab_size': 'token_type_vocab_size',
        'vocab_size': None
    }
    predefined_args = bert_hparams[args.gluon_bert_model_base]
    with open(args.ernie_config_path, 'r') as f:
        tf_config = json.load(f)
        if 'layer_norm_eps' in tf_config:  # ignore layer_norm_eps
            del tf_config['layer_norm_eps']
        assert len(tf_config) == len(tf_config_names_to_gluon_config_names)
        for tf_name, gluon_name in tf_config_names_to_gluon_config_names.items():
            if tf_name is None or gluon_name is None:
                continue
            if gluon_name != 'max_length':
                assert tf_config[tf_name] == predefined_args[gluon_name]

    encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                          num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          scaled=predefined_args['scaled'],
                          dropout=predefined_args['dropout'],
                          use_residual=predefined_args['use_residual'],
                          activation='relu')

    bert = BERTModel(
        encoder,
        len(vocab),
        token_type_vocab_size=predefined_args['token_type_vocab_size'],
        units=predefined_args['units'],
        embed_size=predefined_args['embed_size'],
        word_embed=predefined_args['word_embed'],
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)

    bert.initialize(init=mx.init.Normal(0.02))

    ones = mx.nd.ones((2, 8))
    out = bert(ones, ones, mx.nd.array([5, 6]), mx.nd.array([[1], [2]]))
    params = bert._collect_params_with_prefix()
    assert len(params) == len(new_gluon_parameters), "Gluon model does not match paddle model. " \
                                                   "Please fix the BERTModel hyperparameters"

    # post processings for parameters:
    # - handle tied decoder weight
    new_gluon_parameters['decoder.3.weight'] = new_gluon_parameters[
        'word_embed.0.weight']
    # set parameter data
    loaded_params = {}
    for name in params:
        if name == 'word_embed.0.weight':
            arr = mx.nd.array(
                new_gluon_parameters[name][:params[name].shape[0]])
        else:
            arr = mx.nd.array(new_gluon_parameters[name])
        # Report a shape mismatch before set_data (which would then fail).
        if arr.shape != params[name].shape:
            print('shape mismatch for {}: {} vs {}'.format(
                name, arr.shape, params[name].shape))
        params[name].set_data(arr)
        loaded_params[name] = True

    # Sanity check: every Gluon parameter must have been filled from the paddle model.
    if len(params) != len(loaded_params):
        raise RuntimeError(
            'The Gluon BERTModel comprises {} parameter arrays, '
            'but {} have been extracted from the paddle model. '.format(
                len(params), len(loaded_params)))

    # param serialization
    bert.save_parameters(tmp_file_path)
    hash_full, hash_short = get_hash(tmp_file_path)
    gluon_param_path = os.path.expanduser(
        os.path.join(args.out_dir, hash_short + '.params'))
    logging.info('param saved to %s. hash = %s', gluon_param_path, hash_full)
    bert.save_parameters(gluon_param_path)
    mx.nd.waitall()
    print('finish save vocab')
    print('save model done!'.center(60, '='))
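If a quick sanity check is wanted, a few lines like the following could be appended at the end of save_model to reload what it just wrote. This is a sketch, not part of the original script, and it assumes gluonnlp is imported as nlp in that module.

    # Hypothetical sanity check: reload the serialized vocab and parameters.
    with open(gluon_vocab_path, 'r') as f:
        reloaded_vocab = nlp.vocab.BERTVocab.from_json(f.read())
    assert len(reloaded_vocab.idx_to_token) == len(vocab)
    bert.load_parameters(gluon_param_path, ctx=mx.cpu(0))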
Example #7
    'type_vocab_size': 'token_type_vocab_size',
    'vocab_size': None
}
predefined_args = bert_hparams[args.model]
with open(os.path.join(args.tf_config_name), 'r') as f:
    tf_config = json.load(f)
    assert len(tf_config) == len(tf_config_names_to_gluon_config_names)
    for tf_name, gluon_name in tf_config_names_to_gluon_config_names.items():
        if tf_name is None or gluon_name is None:
            continue
        assert tf_config[tf_name] == predefined_args[gluon_name]

# BERT encoder
encoder = BERTEncoder(num_layers=predefined_args['num_layers'],
                      units=predefined_args['units'],
                      hidden_size=predefined_args['hidden_size'],
                      max_length=predefined_args['max_length'],
                      num_heads=predefined_args['num_heads'],
                      dropout=predefined_args['dropout'])

# Infer enabled BERTModel components
use_pooler = any('pooler' in n for n in mx_tensors)
use_decoder = any('decoder.0' in n for n in mx_tensors)
use_classifier = any('classifier.weight' in n for n in mx_tensors)
# The NSP classifier head consumes the pooler output, so it cannot be enabled
# without the pooler.
if not use_pooler:
    use_classifier = False

logging.info(
    'Inferred that the tensorflow model provides the following parameters:')
logging.info('- use_pooler = %s', use_pooler)
logging.info('- use_decoder = %s', use_decoder)
logging.info('- use_classifier = %s', use_classifier)
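With these flags inferred, the natural next step is to build the Gluon BERTModel with them. Below is a sketch mirroring the constructions shown earlier in this section; the original listing does not show this call, and whether embed_dropout is passed depends on the gluonnlp version.

# Sketch: construct the Gluon BERTModel using the inferred head flags.
bert = BERTModel(encoder,
                 len(vocab),
                 token_type_vocab_size=predefined_args['token_type_vocab_size'],
                 units=predefined_args['units'],
                 embed_size=predefined_args['embed_size'],
                 word_embed=predefined_args['word_embed'],
                 use_pooler=use_pooler,
                 use_decoder=use_decoder,
                 use_classifier=use_classifier)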