import gluonnlp as nlp
import mxnet as mx
from gluonnlp.model import BERTEncoder, BERTModel

# `con` (project constants), `args`, `bert_hparams`, `get_hash`,
# `tf_vocab_to_gluon_vocab` and `load_text_vocab` referenced in the snippets
# below come from the surrounding project files of each source.


def __get_kobert_model(self):
    use_pooler = True
    use_decoder = False
    use_classifier = False
    vocab_path = con.SENTIMENT_UTIL_PATH["tokenizer"]
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(
        vocab_path, padding_token="[PAD]")
    predefined_args = con.SENTIMENT_BERT_CONFIG
    # attention_cell, scaled and use_residual (and embed_dropout below) are
    # not accepted by newer GluonNLP BERTEncoder/BERTModel, so they were
    # commented out upstream and are dropped here.
    encoder = BERTEncoder(
        num_layers=predefined_args["num_layers"],
        units=predefined_args["units"],
        hidden_size=predefined_args["hidden_size"],
        max_length=predefined_args["max_length"],
        num_heads=predefined_args["num_heads"],
        dropout=predefined_args["dropout"],
        output_attention=True,
        output_all_encodings=False)
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args["token_type_vocab_size"],
        units=predefined_args["units"],
        embed_size=predefined_args["embed_size"],
        word_embed=predefined_args["word_embed"],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier)
    net.initialize(ctx=self.ctx)
    net.load_parameters(self.kobert_path, self.ctx, ignore_extra=True)
    return (net, vocab_b_obj)
def get_kobert_model(model_file,
                     vocab_file,
                     use_pooler=True,
                     use_decoder=True,
                     use_classifier=True,
                     ctx=mx.cpu(0)):
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(
        vocab_file, padding_token="[PAD]")
    predefined_args = {
        "attention_cell": "multi_head",
        "num_layers": 12,
        "units": 768,
        "hidden_size": 3072,
        "max_length": 512,
        "num_heads": 12,
        "scaled": True,
        "dropout": 0.1,
        "use_residual": True,
        "embed_size": 768,
        "embed_dropout": 0.1,
        "token_type_vocab_size": 2,
        "word_embed": None,
    }
    # Newer GluonNLP BERTEncoder no longer takes attention_cell/scaled/
    # use_residual (nor BERTModel embed_dropout), so those keys are unused.
    encoder = BERTEncoder(
        num_layers=predefined_args["num_layers"],
        units=predefined_args["units"],
        hidden_size=predefined_args["hidden_size"],
        max_length=predefined_args["max_length"],
        num_heads=predefined_args["num_heads"],
        dropout=predefined_args["dropout"],
        output_attention=False,
        output_all_encodings=False)
    # BERT
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args["token_type_vocab_size"],
        units=predefined_args["units"],
        embed_size=predefined_args["embed_size"],
        word_embed=predefined_args["word_embed"],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier)
    net.initialize(ctx=ctx)
    net.load_parameters(model_file, ctx, ignore_extra=True)
    return (net, vocab_b_obj)
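# Hedged usage sketch, not part of the original sources: exercising the
# loaded KoBERT with dummy token ids. The checkpoint/vocab paths are
# placeholders, not real artifacts.
net, vocab = get_kobert_model("/path/to/kobert.params",
                              "/path/to/kobert.spiece",
                              use_decoder=False,
                              use_classifier=False,
                              ctx=mx.cpu(0))
token_ids = mx.nd.ones((1, 8))     # batch of 1, sequence length 8
segment_ids = mx.nd.zeros((1, 8))
valid_length = mx.nd.array([8])
# With use_pooler=True the model returns the per-token encodings and the
# pooled [CLS] representation.
seq_encoding, pooled = net(token_ids, segment_ids, valid_length)
print(seq_encoding.shape, pooled.shape)  # (1, 8, 768) and (1, 768)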
def get_kobert_model(model_file,
                     vocab_file,
                     use_pooler=True,
                     use_decoder=True,
                     use_classifier=True,
                     ctx=mx.cpu(0)):
    # This variant reads a JSON-serialized vocabulary and targets the older
    # GluonNLP API that still accepts attention_cell/scaled/use_residual.
    with open(vocab_file, 'rt') as f:
        vocab_b_obj = nlp.vocab.BERTVocab.from_json(f.read())
    predefined_args = {
        'attention_cell': 'multi_head',
        'num_layers': 12,
        'units': 768,
        'hidden_size': 3072,
        'max_length': 512,
        'num_heads': 12,
        'scaled': True,
        'dropout': 0.1,
        'use_residual': True,
        'embed_size': 768,
        'embed_dropout': 0.1,
        'token_type_vocab_size': 2,
        'word_embed': None,
    }
    encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                          num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          scaled=predefined_args['scaled'],
                          dropout=predefined_args['dropout'],
                          output_attention=False,
                          output_all_encodings=False,
                          use_residual=predefined_args['use_residual'])
    # BERT
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args['token_type_vocab_size'],
        units=predefined_args['units'],
        embed_size=predefined_args['embed_size'],
        embed_dropout=predefined_args['embed_dropout'],
        word_embed=predefined_args['word_embed'],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier)
    net.initialize(ctx=ctx)
    net.load_parameters(model_file, ctx, ignore_extra=True)
    return (net, vocab_b_obj)
def initialize_model(vocab_file, use_pooler, use_decoder, use_classifier,
                     ctx=mx.cpu(0)):
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(
        vocab_file, padding_token='[PAD]')
    predefined_args = {
        'num_layers': 12,
        'units': 768,
        'hidden_size': 3072,
        'max_length': 512,
        'num_heads': 12,
        'dropout': 0.1,
        'embed_size': 768,
        'token_type_vocab_size': 2,
        'word_embed': None,
    }
    encoder = BERTEncoder(num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          dropout=predefined_args['dropout'],
                          output_attention=False,
                          output_all_encodings=False)
    # BERT
    net = BERTModel(
        encoder,
        len(vocab_b_obj.idx_to_token),
        token_type_vocab_size=predefined_args['token_type_vocab_size'],
        units=predefined_args['units'],
        embed_size=predefined_args['embed_size'],
        word_embed=predefined_args['word_embed'],
        use_pooler=use_pooler,
        use_decoder=use_decoder,
        use_classifier=use_classifier)
    net.initialize(ctx=ctx)
    return vocab_b_obj, net
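# Hedged usage sketch (an assumption, not from the original source): unlike
# get_kobert_model() above, initialize_model() does not load a checkpoint,
# so the caller loads the parameters afterwards. The paths are placeholders.
vocab, net = initialize_model('/path/to/kobert.spiece',
                              use_pooler=True,
                              use_decoder=False,
                              use_classifier=False)
net.load_parameters('/path/to/kobert.params', ctx=mx.cpu(0),
                    ignore_extra=True)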
embedding[source_idx][:] = dst
embedding[dst_idx][:] = source
logging.info('total number of tf parameters = %d', len(tf_tensors))
logging.info(
    'total number of mx parameters = %d (including decoder param for weight tying)',
    len(mx_tensors))

# XXX assume no changes in BERT configs
predefined_args = bert_hparams[args.model]

# BERT encoder
encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                      num_layers=predefined_args['num_layers'],
                      units=predefined_args['units'],
                      hidden_size=predefined_args['hidden_size'],
                      max_length=predefined_args['max_length'],
                      num_heads=predefined_args['num_heads'],
                      scaled=predefined_args['scaled'],
                      dropout=predefined_args['dropout'],
                      use_residual=predefined_args['use_residual'])

# BERT model
bert = BERTModel(
    encoder,
    len(vocab),
    token_type_vocab_size=predefined_args['token_type_vocab_size'],
    units=predefined_args['units'],
    embed_size=predefined_args['embed_size'],
    embed_dropout=predefined_args['embed_dropout'],
    word_embed=predefined_args['word_embed'],
    use_pooler=True,
def save_model(new_gluon_parameters, output_dir):
    print('save model start'.center(60, '='))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # save model
    # load vocab
    vocab_f = open(os.path.join(output_dir, "vocab.txt"), "wt",
                   encoding='utf-8')
    with open(args.ernie_vocab_path, "rt", encoding='utf-8') as f:
        for line in f:
            data = line.strip().split("\t")
            vocab_f.writelines(data[0] + "\n")
    vocab_f.close()
    vocab = tf_vocab_to_gluon_vocab(
        load_text_vocab(os.path.join(output_dir, "vocab.txt")))
    # vocab serialization
    tmp_file_path = os.path.expanduser(os.path.join(output_dir, 'tmp'))
    if not os.path.exists(os.path.join(args.out_dir)):
        os.makedirs(os.path.join(args.out_dir))
    with open(tmp_file_path, 'w') as f:
        f.write(vocab.to_json())
    hash_full, hash_short = get_hash(tmp_file_path)
    gluon_vocab_path = os.path.expanduser(
        os.path.join(output_dir, hash_short + '.vocab'))
    with open(gluon_vocab_path, 'w') as f:
        f.write(vocab.to_json())
    logging.info('vocab file saved to %s. hash = %s', gluon_vocab_path,
                 hash_full)
    # BERT config
    tf_config_names_to_gluon_config_names = {
        'attention_probs_dropout_prob': 'dropout',
        'hidden_act': None,
        'hidden_dropout_prob': 'dropout',
        'hidden_size': 'units',
        'initializer_range': None,
        # 'intermediate_size': 'hidden_size',
        'max_position_embeddings': 'max_length',
        'num_attention_heads': 'num_heads',
        'num_hidden_layers': 'num_layers',
        'type_vocab_size': 'token_type_vocab_size',
        'vocab_size': None
    }
    predefined_args = bert_hparams[args.gluon_bert_model_base]
    with open(args.ernie_config_path, 'r') as f:
        tf_config = json.load(f)
    if 'layer_norm_eps' in tf_config:  # ignore layer_norm_eps
        del tf_config['layer_norm_eps']
    assert len(tf_config) == len(tf_config_names_to_gluon_config_names)
    for tf_name, gluon_name in tf_config_names_to_gluon_config_names.items():
        if tf_name is None or gluon_name is None:
            continue
        if gluon_name != 'max_length':
            assert tf_config[tf_name] == predefined_args[gluon_name]
    encoder = BERTEncoder(attention_cell=predefined_args['attention_cell'],
                          num_layers=predefined_args['num_layers'],
                          units=predefined_args['units'],
                          hidden_size=predefined_args['hidden_size'],
                          max_length=predefined_args['max_length'],
                          num_heads=predefined_args['num_heads'],
                          scaled=predefined_args['scaled'],
                          dropout=predefined_args['dropout'],
                          use_residual=predefined_args['use_residual'],
                          activation='relu')
    bert = BERTModel(
        encoder,
        len(vocab),
        token_type_vocab_size=predefined_args['token_type_vocab_size'],
        units=predefined_args['units'],
        embed_size=predefined_args['embed_size'],
        word_embed=predefined_args['word_embed'],
        use_pooler=True,
        use_decoder=False,
        use_classifier=False)
    bert.initialize(init=mx.init.Normal(0.02))
    # Run a dummy forward pass so that all parameter shapes are inferred.
    ones = mx.nd.ones((2, 8))
    out = bert(ones, ones, mx.nd.array([5, 6]), mx.nd.array([[1], [2]]))
    params = bert._collect_params_with_prefix()
    assert len(params) == len(new_gluon_parameters), \
        "Gluon model does not match paddle model. " \
        "Please fix the BERTModel hyperparameters"

    # post processing for parameters:
    # - handle tied decoder weight
    # - update word embedding for reserved tokens
    new_gluon_parameters['decoder.3.weight'] = new_gluon_parameters[
        'word_embed.0.weight']
    # set parameter data
    loaded_params = {}
    for name in params:
        if name == 'word_embed.0.weight':
            arr = mx.nd.array(
                new_gluon_parameters[name][:params[name].shape[0]])
        else:
            arr = mx.nd.array(new_gluon_parameters[name])
        assert arr.shape == params[name].shape, \
            'shape mismatch for {}'.format(name)
        params[name].set_data(arr)
        loaded_params[name] = True
    if len(params) != len(loaded_params):
        raise RuntimeError(
            'The Gluon BERTModel comprises {} parameter arrays, '
            'but {} have been extracted from the paddle model. '.format(
                len(params), len(loaded_params)))
    # param serialization
    bert.save_parameters(tmp_file_path)
    hash_full, hash_short = get_hash(tmp_file_path)
    gluon_param_path = os.path.expanduser(
        os.path.join(args.out_dir, hash_short + '.params'))
    logging.info('param saved to %s. hash = %s', gluon_param_path, hash_full)
    bert.save_parameters(gluon_param_path)
    mx.nd.waitall()
    # save config
    print('finish save vocab')
    print('save model done!'.center(60, '='))
    'type_vocab_size': 'token_type_vocab_size',
    'vocab_size': None
}
predefined_args = bert_hparams[args.model]
with open(os.path.join(args.tf_config_name), 'r') as f:
    tf_config = json.load(f)
assert len(tf_config) == len(tf_config_names_to_gluon_config_names)
for tf_name, gluon_name in tf_config_names_to_gluon_config_names.items():
    if tf_name is None or gluon_name is None:
        continue
    assert tf_config[tf_name] == predefined_args[gluon_name]

# BERT encoder
encoder = BERTEncoder(num_layers=predefined_args['num_layers'],
                      units=predefined_args['units'],
                      hidden_size=predefined_args['hidden_size'],
                      max_length=predefined_args['max_length'],
                      num_heads=predefined_args['num_heads'],
                      dropout=predefined_args['dropout'])

# Infer enabled BERTModel components from the checkpoint's parameter names
use_pooler = any('pooler' in n for n in mx_tensors)
use_decoder = any('decoder.0' in n for n in mx_tensors)
use_classifier = any('classifier.weight' in n for n in mx_tensors)
if not use_pooler:
    use_classifier = False
logging.info(
    'Inferred that the tensorflow model provides the following parameters:')
logging.info('- use_pooler = {}'.format(use_pooler))
logging.info('- use_decoder = {}'.format(use_decoder))
logging.info('- use_classifier = {}'.format(use_classifier))
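# Toy illustration (hypothetical parameter names, not from the conversion
# script): how the component inference above behaves when the checkpoint
# carries a pooler but neither a decoder nor a classifier.
mx_tensors_example = [
    'word_embed.0.weight',
    'encoder.transformer_cells.0.attention_cell.proj_query.weight',
    'pooler.weight',
    'pooler.bias',
]
assert any('pooler' in n for n in mx_tensors_example)            # pooler on
assert not any('decoder.0' in n for n in mx_tensors_example)     # no decoder
assert not any('classifier.weight' in n for n in mx_tensors_example)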