def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels):
    """Build a BERT sentence-classification graph.

    :param bert_config: BertConfig for the underlying BertModel
    :param is_training: bool; when True, dropout is applied to the pooled output
    :param input_ids: int Tensor [batch_size, seq_length], token ids
    :param input_mask: int Tensor [batch_size, seq_length], attention mask
    :param segment_ids: int Tensor [batch_size, seq_length], segment ids
    :param labels: int Tensor [batch_size], gold label ids
    :param num_labels: number of target classes
    :return: tuple (loss, per_example_loss, logits, probabilities)
    """
    # Encode the batch with BERT. (Docstring previously advertised a
    # `use_one_hot_embedding` parameter that never existed; removed.)
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
    )
    # [CLS]-pooled output is the sentence representation for classification.
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value

    # Classification head: W [num_labels, hidden], b [num_labels].
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout, train time only.
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)
        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        probabilities = tf.nn.softmax(logits, axis=-1)
        log_probs = tf.nn.log_softmax(logits, axis=-1)
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)
        # Softmax cross-entropy, written out explicitly.
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, per_example_loss, logits, probabilities)
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, dropout_rate=1.0,
                 lstm_size=1, cell='lstm', num_layers=1):
    """Build a BERT + BiLSTM-CRF sequence-tagging graph.

    :param bert_config: BERT configuration object
    :param is_training: whether the graph is built for training
    :param input_ids: token-id tensor [batch_size, seq_length]
    :param input_mask: attention-mask tensor
    :param segment_ids: segment-id tensor
    :param labels: gold label-id tensor
    :param num_labels: number of tag classes
    :param use_one_hot_embeddings: forwarded to BertModel
    :param dropout_rate: dropout used inside the BLSTM-CRF head
    :param lstm_size: hidden units per LSTM cell
    :param cell: RNN cell type (e.g. 'lstm')
    :param num_layers: stacked RNN layers
    :return: whatever BLSTM_CRF.add_blstm_crf_layer returns (loss/logits/...)
    """
    import tensorflow as tf
    from bert_base.bert import modeling

    # Encode the batch with BERT; per-token embeddings come from the last layer.
    bert = modeling.BertModel(config=bert_config,
                              is_training=is_training,
                              input_ids=input_ids,
                              input_mask=input_mask,
                              token_type_ids=segment_ids,
                              use_one_hot_embeddings=use_one_hot_embeddings)
    # [batch_size, seq_length, embedding_size]
    char_embeddings = bert.get_sequence_output()
    padded_length = char_embeddings.shape[1].value

    # True (unpadded) length per example: non-zero token ids count as tokens.
    true_lengths = tf.reduce_sum(tf.sign(tf.abs(input_ids)),
                                 reduction_indices=1)  # [batch_size]

    # BiLSTM-CRF decoding head on top of the BERT features.
    tagger = BLSTM_CRF(embedded_chars=char_embeddings,
                       hidden_unit=lstm_size,
                       cell_type=cell,
                       num_layers=num_layers,
                       dropout_rate=dropout_rate,
                       initializers=initializers,
                       num_labels=num_labels,
                       seq_length=padded_length,
                       labels=labels,
                       lengths=true_lengths,
                       is_training=is_training)
    return tagger.add_blstm_crf_layer(crf_only=False)
def bert_model(input_ids, is_training):
    """Build a BertModel over `input_ids` and return its sequence output.

    :param input_ids: int Tensor of token ids (shape inferred via tf.shape)
    :param is_training: forwarded to BertModel
    :return: last-layer token embeddings from model.get_sequence_output()
    """
    # NOTE(review): `sess` is opened but never used — graph construction does
    # not need a session; consider removing the Session context.
    with tf.Session() as sess:
        # NOTE(review): both the attention mask and token types are all-ZEROS.
        # A zero input_mask normally marks every position as padding; confirm
        # this is intentional (an all-ones mask is the usual choice).
        # np.int32 is accepted by TF as a dtype here.
        input_mask = tf.zeros(shape=tf.shape(input_ids), dtype=np.int32)
        token_type_ids = tf.zeros(shape=tf.shape(input_ids), dtype=np.int32)
        model = modeling.BertModel(config=bert_config,
                                   is_training=is_training,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=token_type_ids,
                                   use_one_hot_embeddings=True)
        # call init_from_checkpoint, then initialize variables
        # (checkpoint-restore code below is intentionally disabled)
        # graph = tf.get_default_graph()
        # tvars = tf.trainable_variables()
        #
        # (assignment_map,
        #  initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
        #     tvars, init_checkpoint)
        #
        # tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        # # initialize all variables
        #
        # tf.logging.info("**** Trainable Variables ****")
        # for var in tvars:
        #     init_string = ""
        #     if var.name in initialized_variable_names:
        #         init_string = ", *INIT_FROM_CKPT*"
        #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
        #                     init_string)
        # sess.run(tf.global_variables_initializer())
        # Last-layer per-token embeddings.
        embeddings = model.get_sequence_output()
        return embeddings
def create_model(
    bert_config,
    is_training,
    input_ids,
    input_mask,
    segment_ids,
    use_one_hot_embeddings,
):
    """Return BERT's last-layer token embeddings for the given batch.

    :param bert_config: BERT configuration
    :param is_training: whether dropout etc. are active inside BERT
    :param input_ids: token-id tensor
    :param input_mask: attention-mask tensor
    :param segment_ids: segment-id tensor
    :param use_one_hot_embeddings: forwarded to BertModel
    :return: Tensor (batch_size, seq_length, embedding_size)
    """
    # Encode the inputs with BERT and take the final encoder layer.
    bert = modeling.BertModel(config=bert_config,
                              is_training=is_training,
                              input_ids=input_ids,
                              input_mask=input_mask,
                              token_type_ids=segment_ids,
                              use_one_hot_embeddings=use_one_hot_embeddings)
    seq_output = bert.get_sequence_output()  # (batch, seq_len, emb_size)
    print('bert最有一个隐层的输出维度:', seq_output.shape)
    return seq_output
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, dropout_rate=1.0,
                 lstm_size=1, cell='lstm', num_layers=1):
    """Build a BERT + CRF sequence-tagging model (CRF-only decoding).

    :param bert_config: BERT configuration
    :param is_training: whether the graph is built for training
    :param input_ids: token-id tensor [batch_size, seq_length]
    :param input_mask: attention-mask tensor
    :param segment_ids: segment-id tensor
    :param labels: gold label-id tensor
    :param num_labels: number of tag classes
    :param use_one_hot_embeddings: forwarded to BertModel
    :param dropout_rate: dropout used inside the CRF head
    :param lstm_size: hidden units per LSTM cell (unused when crf_only=True)
    :param cell: RNN cell type
    :param num_layers: stacked RNN layers
    :return: result of BLSTM_CRF.add_blstm_crf_layer(crf_only=True)
    """
    # Encode the batch with BERT to get per-token embeddings.
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # True sequence lengths: count non-zero (non-padding) token ids.
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector of real lengths
    # CRF output layer on top of BERT.
    # BUG FIX: keyword was misspelled `embedd_chars`; every other BLSTM_CRF
    # call site in this codebase uses `embedded_chars`, and the misspelling
    # would raise TypeError at construction time.
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=lstm_size,
                          cell_type=cell,
                          num_layers=num_layers,
                          dropout_rate=dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    # crf_only=True: decode straight from BERT features, no extra BiLSTM.
    res = blstm_crf.add_blstm_crf_layer(crf_only=True)
    return res
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels, use_one_hot_embeddings, dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1): """ 创建X模型 :param bert_config: bert 配置 :param is_training: :param input_ids: 数据的idx 表示 :param input_mask: :param segment_ids: :param labels: 标签的idx 表示 :param num_labels: 类别数量 :param use_one_hot_embeddings: :return: """ # 使用数据加载BertModel,获取对应的字embedding import tensorflow as tf from bert_base.bert import modeling model = modeling.BertModel(config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) # 获取对应的embedding 输入数据[batch_size, seq_length, embedding_size] label_vocab = {"O": 0, "B": 1, "I": 2, "X": 3, "[CLS]": 4, "[SEP]": 5} SPAN_TYPE = "IOB2" mask = transition_mask(label_vocab, SPAN_TYPE, label_vocab["[CLS]"], label_vocab["[SEP]"]) embedding = model.get_sequence_output() max_seq_length = embedding.shape[1].value # 算序列真实长度 used = tf.sign(tf.abs(input_ids)) lengths = tf.reduce_sum( used, reduction_indices=1) # [batch_size] 大小的向量,包含了当前batch中的序列长度 # 添加CRF output layer blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=lstm_size, cell_type=cell, num_layers=num_layers, dropout_rate=dropout_rate, initializers=initializers, num_labels=num_labels, seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training) rst = blstm_crf.add_blstm_crf_layer(crf_only=True) return rst
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings, dropout_rate=1.0,
                 lstm_size=1, cell='lstm', num_layers=1, crf_only=False,
                 lstm_only=False):
    """Build a BERT + (BiLSTM-)CRF tagging graph under a 'finetune' scope.

    :param bert_config: BERT configuration
    :param is_training: whether the graph is built for training
    :param input_ids: token-id tensor [batch_size, seq_length]
    :param input_mask: attention-mask tensor
    :param segment_ids: segment-id tensor
    :param labels: gold label-id tensor
    :param num_labels: number of tag classes
    :param use_one_hot_embeddings: forwarded to BertModel
    :param dropout_rate: dropout used inside the tagging head
    :param lstm_size: hidden units per LSTM cell
    :param cell: RNN cell type
    :param num_layers: stacked RNN layers
    :param crf_only: decode with CRF directly on BERT features
    :param lstm_only: use only the LSTM (no CRF) in the head
    :return: result of BLSTM_CRF.add_blstm_crf_layer
    """
    import tensorflow as tf
    from bert_base.bert import modeling

    # Encode the batch with BERT; per-token features from the last layer.
    bert = modeling.BertModel(config=bert_config,
                              is_training=is_training,
                              input_ids=input_ids,
                              input_mask=input_mask,
                              token_type_ids=segment_ids,
                              use_one_hot_embeddings=use_one_hot_embeddings)
    token_embeddings = bert.get_sequence_output()  # [B, T, H]
    padded_length = token_embeddings.shape[1].value

    # True length per example (non-zero ids are real tokens).
    true_lengths = tf.reduce_sum(tf.sign(tf.abs(input_ids)),
                                 reduction_indices=1)  # [batch_size]

    # Tagging head lives under its own variable scope so its weights are
    # separated from the pre-trained BERT variables.
    with tf.variable_scope('finetune'):
        head = BLSTM_CRF(embedded_chars=token_embeddings,
                         input_mask=input_mask,
                         hidden_unit=lstm_size,
                         cell_type=cell,
                         num_layers=num_layers,
                         dropout_rate=dropout_rate,
                         initializers=initializers,
                         num_labels=num_labels,
                         seq_length=padded_length,
                         labels=labels,
                         lengths=true_lengths,
                         is_training=is_training)
        result = head.add_blstm_crf_layer(crf_only=crf_only,
                                          lstm_only=lstm_only)
    return result
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator.

    Builds a PREDICT-only graph that returns selected encoder-layer outputs
    per example. Relies on enclosing-scope names: `bert_config`,
    `use_one_hot_embeddings`, `init_checkpoint`, `use_tpu`, `layer_indexes`.
    """
    # Unpack the feature dict produced by the input_fn.
    unique_ids = features["unique_ids"]
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    input_type_ids = features["input_type_ids"]
    # Inference-only BERT (is_training=False disables dropout).
    model = modeling.BertModel(
        config=bert_config,
        is_training=False,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=input_type_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)
    # This model_fn only supports feature extraction, not train/eval.
    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError("Only PREDICT modes are supported: %s" % (mode))
    tvars = tf.trainable_variables()
    scaffold_fn = None
    # Map checkpoint variables onto the freshly built graph.
    (assignment_map, initialized_variable_names
     ) = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint)
    if use_tpu:
        # On TPU, checkpoint restoration must happen inside the Scaffold fn.
        def tpu_scaffold():
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
            return tf.train.Scaffold()

        scaffold_fn = tpu_scaffold
    else:
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    # Log which variables were initialized from the checkpoint.
    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                        init_string)
    all_layers = model.get_all_encoder_layers()
    predictions = {
        "unique_id": unique_ids,
    }
    # Export one output tensor per requested encoder layer.
    for (i, layer_index) in enumerate(layer_indexes):
        predictions["layer_output_%d" % i] = all_layers[layer_index]
    output_spec = tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                  predictions=predictions,
                                                  scaffold_fn=scaffold_fn)
    return output_spec
def bert_layer(self):
    """Encode `self.input_ids` with BERT and store the dropped-out
    sequence output in `self.model_inputs` (raw output in `self.embedded`)."""
    config = modeling.BertConfig.from_json_file(ARGS.bert_config)
    encoder = modeling.BertModel(config=config,
                                 is_training=self.is_training,
                                 input_ids=self.input_ids,
                                 input_mask=self.input_mask,
                                 token_type_ids=self.segment_ids,
                                 use_one_hot_embeddings=True)
    # Last-layer per-token embeddings, then dropout for regularization.
    self.embedded = encoder.get_sequence_output()
    self.model_inputs = tf.nn.dropout(self.embedded, self.dropout)
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Build a BERT + BLSTM-CRF tagging model configured via FLAGS.

    :param bert_config: BERT configuration
    :param is_training: whether the graph is built for training
    :param input_ids: token-id tensor [batch_size, seq_length]
    :param input_mask: attention-mask tensor
    :param segment_ids: segment-id tensor
    :param labels: gold label-id tensor
    :param num_labels: number of tag classes
    :param use_one_hot_embeddings: forwarded to BertModel
    :return: result of BLSTM_CRF.add_blstm_crf_layer (loss, logits, trans,
        pred_ids)
    :raises: re-raises any exception from add_blstm_crf_layer after logging
    """
    # Encode the batch with BERT to get per-token embeddings.
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    print("获取到bert的输出")
    # [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # True sequence lengths: count of non-zero (non-padding) token ids.
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # [batch_size] vector of real lengths
    # Feed the embeddings into the BLSTM-CRF head. BERT is already
    # bidirectional, so the LSTM is optional; the final layer could be handed
    # straight to the CRF for decoding.
    blstm_crf = BLSTM_CRF(embedded_chars=embedding,
                          hidden_unit=FLAGS.lstm_size,
                          cell_type=FLAGS.cell,
                          num_layers=FLAGS.num_layers,
                          dropout_rate=FLAGS.dropout_rate,
                          initializers=initializers,
                          num_labels=num_labels,
                          seq_length=max_seq_length,
                          labels=labels,
                          lengths=lengths,
                          is_training=is_training)
    # BUG FIX: the original swallowed the exception and then executed
    # `return rst` with `rst` unbound, raising a confusing NameError.
    # Log the real error and re-raise it instead.
    try:
        rst = blstm_crf.add_blstm_crf_layer()
    except Exception as e:
        print(str(e))
        raise
    return rst
def bert_layer(self):
    """Build the BERT encoder and expose both token-level and pooled
    embeddings, each followed by dropout.

    Sets: self.embedded_sentence, self.model_inputs (token-level) and
    self.embedded_pooled, self.model_inputs_1 (sentence-level).
    """
    config = modeling.BertConfig.from_json_file(self.bert_config)
    encoder = modeling.BertModel(
        config=config,
        is_training=self.is_training,
        input_ids=self.input_ids,
        input_mask=self.input_mask,
        token_type_ids=self.segment_ids,
        use_one_hot_embeddings=False
    )
    # Per-token embeddings from the last encoder layer.
    self.embedded_sentence = encoder.get_sequence_output()
    self.model_inputs = tf.nn.dropout(self.embedded_sentence, self.dropout)
    # Pooled [CLS] embedding for the whole sentence.
    self.embedded_pooled = encoder.get_pooled_output()
    self.model_inputs_1 = tf.nn.dropout(self.embedded_pooled, self.dropout)
def optimize_bert_graph(args, logger=None):
    """Build a frozen, inference-optimized BERT pooling graph and write it to
    `<args.model_pb_dir>/bert_model.pb`.

    :param args: namespace carrying model paths and options (model_pb_dir,
        model_dir, config_name, bert_model_dir, tuned_model_dir, ckpt_name,
        max_seq_len, pooling_layer, pooling_strategy, fp16, xla, verbose)
    :param logger: optional logger; a default one is created when omitted
    :return: path to the generated (or pre-existing) .pb file, or None when
        optimization fails (the failure is logged, not raised)
    """
    if not logger:
        logger = set_logger(colored('GRAPHOPT', 'cyan'), args.verbose)
    try:
        if not os.path.exists(args.model_pb_dir):
            os.mkdir(args.model_pb_dir)
        pb_file = os.path.join(args.model_pb_dir, 'bert_model.pb')
        # Reuse an already-optimized graph if present.
        if os.path.exists(pb_file):
            return pb_file

        # we don't need GPU for optimizing the graph
        tf = import_tf(verbose=args.verbose)
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

        config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True)

        config_fp = os.path.join(args.model_dir, args.config_name)
        # A fine-tuned checkpoint (if provided) overrides the pre-trained one.
        init_checkpoint = os.path.join(args.tuned_model_dir or args.bert_model_dir, args.ckpt_name)
        if args.fp16:
            logger.warning('fp16 is turned on! '
                           'Note that not all CPU GPU support fast fp16 instructions, '
                           'worst case you will have degraded performance!')
        logger.info('model config: %s' % config_fp)
        logger.info(
            'checkpoint%s: %s' % (
                ' (override by the fine-tuned model)' if args.tuned_model_dir else '', init_checkpoint))
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, args.max_seq_len), 'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope if args.xla else contextlib.suppress

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(
                config=bert_config,
                is_training=False,
                input_ids=input_ids,
                input_mask=input_mask,
                token_type_ids=input_type_ids,
                use_one_hot_embeddings=False)

            tvars = tf.trainable_variables()

            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # Masking helpers; `m` is a float mask of shape [batch, seq].
            minus_mask = lambda x, m: x - tf.expand_dims(1.0 - m, axis=-1) * 1e30
            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_max = lambda x, m: tf.reduce_max(minus_mask(x, m), axis=1)
            masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
                    tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

            with tf.variable_scope("pooling"):
                # Select (or concatenate) the encoder layer(s) to pool over.
                if len(args.pooling_layer) == 1:
                    encoder_layer = model.all_encoder_layers[args.pooling_layer[0]]
                else:
                    all_layers = [model.all_encoder_layers[l] for l in args.pooling_layer]
                    encoder_layer = tf.concat(all_layers, -1)

                input_mask = tf.cast(input_mask, tf.float32)
                if args.pooling_strategy == PoolingStrategy.REDUCE_MEAN:
                    pooled = masked_reduce_mean(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MAX:
                    pooled = masked_reduce_max(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MEAN_MAX:
                    pooled = tf.concat([masked_reduce_mean(encoder_layer, input_mask),
                                        masked_reduce_max(encoder_layer, input_mask)], axis=1)
                elif args.pooling_strategy == PoolingStrategy.FIRST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.CLS_TOKEN:
                    pooled = tf.squeeze(encoder_layer[:, 0:1, :], axis=1)
                elif args.pooling_strategy == PoolingStrategy.LAST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.SEP_TOKEN:
                    # Gather the embedding at the last real (unmasked) position.
                    seq_len = tf.cast(tf.reduce_sum(input_mask, axis=1), tf.int32)
                    rng = tf.range(0, tf.shape(seq_len)[0])
                    indexes = tf.stack([rng, seq_len - 1], 1)
                    pooled = tf.gather_nd(encoder_layer, indexes)
                elif args.pooling_strategy == PoolingStrategy.NONE:
                    pooled = mul_mask(encoder_layer, input_mask)
                else:
                    raise NotImplementedError()

            if args.fp16:
                pooled = tf.cast(pooled, tf.float16)

            pooled = tf.identity(pooled, 'final_encodes')
            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')
            sess.run(tf.global_variables_initializer())
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g,
                [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes],
                False)
            logger.info('freeze...')
            tmp_g = convert_variables_to_constants(sess, tmp_g,
                                                   [n.name[:-2] for n in output_tensors],
                                                   use_fp16=args.fp16)

        logger.info('write graph to a tmp file: %s' % args.model_pb_dir)
        with tf.gfile.GFile(pb_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        # BUG FIX: the original only returned pb_file on the "already exists"
        # early exit and implicitly returned None after a successful
        # optimization; return the path in both cases.
        return pb_file
    except Exception:
        logger.error('fail to optimize the graph!', exc_info=True)
# Script fragment: rebuild a BERT binary-classification graph so a checkpoint
# can be restored into it. Relies on names defined elsewhere: batch_size,
# MAX_SEQ_LENGTH, bert_dir, is_training, use_one_hot_embeddings.
graph = tf.get_default_graph()
with graph.as_default():
    print("going to restore checkpoint")
    # sess.run(tf.global_variables_initializer())
    input_ids_p = tf.placeholder(tf.int32, [batch_size, MAX_SEQ_LENGTH],
                                 name="input_ids")
    input_mask_p = tf.placeholder(tf.int32, [batch_size, MAX_SEQ_LENGTH],
                                  name="input_mask")
    bert_config = modeling.BertConfig.from_json_file(
        os.path.join(bert_dir, 'bert_config.json'))
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids_p,
        input_mask=input_mask_p,
        token_type_ids=None,  # single-sentence classification, no segments
        use_one_hot_embeddings=use_one_hot_embeddings)
    # embedding = model.get_pooled_output()
    # [CLS]-pooled sentence representation feeding a 2-class head.
    output_layer = model.get_pooled_output()
    hidden_size = output_layer.shape[-1].value
    # Classification head; shapes must match the checkpoint being restored.
    output_weights = tf.get_variable(
        "output_weights", [2, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))
    # NOTE(review): this snippet is truncated here in the source — the
    # `output_bias` call below is incomplete.
    output_bias = tf.get_variable("output_bias", [2],
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels, use_one_hot_embeddings, pos_ids, dropout_rate=1.0, lstm_size=1, cell='lstm', num_layers=1, add_pos_embedding=True): """ 创建X模型 :param bert_config: bert 配置 :param is_training: :param input_ids: 数据的idx 表示 :param input_mask: :param segment_ids: :param pos_ids: 词性的idx 表示 :param labels: 标签的idx 表示 :param num_labels: 类别数量 :param use_one_hot_embeddings: :return: """ # 使用数据加载BertModel,获取对应的字embedding import tensorflow as tf from bert_base.bert import modeling model = modeling.BertModel(config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) # 获取对应的embedding 输入数据[batch_size, seq_length, embedding_size] embedding = model.get_sequence_output() # jzhang add if add_pos_embedding: embedding = embedding_addpos(embedding, pos_ids) max_seq_length = embedding.shape[1].value # 算序列真实长度 used = tf.sign(tf.abs(input_ids)) lengths = tf.reduce_sum( used, reduction_indices=1) # [batch_size] 大小的向量,包含了当前batch中的序列长度 # 添加CRF output layer blstm_crf = BLSTM_CRF(embedded_chars=embedding, hidden_unit=lstm_size, cell_type=cell, num_layers=num_layers, dropout_rate=dropout_rate, initializers=initializers, num_labels=num_labels, seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training) # jzhang: 设置参数crf_only=True,控制模型只使用CRF进行解码 # 如果设置crf_only=False,模型将使用BiLSTM-CRF作为Decoder rst = blstm_crf.add_blstm_crf_layer(crf_only=False) return rst
# Script fragment: build a BERT graph with fixed [20, 128] placeholders and
# map the pre-trained checkpoint's variables onto it.
bert_path = '/home/ywd/tf_model/pre_training_model/chinese_L-12_H-768_A-12/'
init_checkpoint = os.path.join(bert_path, 'bert_model.ckpt')
bert_config_file = os.path.join(bert_path, 'bert_config.json')
vocab_file = os.path.join(bert_path, 'vocab.txt')
bert_config = modeling.BertConfig.from_json_file(bert_config_file)

with tf.Session() as sess:
    # Fixed-shape placeholders: batch of 20 sequences of length 128.
    input_ids = tf.placeholder(tf.int32, shape=[20, 128])
    input_mask = tf.placeholder(tf.int32, shape=[20, 128])
    token_type_ids = tf.placeholder(tf.int32, shape=[20, 128])
    model = modeling.BertModel(
        config=bert_config,
        is_training=True,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=token_type_ids,
        use_one_hot_embeddings=False
    )
    # Call init_from_checkpoint, then initialize the variables.
    graph = tf.get_default_graph()
    tvars = tf.trainable_variables()
    # Match graph variables to their checkpoint counterparts by name.
    (assignment_map,
     initialized_variable_names) = modeling.get_assignment_map_from_checkpoint(
        tvars, init_checkpoint)
    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    # Initialize all variables.