def write_result(result, threshold, path=PROJECT_PATH / 'data/test_data/positive_result'):
    with codecs.open(path, 'w', 'utf-8') as file:
        for line in result:
            sentence, predict_id = line[0], line[1]
            to_write = sentence + '\t' + str(predict_id) + '\n'
            file.write(to_write)
            file.flush()
    _info('The result has been saved to {}'.format(path))
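# Usage sketch (hypothetical, not part of the original project): it assumes `result`
# is an iterable of (sentence, predicted_id) pairs, e.g. the test sentences zipped
# with the ids returned by estimator.predict(). Note that `threshold` is accepted by
# write_result but not used inside it.
def _demo_write_result():
    toy_result = [('a clearly positive sentence', 1),
                  ('a clearly negative sentence', 0)]
    write_result(toy_result, threshold=0.5,
                 path=PROJECT_PATH / 'data/test_data/demo_result')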
def model_fn(features, labels, mode, params):
    """This is the prototype signature required by tf.estimator; all four parameters are necessary."""
    # obtain the data
    _info('*** Features ***')
    for name in sorted(features.keys()):
        tf.logging.info('  name = %s, shape = %s' % (name, features[name].shape))

    input_ids = features['input_ids']    # [batch_size, seq_length]
    input_mask = features['input_mask']  # [batch_size, seq_length]

    # if mode != tf.estimator.ModeKeys.PREDICT:
    #     # segment_idx = features['segment_dis']
    #     masked_lm_positions = features['masked_lm_positions']  # [batch_size, seq_length], specify the answer positions
    #     masked_lm_ids = features['masked_lm_ids']               # [batch_size, answer_seq_length], specify the answer labels
    #     masked_lm_weights = features['masked_lm_weights']       # [batch_size, seq_length], e.g. [1, 1, 0], 0 refers to the mask
    #     # next_sentence_labels = features['next_sentence_labels']
    # else:
    masked_lm_positions = features['masked_lm_positions']
    masked_lm_ids = features['masked_lm_ids']
    masked_lm_weights = features['masked_lm_weights']

    if bert_config.train_type == 'seq2seq':
        _info('Training seq2seq task.')
    elif bert_config.train_type == 'lm':
        _info('Training language model task.')

    # build the model
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = BertModel(config=bert_config,
                      is_training=is_training,
                      input_ids=input_ids,
                      input_mask=input_mask)

    # compute the loss
    loss, per_loss, log_probs, logits = get_masked_lm_output(bert_config,
                                                             model.get_sequence_output(),
                                                             model.embedding_table,
                                                             model.projection_table,
                                                             masked_lm_positions,
                                                             masked_lm_ids,
                                                             masked_lm_weights,
                                                             mode)

    if mode == tf.estimator.ModeKeys.PREDICT:
        masked_lm_predictions = tf.reshape(tf.argmax(log_probs, axis=-1, output_type=tf.int32), [-1])
        output_spec = tf.estimator.EstimatorSpec(mode, predictions=masked_lm_predictions)
    else:
        if mode == tf.estimator.ModeKeys.TRAIN:
            # Restore from a checkpoint. tf.estimator normally restores from model_dir
            # automatically; this block is for loading pre-trained parameters from init_checkpoint.
            tvars = tf.trainable_variables()
            initialized_variable_names = {}
            if init_checkpoint:
                (assignment_map, initialized_variable_names) = get_assignment_map_from_checkpoint(tvars, init_checkpoint)
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            _info('*** Trainable Variables ***')
            for var in tvars:
                init_string = ''
                if var.name in initialized_variable_names:
                    init_string = ', *INIT_FROM_CKPT*'
                _info('name = {}, shape = {}{}'.format(var.name, var.shape, init_string))

            train_op = optimization.create_optimizer(loss,
                                                     bert_config.learning_rate,
                                                     num_train_steps,
                                                     bert_config.lr_limit)
            # learning_rate = tf.train.polynomial_decay(bert_config.learning_rate,
            #                                           tf.train.get_or_create_global_step(),
            #                                           num_train_steps,
            #                                           end_learning_rate=0.0,
            #                                           power=1.0,
            #                                           cycle=False)
            # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
            # gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=True)
            # clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
            # train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars), global_step=tf.train.get_global_step())

            output_spec = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
        elif mode == tf.estimator.ModeKeys.EVAL:
            is_real_example = tf.ones(tf.shape(masked_lm_ids), dtype=tf.float32)

            def metric_fn(loss, label_ids, logits, is_real_example):
                """
                Args:
                    loss: tf.float32.
                    label_ids: [b, s].
                    logits: [b, s, v].
                """
                # [b * s, v]
                logits = tf.reshape(logits, [-1, logits.shape[-1]])
                # [b * s]
                predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)
                # [b * s]
                label_ids = tf.reshape(label_ids, [-1])
                accuracy = tf.metrics.accuracy(labels=label_ids, predictions=predictions)
                loss = tf.metrics.mean(values=loss)
                return {'eval_accuracy': accuracy, 'eval_loss': loss}

            eval_metrics = metric_fn(loss, masked_lm_ids, logits, is_real_example)
            output_spec = tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)

    return output_spec
def model_fn(features, labels, mode, params):
    _info('*** Features ***')
    for name in sorted(features.keys()):
        tf.logging.info('  name = %s, shape = %s' % (name, features[name].shape))

    input_ids = features['input_ids']  # [batch_size, seq_length]

    # build the model
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = BertModelOfficial(config=bert_config,
                              is_training=is_training,
                              input_ids=input_ids)

    # [b, h]
    sequence_output = model.get_pooled_output()
    # sequence_output = tf.reshape(sequence_output,
    #                              [-1, bert_config.max_length * bert_config.hidden_size])
    _info(sequence_output.shape)

    with tf.variable_scope('prediction'):
        logits = tf.layers.dense(sequence_output,
                                 bert_config.classes,
                                 name='prediction',
                                 kernel_initializer=_mh.create_initializer(0.2))
        # logits = _mh.batch_norm(logits, is_training=is_training)
        prob = tf.nn.softmax(logits, axis=-1)   # [b, classes]
        predict_ids = tf.argmax(prob, axis=-1)  # [b, ]

    if mode == tf.estimator.ModeKeys.PREDICT:
        # 'class' becomes the key in the prediction output; with a customized
        # predictions dict, the output keys are identical to the dict keys.
        predictions = {'class': predict_ids}
        output_spec = tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        if mode == tf.estimator.ModeKeys.TRAIN:
            tvars = tf.trainable_variables()
            initialized_variable_names = {}
            if init_checkpoint:
                (assignment_map, initialized_variable_names) = get_assignment_map_from_checkpoint(tvars, init_checkpoint)
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            _info('*** Trainable Variables ***')
            for var in tvars:
                init_string = ''
                if var.name in initialized_variable_names:
                    init_string = ', *INIT_FROM_CKPT*'
                _info('name = {}, shape = {}{}'.format(var.name, var.shape, init_string))

            batch_size = tf.cast(bert_config.batch_size, tf.float32)
            labels = tf.reshape(labels, [-1])
            # logits = tf.expand_dims(logits, axis=1)
            seq_loss = tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)) / batch_size
            loss = seq_loss

            """
            Note on `polynomial_decay`. The formula is:

                global_step = min(global_step, decay_steps)
                decayed_learning_rate = (learning_rate - end_learning_rate) *
                                        (1 - global_step / decay_steps) ^ power + end_learning_rate

            global_step: the current batch step.
            decay_steps: the total number of steps; the learning rate reaches
                end_learning_rate after decay_steps.
            TRAIN_STEPS: the number of passes over the whole dataset, so
                decay_steps = len(dataset) / batch_size * TRAIN_STEPS.
            """
            train_op, lr = optimization.create_optimizer(loss,
                                                         bert_config.learning_rate,
                                                         bert_config.num_train_steps * 100,
                                                         bert_config.lr_limit)
            """
            learning_rate = tf.train.polynomial_decay(config.learning_rate,
                                                      tf.train.get_or_create_global_step(),
                                                      _cg.TRIAN_STEPS,
                                                      end_learning_rate=0.0,
                                                      power=1.0,
                                                      cycle=False)
            lr = tf.maximum(tf.constant(config.lr_limit), learning_rate)
            optimizer = tf.train.AdamOptimizer(lr, name='optimizer')
            tvars = tf.trainable_variables()
            gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=config.colocate_gradients_with_ops)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
            train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars), global_step=tf.train.get_global_step())
            """

            # The logging hook prints the loss and learning rate every 10 steps,
            # whereas the EstimatorSpec alone only reports at each checkpoint save.
            logging_hook = tf.train.LoggingTensorHook({'loss': loss, 'lr': lr}, every_n_iter=10)
            output_spec = tf.estimator.EstimatorSpec(mode,
                                                     loss=loss,
                                                     train_op=train_op,
                                                     training_hooks=[logging_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:
            # TODO
            raise NotImplementedError

    return output_spec
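# Worked example of the `polynomial_decay` formula described in the note above
# (illustrative only, reimplemented in plain Python; the numbers are made up).
def _decayed_lr(learning_rate, end_learning_rate, global_step, decay_steps, power=1.0):
    step = min(global_step, decay_steps)
    return (learning_rate - end_learning_rate) * (1 - step / decay_steps) ** power + end_learning_rate

# With power=1.0 the schedule is a straight line from learning_rate down to
# end_learning_rate over decay_steps batches, e.g.
# _decayed_lr(1e-4, 0.0, 2500, 10000) == 7.5e-05.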
        path=bert_config.data_path,
        batch_size=bert_config.batch_size,
        repeat_num=bert_config.num_train_steps,
        max_length=bert_config.max_length)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True

    run_config = tf.contrib.tpu.RunConfig(
        session_config=gpu_config,
        keep_checkpoint_max=1,
        save_checkpoints_steps=10,
        model_dir=bert_config.model_dir)

    estimator = tf.estimator.Estimator(model_fn, config=run_config)
    estimator.train(input_fn)  # the train input_fn should be callable


def package_model(ckpt_path, pb_path):
    model_fn = model_fn_builder(bert_config,
                                None,
                                bert_config.learning_rate,
                                bert_config.num_train_steps)
    estimator = tf.estimator.Estimator(model_fn, ckpt_path)
    estimator.export_saved_model(pb_path, server_input_receiver_fn)


if __name__ == '__main__':
    if sys.argv[1] == 'train':
        main()
    elif sys.argv[1] == 'package':
        package_model(str(PROJECT_PATH / 'models_lm'), str(PROJECT_PATH / 'models_deploy_lm'))
    else:
        _error('Unknown parameter: {}.'.format(sys.argv[1]))
        _info('Choose from [train | package].')
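# Sketch of a serving input receiver (hypothetical; the real server_input_receiver_fn
# used by package_model is defined elsewhere in the project). It assumes the exported
# model is fed `input_ids` / `input_mask` tensors of shape [None, max_length], matching
# the feature keys consumed by model_fn above.
def server_input_receiver_fn_sketch():
    input_ids = tf.placeholder(tf.int32, [None, bert_config.max_length], name='input_ids')
    input_mask = tf.placeholder(tf.int32, [None, bert_config.max_length], name='input_mask')
    features = {'input_ids': input_ids, 'input_mask': input_mask}
    return tf.estimator.export.ServingInputReceiver(features, features)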
def model_fn(features, labels, mode, params):
    # obtain the data
    _info('*** Features ***')
    for name in sorted(features.keys()):
        tf.logging.info('  name = %s, shape = %s' % (name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    if is_training:
        input_A = features['input_A']
        input_B = features['input_B']
        input_A_length = features['input_A_length']
        input_B_length = features['input_B_length']
    else:
        # outside training only sentence A is fed, so reuse it for both encoder inputs
        input_A = features['input_A']
        input_B = features['input_A']
        input_A_length = features['input_A_length']
        input_B_length = features['input_A_length']

    # if mode != tf.estimator.ModeKeys.PREDICT:
    #     decoder_input_data = features['decoder_input_data']
    #     seq_length_decoder_input_data = features['seq_length_decoder_input_data']
    # else:
    #     decoder_input_data = None
    #     seq_length_decoder_input_data = None

    # build the Encoder
    model = ERCNNModel(config=config,
                       is_training=is_training,
                       sent_A=input_A,
                       sent_B=input_B,
                       sent_length_A=input_A_length,
                       sent_length_B=input_B_length)
    output = model.get_output()  # [b, s]
    batch_size = tf.cast(_mh.get_shape_list(output)[0], dtype=tf.float32)
    # output = tf.reduce_sum(tf.multiply(output_A, output_B), axis=-1)
    # output = tf.reshape(output, (batch_size, 1))

    if mode == tf.estimator.ModeKeys.PREDICT:
        # 'output_vector' becomes the key in the prediction output; with a customized
        # predictions dict, the output keys are identical to the dict keys.
        predictions = {'output_vector': output}
        output_spec = tf.estimator.EstimatorSpec(mode, predictions=predictions)
    else:
        if mode == tf.estimator.ModeKeys.TRAIN:
            # labels = tf.cast(labels, tf.float32)
            # loss = tf.losses.mean_squared_error(labels, output)
            # loss = tf.losses.mean_squared_error(output_A, output_B)
            loss = tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=output)) / batch_size
            # loss = vae_loss + seq_loss
            # loss = seq_loss

            """
            Note on `polynomial_decay`. The formula is:

                global_step = min(global_step, decay_steps)
                decayed_learning_rate = (learning_rate - end_learning_rate) *
                                        (1 - global_step / decay_steps) ^ power + end_learning_rate

            global_step: the current batch step.
            decay_steps: the total number of steps; the learning rate reaches
                end_learning_rate after decay_steps.
            TRAIN_STEPS: the number of passes over the whole dataset, so
                decay_steps = len(dataset) / batch_size * TRAIN_STEPS.
            """
            # train_op, lr = create_optimizer(loss, config.learning_rate, _cg.TRIAN_STEPS, config.lr_limit)
            learning_rate = tf.train.polynomial_decay(config.learning_rate,
                                                      tf.train.get_or_create_global_step(),
                                                      _cg.TRIAN_STEPS,
                                                      end_learning_rate=0.0,
                                                      power=1.0,
                                                      cycle=False)
            lr = tf.maximum(tf.constant(config.lr_limit), learning_rate)
            optimizer = tf.train.AdamOptimizer(lr, name='optimizer')
            tvars = tf.trainable_variables()
            gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=config.colocate_gradients_with_ops)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
            train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars),
                                                 global_step=tf.train.get_global_step())

            # The logging hook prints the loss and learning rate every 10 steps,
            # whereas the EstimatorSpec alone only reports at each checkpoint save.
            logging_hook = tf.train.LoggingTensorHook({'loss': loss, 'lr': lr}, every_n_iter=10)
            output_spec = tf.estimator.EstimatorSpec(mode,
                                                     loss=loss,
                                                     train_op=train_op,
                                                     training_hooks=[logging_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:
            # TODO
            raise NotImplementedError

    return output_spec
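# Prediction sketch (hypothetical, not part of the original project). The PREDICT
# branch above reuses sentence A for both encoder inputs, so an inference input_fn
# only needs to yield 'input_A' and 'input_A_length'; `predict_input_fn` is an
# assumed input function built that way.
def run_predict_sketch(model_fn, model_dir, predict_input_fn):
    estimator = tf.estimator.Estimator(model_fn, model_dir=model_dir)
    for prediction in estimator.predict(predict_input_fn):
        _info(prediction['output_vector'])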
def model_fn(features, labels, mode, params):
    """The four formal parameters above are required by tf.estimator."""
    # display the features
    _info('*** Features ***')
    for name in sorted(features.keys()):
        tf.logging.info('  name = %s, shape = %s' % (name, features[name].shape))

    # get the input feature
    # TODO customized define
    input_x = features['input_x']

    # define the model
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = Model(config=_cg, input_x=input_x)
    output = model.get_result()

    # TRAIN, EVAL, PREDICT
    if mode == tf.estimator.ModeKeys.PREDICT:
        # TODO customized define
        predict_results = {'result_1': output}
        output_spec = tf.estimator.EstimatorSpec(mode, predictions=predict_results)
    else:
        if mode == tf.estimator.ModeKeys.TRAIN:
            # TODO customized define
            labels = tf.reshape(labels, [-1])
            output = tf.reshape(output, [-1])
            loss = tf.keras.losses.MSE(labels, output)

            learning_rate = tf.train.polynomial_decay(_cg.learning_rate,
                                                      tf.train.get_or_create_global_step(),
                                                      _cg.train_steps,
                                                      end_learning_rate=_cg.end_learning_rate,
                                                      power=1.0,
                                                      cycle=False)
            optimizer = tf.train.AdamOptimizer(learning_rate, name='optimizer')
            tvars = tf.trainable_variables()
            gradients = tf.gradients(loss, tvars,
                                     colocate_gradients_with_ops=_cg.colocate_gradients_with_ops)
            clipped_gradients, _ = tf.clip_by_global_norm(gradients, 2.0)
            train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars),
                                                 global_step=tf.train.get_global_step())

            logging_hook = tf.train.LoggingTensorHook({'loss': loss, 'lr': learning_rate},
                                                      every_n_iter=_cg.print_info_interval)
            output_spec = tf.estimator.EstimatorSpec(mode,
                                                     loss=loss,
                                                     train_op=train_op,
                                                     training_hooks=[logging_hook])
        elif mode == tf.estimator.ModeKeys.EVAL:
            # TODO
            raise NotImplementedError

    return output_spec
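# Wiring sketch (hypothetical): how this template model_fn is typically plugged into
# tf.estimator. `train_input_fn`, `_cg.model_dir` and `_cg.save_checkpoints_steps` are
# assumed names that are not defined in this excerpt.
def train_sketch(model_fn, train_input_fn):
    run_config = tf.estimator.RunConfig(model_dir=_cg.model_dir,
                                        save_checkpoints_steps=_cg.save_checkpoints_steps,
                                        keep_checkpoint_max=1)
    estimator = tf.estimator.Estimator(model_fn, config=run_config)
    estimator.train(train_input_fn, max_steps=_cg.train_steps)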