Example #1
 def write_result(result,
                  threshold,
                  path=PROJECT_PATH / 'data/test_data/positive_result'):
     with codecs.open(path, 'w', 'utf-8') as file:
         for line in result:
             sentence, predict_id = line[0], line[1]
             to_write = sentence + '\t' + str(predict_id) + '\n'
             file.write(to_write)
             file.flush()
     _info('The result has been saved to {}'.format(path))
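A minimal usage sketch for write_result; the sentences, ids, and threshold value are made up for illustration, and the default output path is used:

result = [('a fine movie overall', 1), ('poorly paced and dull', 0)]
write_result(result, threshold=0.5)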
Example #2
    def model_fn(features, labels, mode, params):
        """this is prototype syntax, all parameters are necessary."""
        # obtain the data
        _info('*** Features ***')
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        input_ids = features['input_ids']  # [batch_size, seq_length]
        input_mask = features['input_mask']  # [batch_size, seq_length]

        # if mode != tf.estimator.ModeKeys.PREDICT:
        #     # segment_idx = features['segment_dis']
        #     masked_lm_positions = features['masked_lm_positions']  # [batch_size, seq_length], positions of the masked tokens
        #     masked_lm_ids = features['masked_lm_ids']              # [batch_size, answer_seq_length], ids of the masked tokens
        #     masked_lm_weights = features['masked_lm_weights']      # [batch_size, seq_length], e.g. [1, 1, 0]; 0 marks positions to ignore
        #     # next_sentence_labels = features['next_sentence_labels']
        # else:
        masked_lm_positions = features['masked_lm_positions']
        masked_lm_ids = features['masked_lm_ids']
        masked_lm_weights = features['masked_lm_weights']

        if bert_config.train_type == 'seq2seq':
            _info('Training seq2seq task.')
        elif bert_config.train_type == 'lm':
            _info('Training language model task.')

        # build model
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = BertModel(config=bert_config,
                          is_training=is_training,
                          input_ids=input_ids,
                          input_mask=input_mask)

        # compute loss
        loss, per_loss, log_probs, logits = get_masked_lm_output(
            bert_config, model.get_sequence_output(), model.embedding_table,
            model.projection_table, masked_lm_positions, masked_lm_ids,
            masked_lm_weights, mode)

        if mode == tf.estimator.ModeKeys.PREDICT:
            masked_lm_predictions = tf.reshape(
                tf.argmax(log_probs, axis=-1, output_type=tf.int32), [-1])
            output_spec = tf.estimator.EstimatorSpec(
                mode, predictions=masked_lm_predictions)
        else:
            if mode == tf.estimator.ModeKeys.TRAIN:
                # Restore from the checkpoint.
                # tf.estimator normally restores from model_dir automatically;
                # this explicit init is for loading pre-trained parameters.
                tvars = tf.trainable_variables()
                initialized_variable_names = {}
                if init_checkpoint:
                    (assignment_map, initialized_variable_names
                     ) = get_assignment_map_from_checkpoint(
                         tvars, init_checkpoint)
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)

                _info('*** Trainable Variables ***')
                for var in tvars:
                    init_string = ''
                    if var.name in initialized_variable_names:
                        init_string = ', *INIT_FROM_CKPT*'
                    _info('name = {}, shape={}{}'.format(
                        var.name, var.shape, init_string))

                train_op = optimization.create_optimizer(
                    loss, bert_config.learning_rate, num_train_steps,
                    bert_config.lr_limit)

                # learning_rate = tf.train.polynomial_decay(bert_config.learning_rate,
                #                                         tf.train.get_or_create_global_step(),
                #                                         num_train_steps,
                #                                         end_learning_rate=0.0,
                #                                         power=1.0,
                #                                         cycle=False)
                # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
                # gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=True)
                # clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                # train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars), global_step=tf.train.get_global_step())
                output_spec = tf.estimator.EstimatorSpec(mode,
                                                         loss=loss,
                                                         train_op=train_op)
            elif mode == tf.estimator.ModeKeys.EVAL:
                is_real_example = tf.ones(tf.shape(masked_lm_ids),
                                          dtype=tf.float32)

                def metric_fn(loss, label_ids, logits, is_real_example):
                    """
                    Args:
                        loss: tf.float32.
                        label_ids: [b, s].
                        logits: [b, s, v].
                    """
                    # [b * s, v]
                    logits = tf.reshape(logits, [-1, logits.shape[-1]])
                    # [b * s]
                    predictions = tf.argmax(logits,
                                            axis=-1,
                                            output_type=tf.int32)
                    # [b * s]
                    label_ids = tf.reshape(label_ids, [-1])
                    accuracy = tf.metrics.accuracy(labels=label_ids,
                                                   predictions=predictions)
                    loss = tf.metrics.mean(values=loss)
                    return {'eval_accuracy': accuracy, 'eval_loss': loss}

                eval_metrics = metric_fn(loss, masked_lm_ids, logits,
                                         is_real_example)
                output_spec = tf.estimator.EstimatorSpec(
                    mode=mode, loss=loss, eval_metric_ops=eval_metrics)

        return output_spec
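The model_fn above closes over bert_config, init_checkpoint, and num_train_steps; a minimal sketch of the enclosing builder, assuming the signature that Example #4 below calls with (model_fn_builder(bert_config, init_checkpoint, learning_rate, num_train_steps)):

def model_fn_builder(bert_config, init_checkpoint, learning_rate, num_train_steps):
    """Return a model_fn closure for tf.estimator.Estimator."""
    def model_fn(features, labels, mode, params):
        # ... build the graph exactly as in the example above, using the
        # enclosed bert_config / init_checkpoint / num_train_steps ...
        return output_spec
    return model_fn

model_fn = model_fn_builder(bert_config, None, bert_config.learning_rate,
                            bert_config.num_train_steps)
estimator = tf.estimator.Estimator(model_fn, model_dir=bert_config.model_dir)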
Example #3
    def model_fn(features, labels, mode, params):
        _info('*** Features ***')
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

        input_ids = features['input_ids']       # [batch_size, seq_length]

        # build model
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = BertModelOfficial(
            config=bert_config,
            is_training=is_training,
            input_ids=input_ids)
   
        # [b, h]: pooled output of the [CLS] token
        sequence_output = model.get_pooled_output()
        # sequence_output = tf.reshape(sequence_output,
        #                              [-1, bert_config.max_length * bert_config.hidden_size])
        _info(sequence_output.shape)
        with tf.variable_scope('prediction'):
            logits = tf.layers.dense(sequence_output,
                                     bert_config.classes,
                                     name='prediction',
                                     kernel_initializer=_mh.create_initializer(0.2))

            # logits = _mh.batch_norm(logits, is_training=is_training)
            prob = tf.nn.softmax(logits, axis=-1)     # [b, classes]
            predict_ids = tf.argmax(prob, axis=-1)    # [b]

            if mode == tf.estimator.ModeKeys.PREDICT:
                predictions = {'class': predict_ids}
                # With a dict of predictions, Estimator.predict yields dicts keyed exactly as given here.
                output_spec = tf.estimator.EstimatorSpec(mode, predictions=predictions)
            else:
                if mode == tf.estimator.ModeKeys.TRAIN:
                    tvars = tf.trainable_variables()
                    initialized_variable_names = {}
                    if init_checkpoint:
                        (assignment_map, initialized_variable_names) = get_assignment_map_from_checkpoint(tvars, init_checkpoint)
                        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

                    _info('*** Trainable Variables ***')
                    for var in tvars:
                        init_string = ''
                        if var.name in initialized_variable_names:
                            init_string = ', *INIT_FROM_CKPT*'
                        _info('name = {}, shape={}{}'.format(var.name, var.shape, init_string))


                    batch_size = tf.cast(bert_config.batch_size, tf.float32) 

                    labels = tf.reshape(labels, [-1])
    
                    # logits = tf.expand_dims(logits, axis=1)
                    seq_loss = tf.reduce_sum(
                            tf.nn.sparse_softmax_cross_entropy_with_logits(
                                labels=labels, logits=logits)) / batch_size
                    loss = seq_loss
                    """
                    Tutorial on `polynomial_decay`:
                        The formula is as below:
                            
                            global_step = min(global_step, decay_steps)
                            decayed_learning_rate = (learning_rate - end_learning_rate) * (1 - global_step / decay_steps) ^ (power) + end_learning_rate
                        
                        global_step: each batch step.
                        decay_steps: the whole step, the lr will touch the end_learning_rate after the decay_steps.
                        TRAIN_STEPS: the number for repeating the whole dataset, so the decay_steps = len(dataset) / batch_size * TRAIN_STEPS.
                    """
                    train_op, lr = optimization.create_optimizer(loss, bert_config.learning_rate, bert_config.num_train_steps * 100, bert_config.lr_limit)
                    """
                    learning_rate = tf.train.polynomial_decay(config.learning_rate,
                                                            tf.train.get_or_create_global_step(),
                                                            _cg.TRIAN_STEPS,
                                                            end_learning_rate=0.0,
                                                            power=1.0,
                                                            cycle=False)

                    lr = tf.maximum(tf.constant(config.lr_limit), learning_rate)
                    optimizer = tf.train.AdamOptimizer(lr, name='optimizer')
                    tvars = tf.trainable_variables()
                    gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=config.colocate_gradients_with_ops)
                    clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
                    train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars), global_step=tf.train.get_global_step())
                    """

                    # LoggingTensorHook prints loss and lr every 10 steps (i.e. every 10 batches),
                    # whereas the EstimatorSpec alone only reports at each checkpoint save.
                    logging_hook = tf.train.LoggingTensorHook({'loss': loss, 'lr': lr}, every_n_iter=10)

                    output_spec = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])

                elif mode == tf.estimator.ModeKeys.EVAL:
                    # TODO
                    raise NotImplementedError
            
            return output_spec
Example #4
                                path=bert_config.data_path,
                                batch_size=bert_config.batch_size,
                                repeat_num=bert_config.num_train_steps,
                                max_length=bert_config.max_length)

    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True

    run_config = tf.contrib.tpu.RunConfig(
        session_config=gpu_config,
        keep_checkpoint_max=1,
        save_checkpoints_steps=10,
        model_dir=bert_config.model_dir)
    
    estimator = tf.estimator.Estimator(model_fn, config=run_config)
    estimator.train(input_fn)     # input_fn must be callable

def package_model(ckpt_path, pb_path):
    model_fn = model_fn_builder(bert_config, None, bert_config.learning_rate,
                                bert_config.num_train_steps)
    estimator = tf.estimator.Estimator(model_fn, ckpt_path)
    estimator.export_saved_model(pb_path, server_input_receiver_fn)

if __name__ == '__main__':
    if sys.argv[1] == 'train':
        main()
    elif sys.argv[1] == 'package':
        package_model(str(PROJECT_PATH / 'models_lm'), str(PROJECT_PATH / 'models_deploy_lm'))
    else:
        _error('Unknown parameter: {}.'.format(sys.argv[1]))
        _info('Choose from [train | package].')
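The export_saved_model call in package_model above needs a serving-input receiver; a minimal sketch of what server_input_receiver_fn could look like, assuming a single 'input_ids' feature of length bert_config.max_length (neither is defined in this snippet):

def server_input_receiver_fn():
    # Placeholder exposed as the input signature of the exported SavedModel
    # (feature name and shape are assumptions, not taken from the code above).
    input_ids = tf.placeholder(tf.int32, shape=[None, bert_config.max_length],
                               name='input_ids')
    features = {'input_ids': input_ids}
    return tf.estimator.export.ServingInputReceiver(features, features)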
Example #5
		def model_fn(features, labels, mode, params):
				# obtain the data
				_info('*** Features ***')
				for name in sorted(features.keys()):
						tf.logging.info(' name = %s, shape = %s' % (name, features[name].shape))
				
				is_training = (mode == tf.estimator.ModeKeys.TRAIN)

				if is_training:
					input_A = features['input_A']
					input_B = features['input_B']
					input_A_length = features['input_A_length']
					input_B_length = features['input_B_length']
				else:
					input_A = features['input_A']
					input_B = features['input_A']
					input_A_length = features['input_A_length']
					input_B_length = features['input_A_length']
					

				# if mode != tf.estimator.ModeKeys.PREDICT:
				#     decoder_input_data = features['decoder_input_data']
				#     seq_length_decoder_input_data = features['seq_length_decoder_input_data']
				# else:
				#     decoder_input_data = None
				#     seq_length_decoder_input_data = None

				# build Encoder
				model = ERCNNModel(config=config,
													 is_training=is_training,
													 sent_A=input_A,
													 sent_B=input_B,
													 sent_length_A=input_A_length,
													 sent_length_B=input_B_length)

				output = model.get_output()

				# [b, s]
				batch_size = tf.cast(_mh.get_shape_list(output)[0], dtype=tf.float32)
				# output = tf.reduce_sum(tf.multiply(output_A, output_B), axis=-1)
				# output = tf.reshape(output, (batch_size, 1))

				if mode == tf.estimator.ModeKeys.PREDICT:
						predictions = {'output_vector': output}
						# With a dict of predictions, Estimator.predict yields dicts keyed exactly as given here.
						output_spec = tf.estimator.EstimatorSpec(mode, predictions=predictions)
				else:
						if mode == tf.estimator.ModeKeys.TRAIN:
								# labels = tf.cast(labels, tf.float32)
								# loss = tf.losses.mean_squared_error(labels, output)

								# loss = tf.losses.mean_squared_error(output_A, output_B)

								loss = tf.reduce_sum(
								        tf.nn.sparse_softmax_cross_entropy_with_logits(
								            labels=labels, logits=output)) / batch_size 
								# # loss = vae_loss + seq_loss
								# loss = seq_loss
								
								"""
								Tutorial on `polynomial_decay`:
										The formula is as below:
													
													global_step = min(global_step, decay_steps)
													decayed_learning_rate = (learning_rate - end_learning_rate) * (1 - global_step / decay_steps) ^ (power) + end_learning_rate
										
										global_step: each batch step.
										decay_steps: the whole step, the lr will touch the end_learning_rate after the decay_steps.
										TRAIN_STEPS: the number for repeating the whole dataset, so the decay_steps = len(dataset) / batch_size * TRAIN_STEPS.
								"""
								# train_op, lr = create_optimizer(loss, config.learning_rate, _cg.TRIAN_STEPS, config.lr_limit)
								
								
								learning_rate = tf.train.polynomial_decay(config.learning_rate,
																													tf.train.get_or_create_global_step(),
																													_cg.TRIAN_STEPS,
																													end_learning_rate=0.0,
																													power=1.0,
																													cycle=False)

								lr = tf.maximum(tf.constant(config.lr_limit), learning_rate)
								optimizer = tf.train.AdamOptimizer(lr, name='optimizer')
								tvars = tf.trainable_variables()
								gradients = tf.gradients(loss, tvars, colocate_gradients_with_ops=config.colocate_gradients_with_ops)
								clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
								train_op = optimizer.apply_gradients(zip(clipped_gradients, tvars), global_step=tf.train.get_global_step())
								

								# LoggingTensorHook prints loss and lr every 10 steps (i.e. every 10 batches),
								# whereas the EstimatorSpec alone only reports at each checkpoint save.
								logging_hook = tf.train.LoggingTensorHook({'loss': loss, 'lr': lr}, every_n_iter=10)

								output_spec = tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])

						elif mode == tf.estimator.ModeKeys.EVAL:
								# TODO
								raise NotImplementedError
				
				return output_spec
Example #6
    def model_fn(features, labels, mode, params):
        """the above formal parameters are necessary."""
        # display the features
        _info('*** Features ***')
        for name in sorted(features.keys()):
            tf.logging.info(' name = %s, shape = %s' %
                            (name, features[name].shape))

        # get the input feature
        # TODO customized define
        input_x = features['input_x']

        # define the model
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        model = Model(config=_cg, input_x=input_x)
        output = model.get_result()

        # TRAIN, EVAL, PREDICT
        if mode == tf.estimator.ModeKeys.PREDICT:
            # TODO customized define
            predict_results = {'result_1': output}
            output_spec = tf.estimator.EstimatorSpec(
                mode, predictions=predict_results)
        else:
            if mode == tf.estimator.ModeKeys.TRAIN:
                # TODO customized define
                labels = tf.reshape(labels, [-1])
                output = tf.reshape(output, [-1])
                loss = tf.keras.losses.MSE(labels, output)
                learning_rate = tf.train.polynomial_decay(
                    _cg.learning_rate,
                    tf.train.get_or_create_global_step(),
                    _cg.train_steps,
                    end_learning_rate=_cg.end_learning_rate,
                    power=1.0,
                    cycle=False)
                optimizer = tf.train.AdamOptimizer(learning_rate,
                                                   name='optimizer')
                tvars = tf.trainable_variables()
                gradients = tf.gradients(
                    loss, tvars,
                    colocate_gradients_with_ops=_cg.colocate_gradients_with_ops)
                clipped_gradients, _ = tf.clip_by_global_norm(gradients, 2.0)
                train_op = optimizer.apply_gradients(
                    zip(clipped_gradients, tvars),
                    global_step=tf.train.get_global_step())

                logging_hook = tf.train.LoggingTensorHook(
                    {
                        'loss': loss,
                        'lr': learning_rate
                    },
                    every_n_iter=_cg.print_info_interval)
                output_spec = tf.estimator.EstimatorSpec(
                    mode,
                    loss=loss,
                    train_op=train_op,
                    training_hooks=[logging_hook])
            elif mode == tf.estimator.ModeKeys.EVAL:
                # TODO
                raise NotImplementedError
        return output_spec
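A model_fn template like the one above is driven by an input_fn; a minimal sketch using tf.data, where train_x, train_y, the feature name 'input_x', and the model_dir are illustrative placeholders:

def input_fn_builder(xs, ys, batch_size=32, num_epochs=1):
    def input_fn():
        # Dataset of ({'input_x': ...}, label) pairs, matching the features dict above.
        dataset = tf.data.Dataset.from_tensor_slices(({'input_x': xs}, ys))
        return dataset.shuffle(1000).repeat(num_epochs).batch(batch_size)
    return input_fn

estimator = tf.estimator.Estimator(model_fn, model_dir='models_demo')
estimator.train(input_fn_builder(train_x, train_y))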