Example #1
import os

# import_tf and set_logger are assumed to be project-local helpers (not shown
# on this page); in bert-as-service they live in the server's helper module.
from helper import import_tf, set_logger


# class name inferred from the 'BertNer' logger tag below
class BertNer:
    def __init__(self, **kwargs):
        self.tf = import_tf(kwargs['gpu_no'], kwargs['verbose'])
        self.logger = set_logger('BertNer', kwargs['log_dir'],
                                 kwargs['verbose'])
        self.model_dir = kwargs['ner_model']

        from bert.tokenization import FullTokenizer
        self.tokenizer = FullTokenizer(
            os.path.join(self.model_dir, 'vocab.txt'))

        self.ner_sq_len = 128  # fixed sequence length for NER inputs
        self.input_ids = self.tf.placeholder(self.tf.int32,
                                             (None, self.ner_sq_len),
                                             'input_ids')
        self.input_mask = self.tf.placeholder(self.tf.int32,
                                              (None, self.ner_sq_len),
                                              'input_mask')

        # init graph
        self._init_graph()

        # init ner assist data
        self._init_predict_var()

        # single-character placeholders for person names: the ten Heavenly
        # Stems and twelve Earthly Branches (e.g. 甲/乙 = "Party A"/"Party B"
        # in Chinese legal text)
        self.per_proun = [
            '甲', '乙', '丙', '丁', '戊', '己', '庚', '辛', '壬', '癸', '子', '丑', '寅',
            '卯', '辰', '巳', '午', '未', '申', '酉', '戌', '亥'
        ]
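
A minimal instantiation of this class might look like the following; the
keyword names mirror what __init__ reads from kwargs, and every value is a
placeholder:

# hypothetical usage; GPU number and paths are placeholders
ner = BertNer(gpu_no=0,
              verbose=False,
              log_dir='/tmp/logs',
              ner_model='/path/to/ner_model')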
Example #2

import os

from termcolor import colored

# import_tf and set_logger as in Example #1; the class name is an assumption
# based on the 'BS' logger tag and the bert_sim_dir argument.
class BertSim:
    def __init__(self, gpu_no, log_dir, bert_sim_dir, verbose=False):
        self.bert_sim_dir = bert_sim_dir
        self.logger = set_logger(colored('BS', 'cyan'), log_dir, verbose)

        self.tf = import_tf(gpu_no, verbose)

        # add tokenizer
        from bert import tokenization
        self.tokenizer = tokenization.FullTokenizer(os.path.join(bert_sim_dir, 'vocab.txt'))
        # add placeholders (inputs are presumably padded to a fixed length of 45)
        self.input_ids = self.tf.placeholder(self.tf.int32, (None, 45), 'input_ids')
        self.input_mask = self.tf.placeholder(self.tf.int32, (None, 45), 'input_mask')
        self.input_type_ids = self.tf.placeholder(self.tf.int32, (None, 45), 'input_type_ids')
        # init graph
        self._init_graph()
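
All of these examples lean on an import_tf helper that pins the process to a
device and tames TensorFlow's logging before importing it. The helper itself
is not shown on this page; a minimal sketch consistent with how it is called
here (a device/GPU number, a verbose flag, and in Example #3 a use_fp16 flag),
assuming the TF 1.x API, might look like this:

import os

def import_tf(device_id=-1, verbose=False, use_fp16=False):
    # use_fp16 is accepted for API compatibility; fp16 handling is omitted
    # in this sketch
    # select the visible GPU; -1 hides all GPUs and forces CPU execution
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1' if device_id < 0 else str(device_id)
    # quiet TensorFlow's C++ logging unless verbose output was requested
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0' if verbose else '3'
    import tensorflow as tf
    tf.logging.set_verbosity(tf.logging.DEBUG if verbose else tf.logging.ERROR)
    return tf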
Example #3
# (top of input_fn_builder is truncated in this excerpt; the closing brackets
# below suggest it returns a tf.data.Dataset built from a generator)
                                                }))

    return input_fn


# get_run_args is assumed to come from the same helper module as import_tf
# and set_logger
args = get_run_args()

logger = set_logger(colored('VENTILATOR', 'magenta'), args.verbose)
graph_path, bert_config = optimize_graph(args=args)

if graph_path:
    logger.info('optimized graph is stored at: %s' % graph_path)

logger.info('use device %s, load graph from %s' % ('cpu', graph_path))

tf = import_tf(device_id=-1, verbose=args.verbose, use_fp16=args.fp16)
estimator = get_estimator(args=args, tf=tf, graph_path=graph_path)

save_hook = tf.train.CheckpointSaverHook(checkpoint_dir=args.checkpoint_dir,
                                         save_secs=1)
predicts = estimator.predict(input_fn=input_fn_builder(), hooks=[save_hook])

for predict in predicts:
    print(predict)

feature_spec = {
    "unique_ids":
    tf.placeholder(dtype=tf.int32, shape=[None], name="unique_ids"),
    "input_ids":
    tf.placeholder(dtype=tf.int32, shape=[None, None], name="input_ids"),
    "input_mask":
Example #4
# Imports assumed from the surrounding module (this function closely matches
# bert-as-service's graph.py):
import contextlib
import json
import os
import tempfile

from termcolor import colored

# modeling (BERT's modeling.py), PoolingStrategy, import_tf, set_logger and
# convert_variables_to_constants are project-local.


def optimize_graph(args, logger=None):
    if not logger:
        logger = set_logger(colored('GRAPHOPT', 'cyan'), args.verbose)
    try:
        # we don't need GPU for optimizing the graph
        tf = import_tf(verbose=args.verbose)
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

        config = tf.ConfigProto(device_count={'GPU': 0},
                                allow_soft_placement=True)

        config_fp = os.path.join(args.model_dir, args.config_name)
        init_checkpoint = os.path.join(args.tuned_model_dir or args.model_dir,
                                       args.ckpt_name)
        if args.fp16:
            logger.warning(
                'fp16 is turned on! '
                'Note that not all CPUs/GPUs support fast fp16 instructions; '
                'in the worst case you will see degraded performance!')
        logger.info('model config: %s' % config_fp)
        logger.info('checkpoint%s: %s' %
                    (' (overridden by the fine-tuned model)'
                     if args.tuned_model_dir else '', init_checkpoint))
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, None),
                                        'input_type_ids')

        jit_scope = (tf.contrib.compiler.jit.experimental_jit_scope
                     if args.xla else contextlib.suppress)

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(config=bert_config,
                                       is_training=False,
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=input_type_ids,
                                       use_one_hot_embeddings=False)

            tvars = tf.trainable_variables()

            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            # push padded positions to a large negative value so that
            # reduce_max ignores them
            minus_mask = lambda x, m: x - tf.expand_dims(1.0 - m, axis=-1) * 1e30
            # zero out padded positions
            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_max = lambda x, m: tf.reduce_max(minus_mask(x, m), axis=1)
            # average over real (non-padded) tokens only; the epsilon avoids
            # division by zero for all-padding rows
            masked_reduce_mean = lambda x, m: tf.reduce_sum(mul_mask(x, m), axis=1) / (
                tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10)

            with tf.variable_scope("pooling"):
                if len(args.pooling_layer) == 1:
                    encoder_layer = model.all_encoder_layers[
                        args.pooling_layer[0]]
                else:
                    all_layers = [
                        model.all_encoder_layers[l] for l in args.pooling_layer
                    ]
                    encoder_layer = tf.concat(all_layers, -1)

                input_mask = tf.cast(input_mask, tf.float32)
                if args.pooling_strategy == PoolingStrategy.REDUCE_MEAN:
                    pooled = masked_reduce_mean(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MAX:
                    pooled = masked_reduce_max(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MEAN_MAX:
                    pooled = tf.concat([
                        masked_reduce_mean(encoder_layer, input_mask),
                        masked_reduce_max(encoder_layer, input_mask)
                    ], axis=1)
                elif args.pooling_strategy == PoolingStrategy.FIRST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.CLS_TOKEN:
                    pooled = tf.squeeze(encoder_layer[:, 0:1, :], axis=1)
                elif args.pooling_strategy == PoolingStrategy.LAST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.SEP_TOKEN:
                    seq_len = tf.cast(tf.reduce_sum(input_mask, axis=1),
                                      tf.int32)
                    rng = tf.range(0, tf.shape(seq_len)[0])
                    indexes = tf.stack([rng, seq_len - 1], 1)
                    pooled = tf.gather_nd(encoder_layer, indexes)
                elif args.pooling_strategy == PoolingStrategy.NONE:
                    pooled = mul_mask(encoder_layer, input_mask)
                else:
                    raise NotImplementedError()

            if args.fp16:
                pooled = tf.cast(pooled, tf.float16)

            pooled = tf.identity(pooled, 'final_encodes')
            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')

            sess.run(tf.global_variables_initializer())
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g, [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes], False)

            logger.info('freeze...')
            # convert_variables_to_constants is presumably a patched variant
            # shipped with the project (stock TF has no use_fp16 argument)
            tmp_g = convert_variables_to_constants(
                sess,
                tmp_g, [n.name[:-2] for n in output_tensors],
                use_fp16=args.fp16)

        tmp_file = tempfile.NamedTemporaryFile('w',
                                               delete=False,
                                               dir=args.graph_tmp_dir).name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file, bert_config
    except Exception:
        logger.error('failed to optimize the graph!', exc_info=True)
        # falls through and implicitly returns None; callers that unpack two
        # values (as in Example #3) should guard against failure
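
The masked pooling above is the heart of the graph: the float mask zeroes out
(or, for max-pooling, heavily penalizes) padded positions before reducing over
the sequence axis. A tiny NumPy check of the masked_reduce_mean formula (toy
numbers, no TensorFlow required):

import numpy as np

x = np.array([[[1.0], [3.0], [100.0]]])   # (batch=1, seq=3, hidden=1)
m = np.array([[1.0, 1.0, 0.0]])           # last position is padding

# same formula as masked_reduce_mean above, in NumPy
summed = (x * m[..., None]).sum(axis=1)
count = m.sum(axis=1, keepdims=True) + 1e-10
print(summed / count)  # [[2.]] -- the padded 100.0 is ignored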