def main():
    config = {
        "model_config_file":
        "/search/odin/liruihong/tts/multi_attn_model/config_data/classify_config.json",
        "max_seq_length": 128,
        "batch_size": 32,
        "word2vec_file":
        "/search/odin/liruihong/tts/multi_attn_model/config_data/100000-small.txt",
        "stop_words_file":
        "/search/odin/liruihong/tts/multi_attn_model/config_data/cn_stopwords.txt",
        "init_checkpoint":
        "/search/odin/liruihong/tts/bert_output/wordvec_attn/annotate_part_unlimitlen/model.ckpt-4600",
        "model_output_dir":
        "/search/odin/liruihong/tts/bert_output/wordvec_attn/annotate_part_unlimitlen",
        "http_port": 9001,
    }

    logger = set_logger("root", verbose=True, handler=logging.StreamHandler())
    ready_to_classify_que = Queue()
    classify_res_que = Queue()
    http_server = HTTPServer(config, ready_to_classify_que, classify_res_que,
                             1, logger)
    logger.info("start server")
    http_server.start()

    logger.info("finish all start")
Example #2
 def __init__(self, id, args, worker_address, sink_address):
     super().__init__()
     self.model_dir = args.model_dir
     self.config_fp = os.path.join(self.model_dir, 'bert_config.json')
     self.checkpoint_fp = os.path.join(self.model_dir, 'bert_model.ckpt')
     self.vocab_fp = os.path.join(args.model_dir, 'vocab.txt')
     self.tokenizer = tokenization.FullTokenizer(vocab_file=self.vocab_fp)
     self.max_seq_len = args.max_seq_len
     self.worker_id = id
     self.daemon = True
     self.model_fn = model_fn_builder(
         bert_config=modeling.BertConfig.from_json_file(self.config_fp),
         init_checkpoint=self.checkpoint_fp,
         pooling_strategy=args.pooling_strategy,
         pooling_layer=args.pooling_layer)
     # pin this worker process to its own GPU
     os.environ['CUDA_VISIBLE_DEVICES'] = str(self.worker_id)
     config = tf.ConfigProto()
     config.gpu_options.allow_growth = True
     config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_fraction
     self.estimator = Estimator(self.model_fn,
                                config=RunConfig(session_config=config))
     self.exit_flag = multiprocessing.Event()
     self.logger = set_logger('WORKER-%d' % self.worker_id)
     self.worker_address = worker_address
     self.sink_address = sink_address
Example #3
    def __init__(self, **kwargs):
        self.tf = import_tf(kwargs['gpu_no'], kwargs['verbose'])
        self.logger = set_logger('BertNer', kwargs['log_dir'],
                                 kwargs['verbose'])
        self.model_dir = kwargs['ner_model']

        from bert.tokenization import FullTokenizer
        self.tokenizer = FullTokenizer(
            os.path.join(self.model_dir, 'vocab.txt'))

        self.ner_sq_len = 128
        self.input_ids = self.tf.placeholder(self.tf.int32,
                                             (None, self.ner_sq_len),
                                             'input_ids')
        self.input_mask = self.tf.placeholder(self.tf.int32,
                                              (None, self.ner_sq_len),
                                              'input_mask')

        # init graph
        self._init_graph()

        # init ner assist data
        self._init_predict_var()

        # single-character person placeholders: the ten Heavenly Stems and
        # twelve Earthly Branches (e.g. 甲 / 乙, as in "party A" / "party B")
        self.per_proun = [
            '甲', '乙', '丙', '丁', '戊', '己', '庚', '辛', '壬', '癸', '子', '丑', '寅',
            '卯', '辰', '巳', '午', '未', '申', '酉', '戌', '亥'
        ]
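
Given the fixed-length placeholders above (ner_sq_len = 128), a hypothetical helper showing how a single sentence could be turned into a feed dict is sketched below; the output tensor and _init_graph internals are not part of the excerpt, so only the input side is shown:

    def _build_feed(self, text):
        # Tokenize and add the standard BERT boundary tokens.
        tokens = ['[CLS]'] + self.tokenizer.tokenize(text)[:self.ner_sq_len - 2] + ['[SEP]']
        ids = self.tokenizer.convert_tokens_to_ids(tokens)
        mask = [1] * len(ids)
        # Pad to the fixed length expected by the input_ids / input_mask placeholders.
        pad = self.ner_sq_len - len(ids)
        return {self.input_ids: [ids + [0] * pad],
                self.input_mask: [mask + [0] * pad]}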
Example #4
 def __init__(self, args):
     super().__init__()
     self.model_dir = args.model_dir
     self.max_seq_len = args.max_seq_len
     self.num_worker = args.num_worker
     self.max_batch_size = args.max_batch_size
     self.port = args.port
     self.args = args
     self.args_dict = {
         'model_dir': args.model_dir,
         'max_seq_len': args.max_seq_len,
         'num_worker': args.num_worker,
         'max_batch_size': args.max_batch_size,
         'port': args.port,
         'tensorflow_version': tf.__version__,
         'python_version': sys.version,
         'server_time': str(datetime.now())
     }
     self.processes = []
     self.frontend = None  # REQ->ROUTER
     self.backend = None  # PUSH->PULL
     self.context = None
     self.exit_flag = threading.Event()
     self.logger = set_logger('DISPATCHER')
     self.client_checksum = {}
     self.pending_client = {}
     self.pending_checksum = {}
Example #5
 def __init__(self, args, frontend):
     super().__init__()
     self.port = args.port
     self.context = None
     self.receiver = None
     self.frontend = frontend
     self.exit_flag = threading.Event()
     self.logger = set_logger('SINK')
Example #6
 def __init__(self, args, frontend, client_chk):
     super().__init__()
     self.port = args.port
     self.context = None
     self.receiver = None
     self.frontend = frontend
     self.exit_flag = threading.Event()
     self.logger = set_logger('SINK')
     self.address = None
     self.client_checksum = client_chk
Example #7
    def __init__(self, gpu_no, log_dir, bert_sim_dir, verbose=False):
        self.bert_sim_dir = bert_sim_dir
        self.logger = set_logger(colored('BS', 'cyan'), log_dir, verbose)

        self.tf = import_tf(gpu_no, verbose)

        # add tokenizer
        from bert import tokenization
        self.tokenizer = tokenization.FullTokenizer(os.path.join(bert_sim_dir, 'vocab.txt'))
        # add placeholder
        self.input_ids = self.tf.placeholder(self.tf.int32, (None, 45), 'input_ids')
        self.input_mask = self.tf.placeholder(self.tf.int32, (None, 45), 'input_mask')
        self.input_type_ids = self.tf.placeholder(self.tf.int32, (None, 45), 'input_type_ids')
        # init graph
        self._init_graph()
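
For the sentence-pair placeholders above (fixed length 45, with input_type_ids carrying BERT segment ids), input preparation might look like this hypothetical helper:

    def _build_pair_feed(self, text_a, text_b, max_len=45):
        # [CLS] tokens_a [SEP] tokens_b [SEP]; segment id 0 covers the first
        # sentence plus [CLS]/[SEP], segment id 1 covers the second sentence.
        tokens_a = self.tokenizer.tokenize(text_a)
        tokens_b = self.tokenizer.tokenize(text_b)
        tokens = ['[CLS]'] + tokens_a + ['[SEP]'] + tokens_b + ['[SEP]']
        type_ids = [0] * (len(tokens_a) + 2) + [1] * (len(tokens_b) + 1)
        ids = self.tokenizer.convert_tokens_to_ids(tokens)[:max_len]
        type_ids = type_ids[:max_len]
        mask = [1] * len(ids)
        pad = max_len - len(ids)
        return {self.input_ids: [ids + [0] * pad],
                self.input_mask: [mask + [0] * pad],
                self.input_type_ids: [type_ids + [0] * pad]}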
Example #8
    def __init__(self, args):
        super().__init__()
        self.logger = set_logger('VENTILATOR')

        self.model_dir = args.model_dir
        self.max_seq_len = args.max_seq_len
        self.num_worker = args.num_worker
        self.max_batch_size = args.max_batch_size
        self.port = args.port
        self.args = args
        self.args_dict = {
            'model_dir': args.model_dir,
            'max_seq_len': args.max_seq_len,
            'num_worker': args.num_worker,
            'max_batch_size': args.max_batch_size,
            'port': args.port,
            'port_out': args.port_out,
            'pooling_layer': args.pooling_layer,
            'pooling_strategy': args.pooling_strategy.value,
            'tensorflow_version': tf.__version__,
            'python_version': sys.version,
            'server_start_time': str(datetime.now())
        }
        self.processes = []
        self.context = zmq.Context()

        # frontend facing client
        self.frontend = self.context.socket(zmq.PULL)
        self.frontend.bind('tcp://*:%d' % self.port)

        # pair connection between frontend and sink
        self.sink = self.context.socket(zmq.PAIR)
        self.sink.bind('ipc://*')
        self.addr_front2sink = self.sink.getsockopt(
            zmq.LAST_ENDPOINT).decode('ascii')

        # backend facing workers
        self.backend = self.context.socket(zmq.PUSH)
        self.backend.bind('ipc://*')
        self.addr_backend = self.backend.getsockopt(
            zmq.LAST_ENDPOINT).decode('ascii')

        # start the sink thread
        proc_sink = BertSink(self.args, self.addr_front2sink)
        proc_sink.start()
        self.processes.append(proc_sink)
        self.addr_sink = self.sink.recv().decode('ascii')
        self.logger.info('frontend-sink ipc: %s' % self.addr_sink)
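
The wildcard ipc://* binds together with the zmq.LAST_ENDPOINT lookups above are how the ventilator discovers the addresses it later hands to the sink and the workers; a self-contained sketch of that pattern (independent of this server's message format) is:

import zmq

ctx = zmq.Context()
backend = ctx.socket(zmq.PUSH)
backend.bind('ipc://*')  # let ZeroMQ pick a random ipc endpoint
addr = backend.getsockopt(zmq.LAST_ENDPOINT).decode('ascii')

worker = ctx.socket(zmq.PULL)
worker.connect(addr)     # a worker connects using the discovered address

backend.send(b'job-0')
print(worker.recv())     # b'job-0'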
Example #9
 def __init__(self, id, args):
     super().__init__()
     self.model_dir = args.model_dir
     self.config_fp = os.path.join(self.model_dir, 'bert_config.json')
     self.checkpoint_fp = os.path.join(self.model_dir, 'bert_model.ckpt')
     self.vocab_fp = os.path.join(args.model_dir, 'vocab.txt')
     self.tokenizer = tokenization.FullTokenizer(vocab_file=self.vocab_fp)
     self.max_seq_len = args.max_seq_len
     self.worker_id = id
     self.daemon = True
     self.model_fn = model_fn_builder(
         bert_config=modeling.BertConfig.from_json_file(self.config_fp),
         init_checkpoint=self.checkpoint_fp)
     os.environ['CUDA_VISIBLE_DEVICES'] = str(self.worker_id)
     self.estimator = Estimator(self.model_fn)
     self.exit_flag = multiprocessing.Event()
     self.logger = set_logger('WORKER-%d' % self.worker_id)
Example #10
 def __init__(self, args, front_sink_addr):
     super().__init__()
     self.port = args.port_out
     self.exit_flag = multiprocessing.Event()
     self.logger = set_logger('SINK')
     self.front_sink_addr = front_sink_addr
Example #11
import os
import pickle
import threading
import time
from multiprocessing import Process

import tensorflow as tf
import zmq
from tensorflow.python.estimator.estimator import Estimator

from bert import tokenization, modeling
from bert.extract_features import model_fn_builder, convert_lst_to_features
from helper import set_logger

logger = set_logger()


class BertServer(threading.Thread):
    def __init__(self, args):
        super().__init__()
        self.model_dir = args.model_dir
        self.max_len = args.max_len
        self.num_worker = args.num_worker
        self.port = args.port
        self.args = args

    def run(self):
        context = zmq.Context.instance()
        frontend = context.socket(zmq.ROUTER)
        frontend.bind('tcp://*:%d' % self.port)
Example #12
                                                   "input_type_ids": tf.int32,
                                               },
                                               output_shapes={
                                                   "unique_ids": [None],
                                                   "input_ids": [None, None],
                                                   "input_mask": [None, None],
                                                   "input_type_ids":
                                                   [None, None],
                                               }))

    return input_fn


args = get_run_args()

logger = set_logger(colored('VENTILATOR', 'magenta'), args.verbose)
graph_path, bert_config = optimize_graph(args=args)

if graph_path:
    logger.info('optimized graph is stored at: %s' % graph_path)

logger.info('use device %s, load graph from %s' % ('cpu', graph_path))

tf = import_tf(device_id=-1, verbose=args.verbose, use_fp16=args.fp16)
estimator = get_estimator(args=args, tf=tf, graph_path=graph_path)

save_hook = tf.train.CheckpointSaverHook(checkpoint_dir=args.checkpoint_dir,
                                         save_secs=1)
predicts = estimator.predict(input_fn=input_fn_builder(), hooks=[save_hook])

for predict in predicts:
Example #13
def optimize_graph(args, logger=None):
    if not logger:
        logger = set_logger(colored('GRAPHOPT', 'cyan'), args.verbose)
    try:
        # we don't need GPU for optimizing the graph
        tf = import_tf(verbose=args.verbose)
        from tensorflow.python.tools.optimize_for_inference_lib import optimize_for_inference

        config = tf.ConfigProto(device_count={'GPU': 0},
                                allow_soft_placement=True)

        config_fp = os.path.join(args.model_dir, args.config_name)
        init_checkpoint = os.path.join(args.tuned_model_dir or args.model_dir,
                                       args.ckpt_name)
        if args.fp16:
            logger.warning(
                'fp16 is turned on! '
                'Note that not all CPUs/GPUs support fast fp16 instructions, '
                'worst case you will have degraded performance!')
        logger.info('model config: %s' % config_fp)
        logger.info('checkpoint%s: %s' %
                    (' (override by the fine-tuned model)'
                     if args.tuned_model_dir else '', init_checkpoint))
        with tf.gfile.GFile(config_fp, 'r') as f:
            bert_config = modeling.BertConfig.from_dict(json.load(f))

        logger.info('build graph...')
        # input placeholders, not sure if they are friendly to XLA
        input_ids = tf.placeholder(tf.int32, (None, None), 'input_ids')
        input_mask = tf.placeholder(tf.int32, (None, None), 'input_mask')
        input_type_ids = tf.placeholder(tf.int32, (None, None),
                                        'input_type_ids')

        jit_scope = tf.contrib.compiler.jit.experimental_jit_scope if args.xla else contextlib.suppress

        with jit_scope():
            input_tensors = [input_ids, input_mask, input_type_ids]

            model = modeling.BertModel(config=bert_config,
                                       is_training=False,
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=input_type_ids,
                                       use_one_hot_embeddings=False)

            tvars = tf.trainable_variables()

            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, init_checkpoint)

            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

            minus_mask = lambda x, m: x - tf.expand_dims(1.0 - m, axis=-1) * 1e30
            mul_mask = lambda x, m: x * tf.expand_dims(m, axis=-1)
            masked_reduce_max = lambda x, m: tf.reduce_max(minus_mask(x, m), axis=1)
            masked_reduce_mean = lambda x, m: (
                tf.reduce_sum(mul_mask(x, m), axis=1) /
                (tf.reduce_sum(m, axis=1, keepdims=True) + 1e-10))

            with tf.variable_scope("pooling"):
                if len(args.pooling_layer) == 1:
                    encoder_layer = model.all_encoder_layers[
                        args.pooling_layer[0]]
                else:
                    all_layers = [
                        model.all_encoder_layers[l] for l in args.pooling_layer
                    ]
                    encoder_layer = tf.concat(all_layers, -1)

                input_mask = tf.cast(input_mask, tf.float32)
                if args.pooling_strategy == PoolingStrategy.REDUCE_MEAN:
                    pooled = masked_reduce_mean(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MAX:
                    pooled = masked_reduce_max(encoder_layer, input_mask)
                elif args.pooling_strategy == PoolingStrategy.REDUCE_MEAN_MAX:
                    pooled = tf.concat([
                        masked_reduce_mean(encoder_layer, input_mask),
                        masked_reduce_max(encoder_layer, input_mask)
                    ], axis=1)
                elif args.pooling_strategy == PoolingStrategy.FIRST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.CLS_TOKEN:
                    pooled = tf.squeeze(encoder_layer[:, 0:1, :], axis=1)
                elif args.pooling_strategy == PoolingStrategy.LAST_TOKEN or \
                        args.pooling_strategy == PoolingStrategy.SEP_TOKEN:
                    seq_len = tf.cast(tf.reduce_sum(input_mask, axis=1),
                                      tf.int32)
                    rng = tf.range(0, tf.shape(seq_len)[0])
                    indexes = tf.stack([rng, seq_len - 1], 1)
                    pooled = tf.gather_nd(encoder_layer, indexes)
                elif args.pooling_strategy == PoolingStrategy.NONE:
                    pooled = mul_mask(encoder_layer, input_mask)
                else:
                    raise NotImplementedError()

            if args.fp16:
                pooled = tf.cast(pooled, tf.float16)

            pooled = tf.identity(pooled, 'final_encodes')
            output_tensors = [pooled]
            tmp_g = tf.get_default_graph().as_graph_def()

        with tf.Session(config=config) as sess:
            logger.info('load parameters from checkpoint...')

            sess.run(tf.global_variables_initializer())
            dtypes = [n.dtype for n in input_tensors]
            logger.info('optimize...')
            tmp_g = optimize_for_inference(
                tmp_g, [n.name[:-2] for n in input_tensors],
                [n.name[:-2] for n in output_tensors],
                [dtype.as_datatype_enum for dtype in dtypes], False)

            logger.info('freeze...')
            tmp_g = convert_variables_to_constants(
                sess,
                tmp_g, [n.name[:-2] for n in output_tensors],
                use_fp16=args.fp16)

        tmp_file = tempfile.NamedTemporaryFile('w',
                                               delete=False,
                                               dir=args.graph_tmp_dir).name
        logger.info('write graph to a tmp file: %s' % tmp_file)
        with tf.gfile.GFile(tmp_file, 'wb') as f:
            f.write(tmp_g.SerializeToString())
        return tmp_file, bert_config
    except Exception:
        logger.error('fail to optimize the graph!', exc_info=True)
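
The temporary file written by optimize_graph() is a frozen GraphDef; loading it back for inference could look like the sketch below, where the tensor names match the placeholders and the final tf.identity ('final_encodes') defined above (TF1-style API, as in the rest of these examples):

def load_frozen_graph(graph_path):
    import tensorflow as tf
    # Read the serialized, frozen GraphDef produced by optimize_graph().
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(graph_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    feed_tensors = [graph.get_tensor_by_name(name + ':0')
                    for name in ('input_ids', 'input_mask', 'input_type_ids')]
    output_tensor = graph.get_tensor_by_name('final_encodes:0')
    return graph, feed_tensors, output_tensor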