Code example #1
0
        def gen():
            """Yield feature dicts for incoming client jobs until exit_flag is set.

            Listens on every worker socket via a zmq Poller. Each received
            message is converted to BERT input features and yielded as a dict
            containing the client id plus the three model input arrays.
            """
            tokenizer = FullTokenizer(
                vocab_file=os.path.join(self.args.bert_model_dir, 'vocab.txt'))
            # Windows does not support logger in MP environment, thus get a new
            # logger inside the process for better compatibility
            logger = set_logger(colored('WORKER-%d' % self.worker_id, 'yellow'), self.verbose)

            poller = zmq.Poller()
            for s in socks:
                poller.register(s, zmq.POLLIN)

            logger.info('ready and listening!')
            while not self.exit_flag.is_set():
                # NOTE(review): poll() blocks with no timeout, so exit_flag is
                # only re-checked after the next message arrives.
                ready = dict(poller.poll())
                for idx, s in enumerate(socks):
                    if s not in ready:
                        continue
                    # receive one job from a client
                    client_id, raw_msg = s.recv_multipart()
                    msg = jsonapi.loads(raw_msg)
                    logger.info('new job\tsocket: %d\tsize: %d\tclient: %s' % (idx, len(msg), client_id))
                    # a list of lists means the input is already tokenized
                    pre_tokenized = all(isinstance(el, list) for el in msg)
                    features = list(convert_lst_to_features(
                        msg, self.max_seq_len, tokenizer, logger,
                        pre_tokenized, self.mask_cls_sep))
                    yield {
                        'client_id': client_id,
                        'input_ids': [f.input_ids for f in features],
                        'input_mask': [f.input_mask for f in features],
                        'input_type_ids': [f.input_type_ids for f in features],
                    }
Code example #2
0
def localTestPb(export_dir='D:\\LiuXianXian\\Pycharm--code\\BertServiceBase_classify\\pbs',
                checkpoint_dir='D:\\LiuXianXian\\pycharm--code\\BertServiceBase_classify\\checkpoints',
                max_seq_len=128):
    """Local smoke test of the exported SavedModel classifier.

    Tokenizes a single hard-coded Chinese sentence, loads the SavedModel
    from *export_dir*, runs one prediction and prints/returns the logits.

    Args:
        export_dir: directory containing the exported SavedModel (pb files).
        checkpoint_dir: directory containing the BERT vocab.txt.
        max_seq_len: maximum sequence length passed to the feature converter.

    Returns:
        The logits produced by ``predict`` (also printed).
    """
    sentence = [
        '重庆市南川中学副校长张竞说,寒假放假前高三年级还有10%左右的内容没复习完,学校开学受疫情影响延期不少,“老师们压力比较大,怕耽误复习进度。”'
    ]
    # A list of lists would mean the input is already tokenized.
    is_tokenized = all(isinstance(el, list) for el in sentence)
    tokenizer = FullTokenizer(vocab_file=os.path.join(checkpoint_dir, 'vocab.txt'))
    # Fixed: original passed 'WORKER-%d' with no argument, leaving a bare
    # '%d' placeholder in the logger name.
    logger = set_logger('WORKER-local')
    features = list(
        convert_lst_to_features(sentence, max_seq_len, tokenizer, logger,
                                is_tokenized, False))
    ids = features[0].input_ids
    mask = features[0].input_mask
    segment_ids = features[0].input_type_ids

    # Context manager guarantees the session is closed (original leaked it).
    with tf.Session() as sess:
        # loader.load restores all variables saved in the SavedModel, so the
        # original global_variables_initializer() call (run before any
        # variables even existed in the graph) was a no-op and is dropped.
        meta_graph_def = tf.saved_model.loader.load(
            sess, [tf.saved_model.tag_constants.SERVING], export_dir)
        signature = meta_graph_def.signature_def
        feed_dict = {
            "input_ids": [ids],
            "input_mask": [mask],
            "segment_ids": [segment_ids]
        }
        logits = predict(sess, feed_dict=feed_dict, signature=signature)
        print(logits)
        return logits