def gen(): tokenizer = FullTokenizer(vocab_file=os.path.join(self.args.bert_model_dir, 'vocab.txt')) # Windows does not support logger in MP environment, thus get a new logger # inside the process for better compatibility logger = set_logger(colored('WORKER-%d' % self.worker_id, 'yellow'), self.verbose) poller = zmq.Poller() for sock in socks: poller.register(sock, zmq.POLLIN) logger.info('ready and listening!') while not self.exit_flag.is_set(): events = dict(poller.poll()) for sock_idx, sock in enumerate(socks): if sock in events: #接收来自客户端的消息 client_id, raw_msg = sock.recv_multipart() msg = jsonapi.loads(raw_msg) logger.info('new job\tsocket: %d\tsize: %d\tclient: %s' % (sock_idx, len(msg), client_id)) # check if msg is a list of list, if yes consider the input is already tokenized # 对接收到的字符进行切词,并且转化为id格式 # logger.info('get msg:%s, type:%s' % (msg[0], type(msg[0]))) is_tokenized = all(isinstance(el, list) for el in msg) tmp_f = list(convert_lst_to_features(msg, self.max_seq_len, tokenizer, logger, is_tokenized, self.mask_cls_sep)) #print([f.input_ids for f in tmp_f]) yield { 'client_id': client_id, 'input_ids': [f.input_ids for f in tmp_f], 'input_mask': [f.input_mask for f in tmp_f], 'input_type_ids': [f.input_type_ids for f in tmp_f] }
def localTestPb():
    """Local smoke test: tokenize one hard-coded Chinese sentence, load the
    exported SavedModel from disk, and print the classifier logits.

    Side effects: loads model files from hard-coded Windows paths, creates a
    TF session, prints to stdout. Returns None.

    NOTE(review): the vocab path uses 'pycharm--code' while the export path
    uses 'Pycharm--code' — casing is inconsistent; harmless on Windows
    (case-insensitive FS) but verify which spelling is correct.
    """
    sentence = [
        '重庆市南川中学副校长张竞说,寒假放假前高三年级还有10%左右的内容没复习完,学校开学受疫情影响延期不少,“老师们压力比较大,怕耽误复习进度。”'
    ]
    # A list-of-lists input would mean "already tokenized"; a list of strings
    # (as here) means convert_lst_to_features must tokenize it.
    is_tokenized = all(isinstance(el, list) for el in sentence)
    tokenizer = FullTokenizer(vocab_file=os.path.join(
        "D:\\LiuXianXian\\pycharm--code\\BertServiceBase_classify\\checkpoints",
        'vocab.txt'))
    logger = set_logger('WORKER-%d')
    # max_seq_len=128, mask_cls_sep=False — mirror the serving configuration.
    tmp_f = list(
        convert_lst_to_features(sentence, 128, tokenizer, logger,
                                is_tokenized, False))
    ids = tmp_f[0].input_ids
    mask = tmp_f[0].input_mask
    segment_ids = tmp_f[0].input_type_ids
    export_dir = 'D:\\LiuXianXian\\Pycharm--code\\BertServiceBase_classify\\pbs'
    # Use a context manager so the session (and its resources) is always
    # released — the original leaked it. The premature
    # global_variables_initializer() call was dropped: it ran before the
    # SavedModel was loaded, when the graph had no variables, and the
    # SavedModel loader restores variable values itself.
    with tf.Session() as sess:
        meta_graph_def = tf.saved_model.loader.load(
            sess, [tf.saved_model.tag_constants.SERVING], export_dir)
        signature = meta_graph_def.signature_def
        # Batch of one: wrap each feature list in an outer list.
        feed_dict = {
            "input_ids": [ids],
            "input_mask": [mask],
            "segment_ids": [segment_ids]
        }
        logits = predict(sess, feed_dict=feed_dict, signature=signature)
        print(logits)