# NOTE: First install bert-as-service via
# $
# $ pip install serving-server
# $ pip install serving-client
# $

# Minimal demo: encode a fixed sentence with BertClient in synchronous mode.
# Requires a serving server already listening on ports 6006/6007.
import sys
import time

from transformer_serving.client import BertClient

if __name__ == '__main__':
    port = 6006       # push (request) port of the running server
    port_out = 6007   # pull (response) port of the running server

    # timeout=-1 blocks forever until the server answers; show_server_config
    # prints the server's settings on connect so mismatches are visible.
    bc = BertClient(port=port,
                    port_out=port_out,
                    show_server_config=True,
                    timeout=-1)

    # Encode a single dummy string; the first call also warms up the GPU.
    data = ['aaaaaaaaa']
    output = bc.encode(data)
    print(output)
# NOTE(review): this line is a collapsed AND truncated script fragment — it ends
# mid-dict-literal at `features = {` and the remainder (the feature dict body,
# the Example serialization, and the writer loop tail) is not visible here.
# The code is therefore left byte-identical rather than reformatted or fixed.
# What the visible part does: pins CUDA to the first free GPU, encodes the
# lines of README.md via BertClient, pairs each vector with a dummy 0 label,
# and begins writing them to 'tmp.tfrecord' with tf.python_io.TFRecordWriter.
# read and write TFRecord import os import GPUtil import tensorflow as tf from transformer_serving.client import BertClient os.environ['CUDA_VISIBLE_DEVICES'] = str(GPUtil.getFirstAvailable()[0]) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) with open('README.md') as fp: data = [v for v in fp if v.strip()] bc = BertClient() list_vec = bc.encode(data) list_label = [0 for _ in data] # a dummy list of all-zero labels # write tfrecords with tf.python_io.TFRecordWriter('tmp.tfrecord') as writer: def create_float_feature(values): return tf.train.Feature(float_list=tf.train.FloatList(value=values)) def create_int_feature(values): return tf.train.Feature(int64_list=tf.train.Int64List( value=list(values))) for (vec, label) in zip(list_vec, list_label): features = {
# NOTE(review): this line is a collapsed fragment whose beginning lies outside
# this view — it opens mid-list (the common server-argument list) and uses
# names defined earlier in the original file (get_args_parser, BertServer,
# port, port_out, subset_text, subset_label, subset_vec_all_layers, time, np).
# It is left byte-identical; reformatting or restyling would require guessing
# the missing context. What the visible part does: for pooling layers -1..-12
# it starts a BertServer, encodes a text subset, collects the vectors, saves
# them plus labels with np.save, then reloads a (differently named — verify)
# pair of .npy files for visualization.
'-pooling_strategy', 'REDUCE_MEAN', '-pooling_layer', '-2', '-gpu_memory_fraction', '0.2', '-device','3', ] args = get_args_parser().parse_args(common) for pool_layer in range(1, 13): setattr(args, 'pooling_layer', [-pool_layer]) server = BertServer(args) server.start() print('wait until server is ready...') time.sleep(20) print('encoding...') bc = BertClient(port=port, port_out=port_out, show_server_config=True) subset_vec_all_layers.append(bc.encode(subset_text)) bc.close() server.close() print('done at layer -%d' % pool_layer) #save bert vectors and labels stacked_subset_vec_all_layers = np.stack(subset_vec_all_layers) np.save('example7_5k_2',stacked_subset_vec_all_layers) np_subset_label = np.array(subset_label) np.save('example7_5k_2_subset_label',np_subset_label) #load bert vectors and labels subset_vec_all_layers = np.load('example7_5k_mxnet.npy') np_subset_label = np.load('example7_5k_mxnet_subset_label.npy') subset_label = np_subset_label.tolist() #=========================== visualize ===========================
# $
# Throughput benchmark: encode exponentially growing batches of README.md
# lines with a synchronous BertClient and report samples/s and tokens/s.
# Usage: script.py <port> <port_out> [show_tokens]
import sys
import time

from transformer_serving.client import BertClient

if __name__ == '__main__':
    client = BertClient(port=int(sys.argv[1]),
                        port_out=int(sys.argv[2]),
                        show_server_config=True)

    # Load up to 512 non-blank lines as the base workload and count their
    # whitespace-separated tokens once, up front.
    with open('README.md') as fp:
        data = [line for line in fp if line.strip()][:512]
    num_tokens = sum(
        len([tok for tok in line.split() if tok.strip()]) for line in data)

    # NOTE(review): bool() of any non-empty argv string is True (even "0" or
    # "False") — behavior preserved as-is; confirm whether that is intended.
    show_tokens = len(sys.argv) > 3 and bool(sys.argv[3])

    client.encode(data)  # warm-up GPU

    # Double the batch size ten times and time each encode round-trip.
    for exponent in range(10):
        batch = data * (2 ** exponent)
        batch_tokens = num_tokens * (2 ** exponent)
        started = time.time()
        client.encode(batch, show_tokens=show_tokens)
        elapsed = time.time() - started
        print('encoding %10d sentences\t%.2fs\t%4d samples/s\t%6d tokens/s' %
              (len(batch), elapsed,
               int(len(batch) / elapsed), int(batch_tokens / elapsed)))
# Multicast demo: one primary BertClient encodes, while two cloned clients
# sharing its identity passively fetch the same results from the server.
# Usage: script.py <port> <port_out>
import sys
import threading

from transformer_serving.client import BertClient


def client_clone(client_id, idx):
    """Attach a clone with the given identity and print every result it fetches."""
    clone = BertClient(port=int(sys.argv[1]),
                       port_out=int(sys.argv[2]),
                       identity=client_id)
    for vec in clone.fetch():
        print('clone-client-%d: received %d x %d' %
              (idx, vec.shape[0], vec.shape[1]))


if __name__ == '__main__':
    primary = BertClient(port=int(sys.argv[1]), port_out=int(sys.argv[2]))

    # Start two cloned clients sharing the same identity as the primary,
    # so each receives a copy of every result the server sends back.
    for idx in range(2):
        threading.Thread(target=client_clone,
                         args=(primary.identity, idx)).start()

    with open('README.md') as fp:
        data = [line for line in fp if line.strip()]

    for _ in range(3):
        vec = primary.encode(data)
        print('bc received %d x %d' % (vec.shape[0], vec.shape[1]))