def client_clone(identity, idx):
    """Receive-only clone client: attach to an existing request stream.

    Connects a `BertClient` with an explicit ZMQ `identity` so it receives
    the results of requests sent by the original client, then prints the
    shape of every fetched result matrix.

    :param identity: identity bytes/str forwarded to `BertClient(identity=...)`
                     (renamed from `id`, which shadowed the builtin; these
                     example scripts invoke it positionally via thread args)
    :param idx: integer label used only in the progress printout
    """
    # ports come from the command line: argv[1] = push port, argv[2] = pull port
    bc = BertClient(port=int(sys.argv[1]), port_out=int(sys.argv[2]),
                    identity=identity)
    # fetch() yields result arrays; j.shape is (num_samples, embedding_dim)
    for j in bc.fetch():
        print('clone-client-%d: received %d x %d' % (idx, j.shape[0], j.shape[1]))
def run(self):
    """Benchmark encoding latency against a running BertServer.

    Encodes `self.batch` `self.num_repeat` times and stores the mean
    wall-clock latency (excluding warm-up runs) in `self.avg_time`.

    :raises ImportError: if the serving client package is not installed
    """
    try:
        from transformer_serving.client import BertClient
    except ImportError:
        raise ImportError(
            'BertClient module is not available, it is required for benchmarking.'
            'Please use "pip install -U serving-client" to install it.')
    with BertClient(port=self.port, port_out=self.port_out,
                    show_server_config=True, check_version=False,
                    check_length=False) as bc:
        time_all = []
        for _ in range(self.num_repeat):
            start_t = time.perf_counter()
            bc.encode(self.batch)
            time_all.append(time.perf_counter() - start_t)
        # Drop the first TWO samples: they are often slow due to
        # cold-start/warm-up effects. (The original comment said "first one"
        # but the slice discarded two — keep the two-sample warm-up, which is
        # what was measured, and document it accurately.)
        # Fall back to all samples when num_repeat <= 2, otherwise
        # statistics.mean would raise on an empty list.
        samples = time_all[2:] if len(time_all) > 2 else time_all
        self.avg_time = mean(samples)
def __init__(self, max_concurrency=10, **kwargs):
    """A thread-safe pool of `BertClient` connections to one BertServer.

    The server must already be up when this runs; if you are not sure it is
    ready, pass `check_version=False` and `check_length=False` in `kwargs`.

    :type max_concurrency: int
    :param max_concurrency: the maximum number of concurrent connections allowed
    :param kwargs: forwarded verbatim to every `BertClient` constructor
    :raises ImportError: if the serving client package is not installed
    """
    try:
        from transformer_serving.client import BertClient
    except ImportError:
        raise ImportError(
            'BertClient module is not available, it is required for serving HTTP requests.'
            'Please use "pip install -U serving-client" to install it.'
            'If you do not want to use it as an HTTP server, '
            'then remove "-http_port" from the command line.')
    self.max_concurrency = max_concurrency
    # Eagerly build the fixed-size pool of ready-to-use clients.
    pool = []
    for _ in range(max_concurrency):
        pool.append(BertClient(**kwargs))
    self.available_bc = pool
import numpy as np
from transformer_serving.client import BertClient
from termcolor import colored

# Lines in README.md marked with this prefix are treated as questions;
# topk controls how many nearest questions are displayed per query.
prefix_q = '##### **Q:** '
topk = 5

with open('README.md') as fp:
    # keep non-empty lines starting with the question marker, marker stripped
    questions = [
        v.replace(prefix_q, '').strip() for v in fp
        if v.strip() and v.startswith(prefix_q)
    ]
    print('%d questions loaded, avg. len of %d' %
          (len(questions), np.mean([len(d.split()) for d in questions])))

with BertClient(port=4000, port_out=4001) as bc:
    # encode all candidate questions once up front
    doc_vecs = bc.encode(questions)
    # interactive loop: encode the user's query and rank stored questions
    while True:
        query = input(colored('your question: ', 'green'))
        query_vec = bc.encode([query])[0]
        # compute normalized dot product as score
        # NOTE(review): only doc_vecs are norm-divided here, not query_vec —
        # ranking is unaffected since the query norm is a constant factor
        # across all documents, but the scores are not true cosine values.
        score = np.sum(query_vec * doc_vecs, axis=1) / np.linalg.norm(doc_vecs, axis=1)
        # indices of the topk highest scores, best first
        topk_idx = np.argsort(score)[::-1][:topk]
        print('top %d questions similar to "%s"' % (topk, colored(query, 'green')))
        for idx in topk_idx:
            print('> %s\t%s' % (colored('%.1f' % score[idx], 'cyan'),
                                colored(questions[idx], 'yellow')))
# NOTE: First install bert-as-service via
# $
# $ pip install serving-server
# $ pip install serving-client
# $
# Minimal demo: use BertClient synchronously against a local server.
import sys
import time

from transformer_serving.client import BertClient

if __name__ == '__main__':
    # timeout=-1 waits indefinitely for the server's response
    bc = BertClient(port=6006, port_out=6007,
                    show_server_config=True, timeout=-1)
    # encode a single dummy sentence (also serves as GPU warm-up)
    sentences = ['aaaaaaaaa']
    vecs = bc.encode(sentences)  # warm-up GPU
    print(vecs)
# $ # read and write TFRecord import os import GPUtil import tensorflow as tf from transformer_serving.client import BertClient os.environ['CUDA_VISIBLE_DEVICES'] = str(GPUtil.getFirstAvailable()[0]) tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO) with open('README.md') as fp: data = [v for v in fp if v.strip()] bc = BertClient() list_vec = bc.encode(data) list_label = [0 for _ in data] # a dummy list of all-zero labels # write tfrecords with tf.python_io.TFRecordWriter('tmp.tfrecord') as writer: def create_float_feature(values): return tf.train.Feature(float_list=tf.train.FloatList(value=values)) def create_int_feature(values): return tf.train.Feature(int64_list=tf.train.Int64List( value=list(values))) for (vec, label) in zip(list_vec, list_label):
# NOTE: First install bert-as-service via # $ # $ pip install serving-server # $ pip install serving-client # $ # using BertClient in sync way import sys import time from transformer_serving.client import BertClient if __name__ == '__main__': bc = BertClient(port=int(sys.argv[1]), port_out=int(sys.argv[2]), show_server_config=True) # encode a list of strings with open('README.md') as fp: data = [v for v in fp if v.strip()][:512] num_tokens = sum( len([vv for vv in v.split() if vv.strip()]) for v in data) show_tokens = len(sys.argv) > 3 and bool(sys.argv[3]) bc.encode(data) # warm-up GPU for j in range(10): tmp = data * (2**j) c_num_tokens = num_tokens * (2**j) start_t = time.time() bc.encode(tmp, show_tokens=show_tokens) time_t = time.time() - start_t
import sys

from transformer_serving.client import BertClient


def send_without_block(bc, data, repeat=10):
    """Push `data` to the server `repeat` times without waiting for results.

    :param bc: a connected BertClient
    :param data: list of strings to encode
    :param repeat: how many times to resend the same batch
    """
    # encoding without blocking:
    print('sending all data without blocking...')
    for _ in range(repeat):
        bc.encode(data, blocking=False)
    print('all sent!')


if __name__ == '__main__':
    # ports come from the command line: argv[1] = push port, argv[2] = pull port
    bc = BertClient(port=int(sys.argv[1]), port_out=int(sys.argv[2]))
    num_repeat = 20
    with open('README.md') as fp:
        data = [v for v in fp if v.strip()]
    send_without_block(bc, data, num_repeat)
    # every repeat produces len(data) vectors
    num_expect_vecs = len(data) * num_repeat
    # then fetch all
    print('now waiting until all results are available...')
    vecs = bc.fetch_all(concat=True)
    print('received %s, expected: %d' % (vecs.shape, num_expect_vecs))
    # now send it again
    # NOTE(review): chunk appears truncated here — a second send/fetch round
    # presumably follows.