# Required imports (assumed): TensorFlow, plus the project-local ``utils`` and
# ``serving`` modules that provide pad_lists() and TranslationOutput (their
# import path depends on the repository layout).
import tensorflow as tf

def forward_request(self, model_info, inputs, outputs=None, options=None):
    # model_info holds the translation function exported by the loaded model.
    translate_fn = model_info
    # Pad the tokenized batch to a rectangular shape and keep the true lengths.
    tokens, lengths = utils.pad_lists(inputs, padding_value='')
    model_outputs = translate_fn(
        tokens=tf.constant(tokens, dtype=tf.string),
        length=tf.constant(lengths, dtype=tf.int32))
    batch_predictions = model_outputs['tokens'].numpy()
    batch_lengths = model_outputs['length'].numpy()
    batch_log_probs = model_outputs['log_probs'].numpy()
    batch_outputs = []
    for predictions, lengths, log_probs in zip(
            batch_predictions, batch_lengths, batch_log_probs):
        hypotheses = []
        for prediction, length, log_prob in zip(predictions, lengths, log_probs):
            # Trim the padding, then decode the byte tokens to Unicode strings.
            prediction = prediction[:length].tolist()
            prediction = [token.decode('utf-8') for token in prediction]
            score = float(log_prob)
            hypotheses.append(serving.TranslationOutput(prediction, score=score))
        batch_outputs.append(hypotheses)
    return batch_outputs
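# Both backends rely on utils.pad_lists() to turn a ragged batch of token lists
# into a rectangular batch plus the original lengths. The helper itself is not
# shown in this section; the following is a minimal sketch of the assumed
# behavior, inferred from the two call sites, not the project's actual code.
def pad_lists(lists, padding_value=None, max_length=None):
    """Pads lists to a common length; returns the padded lists and true lengths."""
    if max_length is None:
        max_length = max(len(lst) for lst in lists)
    lengths = [len(lst) for lst in lists]
    padded = [lst + [padding_value] * (max_length - len(lst)) for lst in lists]
    return padded, lengths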
# Required imports (assumed): grpcio, TensorFlow, and the TensorFlow Serving
# client stubs; ``utils`` and ``serving`` are the same project-local modules.
import logging

import grpc
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

logger = logging.getLogger(__name__)

def forward_request(self, batch_inputs, info, timeout=None):
    # Open a channel to the TensorFlow Serving instance described by `info`.
    channel = grpc.insecure_channel("localhost:%s" % info['port'])
    stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
    max_length = max(len(src) for src in batch_inputs)
    tokens, lengths = utils.pad_lists(
        batch_inputs, padding_value='', max_length=max_length)
    batch_size = len(lengths)
    # Build the PredictRequest with the padded tokens and their true lengths.
    predict_request = predict_pb2.PredictRequest()
    predict_request.model_spec.name = info['model_name']
    predict_request.inputs['tokens'].CopyFrom(
        tf.make_tensor_proto(tokens, dtype=tf.string, shape=(batch_size, max_length)))
    predict_request.inputs['length'].CopyFrom(
        tf.make_tensor_proto(lengths, dtype=tf.int32, shape=(batch_size,)))
    try:
        future = stub.Predict.future(predict_request, timeout)
        result = future.result()
    except grpc.RpcError as e:  # Raised on deadline expiration and other RPC failures.
        logger.error('%s', e)
        return None
    lengths = tf.make_ndarray(result.outputs['length'])
    predictions = tf.make_ndarray(result.outputs['tokens'])
    log_probs = tf.make_ndarray(result.outputs['log_probs'])
    batch_outputs = []
    for hypotheses, length, log_prob in zip(predictions, lengths, log_probs):
        outputs = []
        for i, prediction in enumerate(hypotheses):
            prediction_length = length[i] - 1  # Ignore the trailing </s> token.
            prediction = prediction[:prediction_length].tolist()
            prediction = [tf.compat.as_text(pred) for pred in prediction]
            score = float(log_prob[i])
            outputs.append(serving.TranslationOutput(prediction, score=score))
        batch_outputs.append(outputs)
    return batch_outputs
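# Hypothetical usage sketch for the gRPC backend above. The backend class name,
# port, and model name are illustrative assumptions, and TranslationOutput is
# assumed to expose the token list and score as .output and .score.
backend = TensorFlowServingBackend()  # assumed owner class of forward_request
info = {'port': 9000, 'model_name': 'ende'}
batch = [['Hello', 'world', '!'], ['How', 'are', 'you', '?']]
results = backend.forward_request(batch, info, timeout=10.0)
if results is not None:  # None signals a failed or timed-out RPC.
    for hypotheses in results:
        best = hypotheses[0]
        print(' '.join(best.output), best.score)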