def forward_request(self, model_info, inputs, outputs=None, options=None):
        # model_info is the serving function of the loaded SavedModel.
        translate_fn = model_info

        # Pad the ragged token lists into a rectangular batch and keep the
        # original lengths so the padding can be masked.
        tokens, lengths = utils.pad_lists(inputs, padding_value='')
        outputs = translate_fn(tokens=tf.constant(tokens, dtype=tf.string),
                               length=tf.constant(lengths, dtype=tf.int32))

        batch_predictions = outputs['tokens'].numpy()
        batch_lengths = outputs['length'].numpy()
        batch_log_probs = outputs['log_probs'].numpy()

        # Convert the model outputs into TranslationOutput objects, one list
        # of hypotheses per input sentence.
        batch_outputs = []
        for predictions, lengths, log_probs in zip(batch_predictions,
                                                   batch_lengths,
                                                   batch_log_probs):
            outputs = []
            for prediction, length, log_prob in zip(predictions, lengths,
                                                    log_probs):
                # Trim padding and decode the byte tokens to Unicode strings.
                prediction = prediction[:length].tolist()
                prediction = [token.decode('utf-8') for token in prediction]
                score = float(log_prob)
                outputs.append(
                    serving.TranslationOutput(prediction, score=score))
            batch_outputs.append(outputs)
        return batch_outputs
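
A minimal usage sketch for the in-process variant above. The SavedModel path, the signature key, and the `server` instance holding `forward_request` are assumptions for illustration, as is the attribute layout of `serving.TranslationOutput`.

import tensorflow as tf

# Hypothetical: load an exported translation SavedModel and take its serving
# function, which plays the role of model_info above.
imported = tf.saved_model.load("ende-export")           # assumed export directory
translate_fn = imported.signatures["serving_default"]   # assumed signature key

batch_inputs = [
    ["Hello", "world", "!"],
    ["How", "are", "you", "?"],
]

# `server` is assumed to be an instance of the class defining forward_request.
batch_outputs = server.forward_request(translate_fn, batch_inputs)
for hypotheses in batch_outputs:
    best = hypotheses[0]
    # Assuming TranslationOutput exposes the token list as `output`.
    print(" ".join(best.output), best.score)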
Example #2
    def forward_request(self, batch_inputs, info, timeout=None):
        # Connect to the TensorFlow Serving instance described by info.
        channel = grpc.insecure_channel("localhost:%s" % info['port'])
        stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

        max_length = max(len(src) for src in batch_inputs)
        tokens, lengths = utils.pad_lists(batch_inputs,
                                          padding_value='',
                                          max_length=max_length)
        batch_size = len(lengths)

        # Build the PredictRequest with the padded token and length tensors.
        predict_request = predict_pb2.PredictRequest()
        predict_request.model_spec.name = info['model_name']
        predict_request.inputs['tokens'].CopyFrom(
            tf.make_tensor_proto(tokens,
                                 dtype=tf.string,
                                 shape=(batch_size, max_length)))
        predict_request.inputs['length'].CopyFrom(
            tf.make_tensor_proto(lengths, dtype=tf.int32,
                                 shape=(batch_size, )))

        # Send the request asynchronously and wait at most `timeout` seconds.
        try:
            future = stub.Predict.future(predict_request, timeout)
            result = future.result()
        except ExpirationError as e:
            # The request timed out; log the error and return no result.
            logger.error('%s', e)
            return None

        # Deserialize the response tensors.
        lengths = tf.make_ndarray(result.outputs['length'])
        predictions = tf.make_ndarray(result.outputs['tokens'])
        log_probs = tf.make_ndarray(result.outputs['log_probs'])

        # Convert them into TranslationOutput objects, one list of hypotheses
        # per input sentence.
        batch_outputs = []
        for hypotheses, length, log_prob in zip(predictions, lengths,
                                                log_probs):
            outputs = []
            for i, prediction in enumerate(hypotheses):
                prediction_length = length[i] - 1  # Ignore </s>.
                prediction = prediction[0:prediction_length].tolist()
                prediction = [tf.compat.as_text(pred) for pred in prediction]
                score = float(log_prob[i])
                outputs.append(
                    serving.TranslationOutput(prediction, score=score))
            batch_outputs.append(outputs)
        return batch_outputs
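
A minimal usage sketch for the TensorFlow Serving variant above. The port, model name, and timeout values are placeholders, and `server` again stands for an instance of the class defining forward_request; the `output` attribute on the results is an assumption.

batch_inputs = [
    ["Hello", "world", "!"],
    ["How", "are", "you", "?"],
]

# Hypothetical serving info: the keys match those read by forward_request above.
info = {"model_name": "ende", "port": 9000}

batch_outputs = server.forward_request(batch_inputs, info, timeout=10.0)
if batch_outputs is None:
    print("Request timed out")
else:
    for hypotheses in batch_outputs:
        best = hypotheses[0]
        print(" ".join(best.output), best.score)  # assuming an `output` attribute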