def _test(mod, doc, encoding):
    """Round-trip ``doc`` through ``encoding`` and check the output is stable.

    The document is encoded and decoded with the ``tu`` helpers, the decoded
    copy is checked with ``tu.assert_doc``, and that copy is then encoded a
    second time.  Both encodings must agree in length and in content.

    When the two lengths differ and ``encoding`` is ``'xml'``, each
    representation is first decoded and re-encoded as ``'json'`` and the
    comparison is made on those JSON forms instead.

    Args:
        mod: Forwarded unchanged to ``tu.assert_doc``.
        doc: Document to serialize; its type is used as the failure message
            of the length assertion.
        encoding: Encoding name handed to ``tu.encode`` / ``tu.decode``.
    """
    # First pass: serialize, deserialize, and validate the decoded copy.
    first_pass = tu.encode(doc, encoding)
    round_tripped = tu.decode(first_pass, encoding)
    tu.assert_doc(mod, round_tripped)

    # Second pass: serialize the decoded copy again.
    second_pass = tu.encode(round_tripped, encoding)

    # TODO - explore why sometimes XML encoding is problematic
    # although all good when reencoding to json.
    lengths_differ = len(first_pass) != len(second_pass)
    if lengths_differ and encoding == 'xml':
        first_decoded = tu.decode(first_pass, encoding)
        first_pass = tu.encode(first_decoded, 'json')
        second_decoded = tu.decode(second_pass, encoding)
        second_pass = tu.encode(second_decoded, 'json')

    # Both representations have to match in size and in content.
    failure_msg = str(type(doc))
    tu.assert_int(len(first_pass), len(second_pass), msg=failure_msg)
    tu.assert_str(first_pass, second_pass)
Example #2
0
def _test(mod, doc, encoding):
    """Check that ``doc`` survives an encode/decode round trip in ``encoding``.

    The document is encoded and decoded with the ``tu`` helpers, the decoded
    copy is checked with ``tu.assert_doc``, and a second encoding of that
    copy must match the first one in both length and content.

    Args:
        mod: Forwarded unchanged to ``tu.assert_doc``.
        doc: Document to serialize.
        encoding: Encoding name handed to ``tu.encode`` / ``tu.decode``.
    """
    # Round trip: the decoded copy has to pass the document assertions.
    first_pass = tu.encode(doc, encoding)
    round_tripped = tu.decode(first_pass, encoding)
    tu.assert_doc(mod, round_tripped)

    # Encoding the copy again should reproduce the first representation.
    second_pass = tu.encode(round_tripped, encoding)
    tu.assert_int(len(first_pass), len(second_pass))
    tu.assert_str(first_pass, second_pass)
def _test(mod, doc, encoding):
    """Verify that serializing ``doc`` with ``encoding`` is repeatable.

    Steps, all performed through the ``tu`` helpers: encode the document,
    decode the result, run ``tu.assert_doc`` on the decoded copy, encode that
    copy again, then assert the two encodings have equal length and equal
    content.

    Args:
        mod: Forwarded unchanged to ``tu.assert_doc``.
        doc: Document to serialize.
        encoding: Encoding name handed to ``tu.encode`` / ``tu.decode``.
    """
    # Serialize, then rebuild a document from the serialized form.
    original_repr = tu.encode(doc, encoding)
    rebuilt_doc = tu.decode(original_repr, encoding)
    tu.assert_doc(mod, rebuilt_doc)

    # Serialize the rebuilt document and compare against the first output.
    rebuilt_repr = tu.encode(rebuilt_doc, encoding)
    tu.assert_int(len(original_repr), len(rebuilt_repr))
    tu.assert_str(original_repr, rebuilt_repr)
def predict(inputs_list, problem, predict_fn):
    """Encode a batch of inputs, run ``predict_fn`` on it, decode the results.

    Every element of ``inputs_list`` is encoded with the encoder taken from
    ``problem.feature_info`` (the ``"inputs"`` feature when
    ``problem.has_inputs`` is true, otherwise ``"targets"``), turned into a
    serialized example, and the batch is passed to ``predict_fn`` as
    ``{'input': [...]}``.  The ``"outputs"`` and ``"scores"`` entries of the
    response are paired up and each output is decoded with the ``"targets"``
    encoder.  Wall-clock timings in milliseconds are printed for the encode,
    predict and decode phases and for the whole call.

    Args:
        inputs_list: Inputs to predict on; must be a ``list`` instance.
        problem: Object providing ``has_inputs`` and ``feature_info``.
        predict_fn: Callable that receives the ``{'input': ...}`` dict and
            returns a mapping with ``"outputs"`` and ``"scores"`` entries.

    Returns:
        A list of ``(decoded_output, score)`` tuples.
    """
    assert isinstance(inputs_list, list)

    def _millis(begin, finish):
        # time.time() yields seconds; the report is in milliseconds.
        return (finish - begin) * 1000

    if problem.has_inputs:
        fname = "inputs"
    else:
        fname = "targets"
    input_encoder = problem.feature_info[fname].encoder

    # Phase 1: encode every input.
    t_encode_begin = time.time()
    input_ids_list = [
        encode(raw, input_encoder, add_eos=problem.has_inputs)
        for raw in inputs_list
    ]
    t_encode_finish = time.time()

    # Serialization is only accounted for in the total, not in any phase.
    serialized = [
        make_example(ids, problem, fname).SerializeToString()
        for ids in input_ids_list
    ]
    request = {'input': serialized}

    # Phase 2: run the model.
    t_predict_begin = time.time()
    predictions = predict_fn(request)
    t_predict_finish = time.time()

    # Phase 3: decode each output and keep it next to its score.
    output_decoder = problem.feature_info["targets"].encoder
    t_decode_begin = time.time()
    outputs = [
        (decode(raw_output, output_decoder), score)
        for raw_output, score in zip(predictions["outputs"],
                                     predictions["scores"])
    ]
    t_decode_finish = time.time()

    # Same template text as a multi-line literal, written out piecewise so
    # the trailing whitespace on each line is explicit.
    report = (
        "\n"
        "  Batch:{batch:d} \t\n"
        "  Encode:{encode:.3f} \t \n"
        "  Prediction:{predict:.3f} \t \n"
        "  Decode:{decode:.3f} \t\n"
        "  Total:{total:.3f}\n"
        "  "
    )
    timings = {
        "batch": len(outputs),
        "encode": _millis(t_encode_begin, t_encode_finish),
        "predict": _millis(t_predict_begin, t_predict_finish),
        "decode": _millis(t_decode_begin, t_decode_finish),
        "total": _millis(t_encode_begin, t_decode_finish),
    }
    print(report.format(**timings))

    return outputs