def test_preprocess_example():
    """Run ``serving.preprocess_example`` against three processor shapes.

    The cases are: a processor that only tokenizes the source, a processor
    that also tokenizes a target prefix and returns metadata, and a processor
    that returns the source as two separate parts with per-part metadata.
    """

    class SourceOnlyProcessor:
        # Whitespace-tokenizes the source; no target tokens, no metadata.
        def process_input(self, source, **kwargs):
            return source.split(), None, None

    class TargetPrefixProcessor:
        # Tokenizes both sides and reports the raw source length as metadata.
        def process_input(self, source, target=None, **kwargs):
            source_tokens = source.split()
            target_tokens = target.split()
            return source_tokens, target_tokens, len(source)

    class TwoPartProcessor:
        # Returns the tokens as two parts, each with its own metadata entry.
        def process_input(self, source, **kwargs):
            tokens = source.split()
            parts = [tokens[:2], tokens[2:]]
            return parts, None, [1, 2]

    # Case 1: plain text, default mode.
    result = serving.preprocess_example(SourceOnlyProcessor(), 1, {"text": "a b c"})
    assert result.index == 1
    assert result.source_tokens == [["a", "b", "c"]]
    assert result.metadata == [None]
    assert result.mode == "default"

    # Case 2: text with a target prefix and an explicit mode.
    request = {
        "text": "a b c",
        "target_prefix": "d e",
        "mode": "alternatives",
    }
    result = serving.preprocess_example(TargetPrefixProcessor(), 2, request)
    assert result.index == 2
    assert result.source_tokens == [["a", "b", "c"]]
    assert result.target_tokens == [["d", "e"]]
    assert result.metadata == [5]
    assert result.mode == "alternatives"

    # Case 3: the processor yields several parts for a single input.
    result = serving.preprocess_example(TwoPartProcessor(), 3, {"text": "a b c d"})
    assert result.index == 3
    assert result.source_tokens == [["a", "b"], ["c", "d"]]
    assert result.metadata == [1, 2]
    assert result.mode == "default"
def test_preprocess_example():
    """Run ``serving.preprocess_example`` with plain callables as processors.

    Each callable receives its inputs positionally; the first positional
    argument is the source text and the second (when used) the target text.
    """

    def split_source(*args):
        # Two-element result: source tokens and no target tokens.
        return args[0].split(), None

    def split_source_and_target(*args):
        # Metadata is the character length of the raw source text.
        return args[0].split(), args[1].split(), len(args[0])

    def split_in_two_parts(*args):
        # First two tokens form one part, the remainder the other.
        return [args[0].split()[:2], args[0].split()[2:]], None, [1, 2]

    # Case 1: plain text, default mode.
    result = serving.preprocess_example(split_source, 1, {"text": "a b c"})
    assert result.index == 1
    assert result.source_tokens == [["a", "b", "c"]]
    assert result.metadata == [None]
    assert result.mode == "default"

    # Case 2: text with a target prefix and an explicit mode.
    request = {
        "text": "a b c",
        "target_prefix": "d e",
        "mode": "alternatives",
    }
    result = serving.preprocess_example(split_source_and_target, 2, request)
    assert result.index == 2
    assert result.source_tokens == [["a", "b", "c"]]
    assert result.target_tokens == [["d", "e"]]
    assert result.metadata == [5]
    assert result.mode == "alternatives"

    # Case 3: the callable yields several parts for a single input.
    result = serving.preprocess_example(split_in_two_parts, 3, {"text": "a b c d"})
    assert result.index == 3
    assert result.source_tokens == [["a", "b"], ["c", "d"]]
    assert result.metadata == [1, 2]
    assert result.mode == "default"
def test_preprocess_example_with_v2_options():
    """Check option mapping when ``preprocess`` is a list of named operators.

    A single ``politeness`` inference option is mapped to two config paths.
    The processor asserts that it receives no config and that the value
    arrives as per-operator entries in ``options``.
    """
    operators = [
        {
            "op": "_add_marker",
            "name": "politeness-op",
            "default_value": "neutral",
        },
        {
            "op": "_add_emotion",
            "name": "emotion-op",
            "default_mood": "neutral",
        },
    ]
    schema = {
        "type": "object",
        "properties": {
            "politeness": {
                "type": "string",
                "default": "neutral",
                "enum": ["formal", "informal", "neutral"],
            }
        },
    }
    option_mappings = [
        {
            "option_path": "politeness",
            "config_path": [
                "preprocess/politeness-op/value",
                "preprocess/emotion-op/mood",
            ],
        },
    ]
    config = {
        "source": "en",
        "target": "fr",
        "preprocess": operators,
        "inference_options": {
            "json_schema": schema,
            "options": option_mappings,
        },
    }

    # What the processor must be handed for the "informal" request below.
    expected_options = {
        "politeness-op": {"value": "informal"},
        "emotion-op": {"mood": "informal"},
    }

    class Processor:
        def process_input(
            self, source, target=None, config=None, options=None, **kwargs
        ):
            assert config is None
            assert options == expected_options
            return source.split(), None, None

    request = {"text": "a b c d", "options": {"politeness": "informal"}}
    serving.preprocess_example(Processor(), 0, request, config=config)
def test_preprocess_example_with_fuzzy():
    """Check how the ``fuzzy`` field of an example reaches the processor.

    Without a config the processor asserts that it gets no target at all;
    with ``supported_features.NFA`` enabled it asserts that the target is set
    and that ``target_name`` is ``"fuzzy"``.
    """

    class Processor:
        def __init__(self, support_fuzzy):
            self._support_fuzzy = support_fuzzy

        def process_input(self, source, target=None, target_name=None, **kwargs):
            source_tokens = source.split()

            # Feature disabled: nothing fuzzy-related may be passed in.
            if not self._support_fuzzy:
                assert target is None
                assert target_name is None
                return source_tokens, None, None

            # Feature enabled: the fuzzy match arrives as a named target.
            assert target is not None
            assert target_name == "fuzzy"
            return source_tokens, target.split(), None

    request = {"text": "Hello world", "fuzzy": "Bonjour monde"}
    nfa_config = {"supported_features": {"NFA": True}}

    serving.preprocess_example(Processor(False), 0, request)
    serving.preprocess_example(Processor(True), 0, request, config=nfa_config)
def test_preprocess_example_with_v1_options():
    """Check option mapping when ``preprocess`` is a plain mapping.

    The ``politeness`` inference option is mapped to one config path. The
    processor asserts that the value is written into the config it receives
    and that no separate ``options`` argument is passed.
    """
    schema = {
        "type": "object",
        "properties": {
            "politeness": {
                "type": "string",
                "default": "neutral",
                "enum": ["formal", "informal", "neutral"],
            }
        },
    }
    option_mappings = [
        {
            "option_path": "politeness",
            "config_path": "preprocess/politeness/value",
        },
    ]
    config = {
        "source": "en",
        "target": "fr",
        "preprocess": {
            "politeness": {
                "default_value": "neutral",
            }
        },
        "inference_options": {
            "json_schema": schema,
            "options": option_mappings,
        },
    }

    class Processor:
        def process_input(
            self, source, target=None, config=None, options=None, **kwargs
        ):
            politeness = config["preprocess"]["politeness"]
            assert politeness["value"] == "informal"
            assert options is None
            return source.split(), None, None

    request = {"text": "a b c d", "options": {"politeness": "informal"}}
    serving.preprocess_example(Processor(), 0, request, config=config)