def test_multiple_output_for_input_dict(expected_values):
    """Check that one input column can feed several output fields.

    The "Name" entry of the field mapping is replaced by a tuple holding the
    original field plus two extra fields whose pretokenize hooks lower-case
    and upper-case the text. Each resulting field must expose the matching
    raw value and its whitespace-split tokens.
    """
    lowered = Field("Lowercase_name", keep_raw=True)
    lowered.add_pretokenize_hook(str.lower)

    uppered = Field("Uppercase_name", keep_raw=True)
    uppered.add_pretokenize_hook(str.upper)

    # Shallow copy so the shared module-level mapping is left untouched.
    fields = field_dict.copy()
    fields["Name"] = (field_dict["Name"], lowered, uppered)

    example = ExampleFactory(fields).from_dict(expected_values)

    # (output field name, transformation expected to be applied to "Name")
    name_variants = (
        ("Name", lambda text: text),
        ("Lowercase_name", lambda text: text.lower()),
        ("Uppercase_name", lambda text: text.upper()),
    )
    for output_name, transform in name_variants:
        raw, tokenized = example[output_name]
        expected_raw = transform(expected_values["Name"])
        assert raw == expected_raw
        assert tokenized == expected_raw.split()

    # For the remaining columns only the raw value is verified.
    for column in ("Score", "Favorite_food"):
        raw, _ = example[column]
        assert raw == expected_values[column]
def get_dataset():
    """Build a three-example Dataset with "Name" and "Score" fields.

    Returns:
        The Dataset, after ``finalize_fields()`` has been called on it.
    """
    rows = (
        ("Mark Dark", 5),
        ("Stephen Smith", 10),
        ("Ann Mann", 15),
    )
    records = [{"Name": name, "Score": score} for name, score in rows]

    fields = {
        "Name": Field(
            "Name",
            numericalizer=Vocab(),
            keep_raw=True,
            tokenizer="split",
        ),
        "Score": Field(
            "Score",
            numericalizer=int,
            keep_raw=True,
            tokenizer=None,
            is_target=True,
        ),
    }

    factory = ExampleFactory(fields)
    dataset = Dataset(list(map(factory.from_dict, records)), fields)
    dataset.finalize_fields()
    return dataset
def test_cache_data_field_from_dict(expected_values):
    """Check that every field in ``field_dict`` is present on the example.

    Each field name must be reachable both through ``in`` and as an
    attribute of the example created by ``from_dict``.
    """
    example = ExampleFactory(field_dict).from_dict(expected_values)

    for name in (field.name for field in field_dict.values()):
        assert name in example
        assert hasattr(example, name)
def test_ignore_values_dict(expected_values):
    """Check example creation when only the "Name" field is declared.

    The example must expose "Name" (via ``in`` and as an attribute) and its
    raw value must equal the input; no other column is inspected here.
    """
    example = ExampleFactory({"Name": name_field}).from_dict(expected_values)

    assert "Name" in example
    assert hasattr(example, "Name")

    raw_name, _ = example["Name"]
    assert raw_name == expected_values["Name"]
def test_create_from_dict(expected_values):
    """Check that ``from_dict`` yields the expected (raw, tokenized) values.

    "Name" is verified for both its raw value and its whitespace-split
    tokens; "Score" and "Favorite_food" are verified for the raw value only.
    """
    example = ExampleFactory(field_dict).from_dict(expected_values)

    name_raw, name_tokens = example["Name"]
    expected_name = expected_values["Name"]
    assert name_raw == expected_name
    assert name_tokens == expected_name.split()

    for column in ("Score", "Favorite_food"):
        raw, _ = example[column]
        assert raw == expected_values[column]