Beispiel #1
0
def test_multiple_output_for_input_dict(expected_values):
    """Check that one input key can populate several fields at once.

    Two extra fields are created with ``str.lower`` / ``str.upper``
    pretokenize hooks and registered, together with the original field,
    as a tuple under the ``"Name"`` key. The example built from
    ``expected_values`` must then expose a ``(raw, tokenized)`` pair for
    each of the three fields, while ``"Score"`` and ``"Favorite_food"``
    keep their raw values unchanged.
    """
    lowercase_field = Field("Lowercase_name", keep_raw=True)
    lowercase_field.add_pretokenize_hook(str.lower)

    uppercase_field = Field("Uppercase_name", keep_raw=True)
    uppercase_field.add_pretokenize_hook(str.upper)

    # Work on a copy so the shared module-level mapping is not modified.
    fields = field_dict.copy()
    fields["Name"] = (field_dict["Name"], lowercase_field, uppercase_field)

    example = ExampleFactory(fields).from_dict(expected_values)

    raw, tokenized = example["Name"]
    assert raw == expected_values["Name"]
    assert tokenized == expected_values["Name"].split()

    # The derived fields must see the same input with the case changed.
    derived_fields = (
        ("Lowercase_name", str.lower),
        ("Uppercase_name", str.upper),
    )
    for derived_name, change_case in derived_fields:
        raw, tokenized = example[derived_name]
        assert raw == change_case(expected_values["Name"])
        assert tokenized == change_case(expected_values["Name"]).split()

    # Only the raw part is checked for the remaining columns.
    for column in ("Score", "Favorite_food"):
        raw, _ = example[column]
        assert raw == expected_values[column]
Beispiel #2
0
def get_dataset():
    """Build a three-example dataset with ``Name`` and ``Score`` fields.

    ``Name`` uses a ``Vocab`` numericalizer with the ``"split"`` tokenizer;
    ``Score`` is numericalized with ``int``, has no tokenizer and is marked
    as the target. Both fields keep their raw values.

    Returns:
        The ``Dataset`` instance, after ``finalize_fields()`` was called.
    """
    records = [
        {"Name": person, "Score": points}
        for person, points in (
            ("Mark Dark", 5),
            ("Stephen Smith", 10),
            ("Ann Mann", 15),
        )
    ]

    fields = {
        "Name": Field(
            "Name",
            numericalizer=Vocab(),
            keep_raw=True,
            tokenizer="split",
        ),
        "Score": Field(
            "Score",
            numericalizer=int,
            keep_raw=True,
            tokenizer=None,
            is_target=True,
        ),
    }

    factory = ExampleFactory(fields)
    examples = []
    for record in records:
        examples.append(factory.from_dict(record))

    dataset = Dataset(examples, fields)
    dataset.finalize_fields()
    return dataset
Beispiel #3
0
def test_cache_data_field_from_dict(expected_values):
    """Check that every configured field is reachable on the example.

    For each field in ``field_dict`` the example built from
    ``expected_values`` must support both ``name in example`` and
    attribute lookup under the field's name.
    """
    example = ExampleFactory(field_dict).from_dict(expected_values)

    for name in (field.name for field in field_dict.values()):
        assert name in example
        assert hasattr(example, name)
Beispiel #4
0
def test_ignore_values_dict(expected_values):
    """Check example creation when only the ``Name`` field is configured.

    The factory is given a mapping containing just ``name_field``; the
    resulting example must expose ``"Name"`` and keep its raw value.
    NOTE(review): presumably ``expected_values`` carries extra keys that
    have no matching field and should be skipped; confirm against the
    fixture.
    """
    example = ExampleFactory({"Name": name_field}).from_dict(expected_values)

    assert "Name" in example
    assert hasattr(example, "Name")

    raw_name, _tokens = example["Name"]
    assert raw_name == expected_values["Name"]
Beispiel #5
0
def test_create_from_dict(expected_values):
    """Check the ``(raw, tokenized)`` data of an example built from a dict.

    ``"Name"`` must keep its raw value and be tokenized like
    ``str.split``; for ``"Score"`` and ``"Favorite_food"`` only the raw
    value is compared.
    """
    example = ExampleFactory(field_dict).from_dict(expected_values)

    name_raw, name_tokens = example["Name"]
    assert name_raw == expected_values["Name"]
    assert name_tokens == expected_values["Name"].split()

    # Tokenized output is not checked for the remaining columns.
    for column in ("Score", "Favorite_food"):
        raw, _ = example[column]
        assert raw == expected_values[column]