Ejemplo n.º 1
0
def test_concatenations():
    """Check the concatenations returned when no resource is given.

    For the ``pipeline-one`` fixture only ``combined-field`` is expected.
    """
    expected = {
        "combined-field": {"fields": ["field-one", "field-two"], "separator": ". "},
    }
    pipeline = Pipeline("tests/data/pipeline", "pipeline-one")
    assert pipeline.concatenations() == expected
Ejemplo n.º 2
0
def test_resource_specific_concatenations():
    """Check the concatenations returned for the resource ``some-resource``.

    The result is expected to hold ``other-combined-field`` in addition to
    the ``combined-field`` entry.
    """
    separator = ". "
    expected = {
        "other-combined-field": {
            "fields": ["field-one", "field-three"],
            "separator": separator,
        },
        "combined-field": {
            "fields": ["field-one", "field-two"],
            "separator": separator,
        },
    }
    pipeline = Pipeline("tests/data/pipeline", "pipeline-one")
    assert pipeline.concatenations("some-resource") == expected
Ejemplo n.º 3
0
def test_columns():
    """Check the column mapping returned when no resource is given."""
    expected = dict(
        dos="two",
        due="one",
        thirdcolumn="three",
        um="one",
        un="one",
        una="one",
        uno="one",
    )
    pipeline = Pipeline("tests/data/pipeline/", "pipeline-one")
    assert pipeline.columns() == expected
Ejemplo n.º 4
0
def test_harmonise_field():
    """Check ``harmonise_field`` on ``field-string`` for ``None`` and a plain value.

    ``None`` is expected to come back as an empty string, while ``"value"``
    is expected to be returned unchanged.
    """
    harmoniser = Harmoniser(
        Specification("tests/data/specification"),
        Pipeline("tests/data/pipeline", "pipeline-one"),
    )

    # (input value, expected harmonised value)
    cases = ((None, ""), ("value", "value"))
    for raw, expected in cases:
        assert harmoniser.harmonise_field("field-string", raw) == expected
Ejemplo n.º 5
0
def test_harmonise_passes_resource():
    """Check that the first harmonised item carries the reader's resource name."""
    harmoniser = Harmoniser(
        Specification("tests/data/specification"),
        Pipeline("tests/data/pipeline", "pipeline-one"),
    )
    rows = [{"field-integer": "123"}]
    source = FakeDictReader(rows, "some-resource")

    # Only the first item produced by harmonise() is inspected.
    first = next(harmoniser.harmonise(source))
    assert first["resource"] == "some-resource"
Ejemplo n.º 6
0
def test_resource_specific_columns():
    """Check the column mapping returned for the resource ``some-resource``.

    Two things are asserted: ``quatro`` is the first key of the returned
    mapping, and the mapping contains ``quatro`` alongside the other
    expected columns.
    """
    pipeline = Pipeline("tests/data/pipeline/", "pipeline-one")
    columns = pipeline.columns("some-resource")

    ordered_keys = list(columns)
    assert (
        ordered_keys[0] == "quatro"
    ), "resource specific column 'quatro' should appear first in the returned dict"

    # Dict equality ignores key order, so the position of "quatro" in this
    # literal is irrelevant; ordering is covered by the assertion above.
    expected = {
        "dos": "two",
        "due": "one",
        "thirdcolumn": "three",
        "um": "one",
        "un": "one",
        "una": "one",
        "uno": "one",
        "quatro": "four",
    }
    assert columns == expected
Ejemplo n.º 7
0
def test_default():
    """Check default filling of ``field-integer`` for ``resource-one``.

    The first row has an empty ``field-integer`` and is expected to come out
    as ``"123"``, the value of its ``field-other-integer``. The second row
    already has ``"321"`` and is expected to keep it.
    """
    rows = [
        {"field-integer": "", "field-other-integer": "123"},
        {"field-integer": "321", "field-other-integer": "123"},
    ]
    harmoniser = Harmoniser(
        Specification("tests/data/specification"),
        Pipeline("tests/data/pipeline", "pipeline-one"),
    )
    source = FakeDictReader(rows, "resource-one")
    results = list(harmoniser.harmonise(source))

    empty_row, filled_row = results[0]["row"], results[1]["row"]
    assert empty_row["field-integer"] == "123", "value is taken from default"
    assert filled_row["field-integer"] == "321", "value is not overridden"
Ejemplo n.º 8
0
def test_harmonise():
    """Check ``harmonise`` on valid, padded and invalid ``field-integer`` values.

    Three rows go in and three are expected out: the valid value unchanged,
    the padded value trimmed, and the non-numeric value as an empty string.
    """
    harmoniser = Harmoniser(
        Specification("tests/data/specification"),
        Pipeline("tests/data/pipeline", "pipeline-one"),
    )
    raw_values = ["123", "  321   ", "hello"]
    source = FakeDictReader([{"field-integer": value} for value in raw_values])
    results = list(harmoniser.harmonise(source))

    # (expected harmonised value, assertion message), in input order
    expectations = [
        ("123", "pass through valid data"),
        ("321", "whitespace trimmed"),
        ("", "remove bad data"),
    ]
    assert len(results) == 3
    for result, (value, reason) in zip(results, expectations):
        assert result["row"] == {"field-integer": value}, reason
Ejemplo n.º 9
0
def test_resource_specific_default_fieldnames():
    """Check the default fieldnames returned for the resource ``resource-one``.

    ``field-integer`` is expected to list ``field-other-integer`` ahead of
    ``field-two``.
    """
    expected = {"field-integer": ["field-other-integer", "field-two"]}
    pipeline = Pipeline("tests/data/pipeline", "pipeline-one")
    defaults = pipeline.default_fieldnames("resource-one")
    assert defaults == expected
Ejemplo n.º 10
0
def test_default_fieldnames():
    """Check the default fieldnames returned when no resource is given."""
    expected = {"field-integer": ["field-two"]}
    pipeline = Pipeline("tests/data/pipeline", "pipeline-one")
    defaults = pipeline.default_fieldnames()
    assert defaults == expected
Ejemplo n.º 11
0
def test_resource_specific_patches():
    """Check the patches returned for the resource ``resource-one``.

    ``field-one`` is expected to carry the ``something`` patch as well as
    the ``pat`` patch.
    """
    expected = {"field-one": {"something": "else", "pat": "val"}}
    pipeline = Pipeline("tests/data/pipeline/", "pipeline-one")
    assert pipeline.patches("resource-one") == expected
Ejemplo n.º 12
0
def test_patches():
    """Check the patches returned when no resource is given."""
    expected = {"field-one": {"pat": "val"}}
    pipeline = Pipeline("tests/data/pipeline/", "pipeline-one")
    assert pipeline.patches() == expected
Ejemplo n.º 13
0
def test_skip_patterns():
    """Check that ``skip_patterns()`` returns a list containing ``^Unnamed: 0,``."""
    patterns = Pipeline("tests/data/pipeline/", "pipeline-one").skip_patterns()
    assert isinstance(patterns, list)
    assert "^Unnamed: 0," in patterns
Ejemplo n.º 14
0
def test_pipeline():
    """Check that the ``pipeline-one`` fixture exposes ``schema-one`` as its schema."""
    pipeline = Pipeline("tests/data/pipeline/", "pipeline-one")
    schema = pipeline.schema
    assert schema == "schema-one"
Ejemplo n.º 15
0
def test_transform():
    """Check that ``transformations()`` maps ``field-one`` to ``FieldOne``."""
    expected = {"field-one": "FieldOne"}
    pipeline = Pipeline("tests/data/pipeline", "pipeline-one")
    assert pipeline.transformations() == expected