コード例 #1
0
def test_xlsx_parser_merged_cells_boolean():
    """Read the merged-cells boolean fixture with the header disabled.

    Only the first cell of the first row is expected to be ``True``; every
    other cell is expected to be ``None``.
    """
    # NOTE(review): the test name says "xlsx" but the fixture path ends in
    # ".xls" - confirm which format this test is meant to cover.
    expected_rows = [
        {"field1": True, "field2": None},
        {"field1": None, "field2": None},
        {"field1": None, "field2": None},
    ]
    no_header = Layout(header=False)
    with Resource("data/merged-cells-boolean.xls", layout=no_header) as res:
        assert res.read_rows() == expected_rows
コード例 #2
0
def test_csv_parser_escaping():
    """Parse ``data/escaping.csv`` with a backslash set as the escape character.

    Rows 2 and 3 are expected to contain a literal double quote in ``Test``.
    """
    backslash_dialect = CsvDialect(escape_char="\\")
    expected_rows = [
        {"ID": 1, "Test": "Test line 1"},
        {"ID": 2, "Test": 'Test " line 2'},
        {"ID": 3, "Test": 'Test " line 3'},
    ]
    with Resource("data/escaping.csv", dialect=backslash_dialect) as res:
        assert res.header == ["ID", "Test"]
        assert res.read_rows() == expected_rows
コード例 #3
0
def test_inline_parser_from_generator_not_callable():
    """Use an already-created generator object (not the function) as the source.

    The first yielded list is expected to become the header, and the string
    ids are expected to be read back as integers.
    """

    def produce_cells():
        yield from (
            ["id", "name"],
            ["1", "english"],
            ["2", "中国人"],
        )

    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    with Resource(produce_cells()) as res:
        assert res.header == ["id", "name"]
        assert res.read_rows() == expected_rows
コード例 #4
0
def test_xlsx_parser_preserve_formatting_percentage():
    """Read percentage cells with ``preserve_formatting`` enabled.

    ``col2`` is expected to come back as formatted percentage strings.
    """
    formatting_dialect = ExcelDialect(preserve_formatting=True)
    expected_rows = [
        {"col1": 123, "col2": "52.00%"},
        {"col1": 456, "col2": "30.00%"},
        {"col1": 789, "col2": "6.00%"},
    ]
    fixture = "data/preserve-formatting-percentage.xlsx"
    with Resource(fixture, dialect=formatting_dialect) as res:
        assert res.read_rows() == expected_rows
コード例 #5
0
ファイル: test_row.py プロジェクト: roll/frictionless-py
def test_step_row_sort_with_reverse_in_desriptor_issue_996():
    """Sort rows by ``id`` in reverse, configuring the step via a descriptor dict.

    The schema is expected to be unchanged and the rows to come back with
    ids 3, 2, 1.
    """
    source = Resource("data/transform.csv")
    sort_descriptor = {"fieldNames": ["id"], "reverse": True}
    target = transform(source, steps=[steps.row_sort(sort_descriptor)])

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 3, "name": "spain", "population": 47},
        {"id": 2, "name": "france", "population": 66},
        {"id": 1, "name": "germany", "population": 83},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #6
0
def test_step_table_recast():
    """Normalize, melt on ``id`` and recast on ``id``.

    The resulting schema and rows are expected to match the three-column
    layout asserted below.
    """
    source = Resource("data/transform.csv")
    pipeline = [
        steps.table_normalize(),
        steps.table_melt(field_name="id"),
        steps.table_recast(field_name="id"),
    ]
    target = transform(source, steps=pipeline)

    column_types = [("id", "integer"), ("name", "string"), ("population", "integer")]
    expected_schema = {
        "fields": [{"name": name, "type": kind} for name, kind in column_types]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #7
0
def test_step_cell_fill_direction_left():
    """Blank out ``"france"`` and fill the gap with ``direction="left"``.

    ``id`` and ``population`` are first retyped to string; the blanked name
    cell in the second row is expected to end up as ``"66"``.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="string"),
        steps.field_update(name="population", type="string"),
        steps.cell_replace(pattern="france", replace=None),
        steps.cell_fill(direction="left"),
    ]
    target = transform(source, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "string"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": "1", "name": "germany", "population": "83"},
        {"id": "2", "name": "66", "population": "66"},
        {"id": "3", "name": "spain", "population": "47"},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #8
0
def test_step_cell_replace_using_regex():
    """Replace ``name`` cells matching a ``<regex>``-prefixed pattern.

    The first two names are expected to become ``"center"`` while
    ``"spain"`` is expected to stay as is.
    """
    source = Resource(path="data/transform.csv")
    replace_step = steps.cell_replace(
        pattern="<regex>.*r.*",
        replace="center",
        field_name="name",
    )
    target = transform(source, steps=[replace_step])

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "center", "population": 83},
        {"id": 2, "name": "center", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #9
0
def test_step_cell_replace_with_field_name():
    """Restrict ``cell_replace`` to the ``id`` field.

    The pattern ``"france"`` is applied to ``id`` only, and the ``name``
    column is expected to still contain ``"france"`` afterwards.
    """
    source = Resource(path="data/transform.csv")
    replace_step = steps.cell_replace(
        pattern="france",
        replace="FRANCE",
        field_name="id",
    )
    target = transform(source, steps=[replace_step])

    column_types = [("id", "integer"), ("name", "string"), ("population", "integer")]
    expected_schema = {
        "fields": [{"name": name, "type": kind} for name, kind in column_types]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #10
0
def test_validate_custom_check():
    """Validate ``data/table.csv`` with a user-defined check.

    The check yields a ``BlankRowError`` from every ``validate_row`` call;
    the flattened report is expected to list one such error for row
    positions 2 and 3.
    """

    # Custom check: unconditionally reports the row it is given as blank.
    class custom(Check):
        def validate_row(self, row):
            cell_texts = [str(value) for value in row.values()]
            yield errors.BlankRowError(
                note="",
                cells=cell_texts,
                row_number=row.row_number,
                row_position=row.row_position,
            )

    resource = Resource("data/table.csv")
    report = resource.validate(checks=[custom()])
    flattened = report.flatten(["rowPosition", "fieldPosition", "code"])
    assert flattened == [
        [2, None, "blank-row"],
        [3, None, "blank-row"],
    ]
コード例 #11
0
def test_resource_infer_source_non_tabular():
    """Infer metadata for a plain-text file and compare the full descriptor.

    After ``infer()`` the resource must report valid metadata and be equal
    to the expected descriptor spelled out below.
    """
    resource = Resource(path="data/text.txt")
    resource.infer()
    # Attach the metadata errors to the assertion itself instead of printing
    # them unconditionally, so they are reported exactly when the check fails.
    assert resource.metadata_valid, resource.metadata_errors
    assert resource == {
        "name": "text",
        "path": "data/text.txt",
        "hash": "e1cbb0c3879af8347246f12c559a86b5",
        "bytes": 5,
        "rows": 0,
        "profile": "data-resource",
        "scheme": "file",
        "format": "txt",
        "hashing": "md5",
        "encoding": "utf-8",
        "compression": "no",
        "compressionPath": "",
    }
コード例 #12
0
def test_step_cell_convert():
    """Convert cells to the constant ``"n/a"``.

    ``id`` and ``population`` are retyped to string first; every cell in
    every row is then expected to read ``"n/a"``.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="string"),
        steps.field_update(name="population", type="string"),
        steps.cell_convert(value="n/a"),
    ]
    target = transform(source, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "string"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": "n/a", "name": "n/a", "population": "n/a"},
        {"id": "n/a", "name": "n/a", "population": "n/a"},
        {"id": "n/a", "name": "n/a", "population": "n/a"},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #13
0
def test_step_field_remove():
    """Remove the ``id`` field; only ``name`` and ``population`` should remain."""
    source = Resource(path="data/transform.csv")
    target = transform(source, steps=[steps.field_remove(names=["id"])])

    column_types = [("name", "string"), ("population", "integer")]
    expected_schema = {
        "fields": [{"name": name, "type": kind} for name, kind in column_types]
    }
    columns = ("name", "population")
    expected_rows = [
        dict(zip(columns, values))
        for values in [("germany", 83), ("france", 66), ("spain", 47)]
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #14
0
def test_step_field_filter():
    """Keep only the ``id`` and ``name`` fields."""
    source = Resource(path="data/transform.csv")
    target = transform(source, steps=[steps.field_filter(names=["id", "name"])])

    column_types = [("id", "integer"), ("name", "string")]
    expected_schema = {
        "fields": [{"name": name, "type": kind} for name, kind in column_types]
    }
    columns = ("id", "name")
    expected_rows = [
        dict(zip(columns, values))
        for values in [(1, "germany"), (2, "france"), (3, "spain")]
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #15
0
def test_table_format_tsv():
    """Read a TSV file while patching ``missingValues`` into the schema.

    The dialect is expected to report a tab delimiter, and the third row's
    ``name`` is expected to be ``None``.
    """
    patched_detector = Detector(schema_patch={"missingValues": ["\\N"]})
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
        {"id": 3, "name": None},
    ]
    with Resource("data/table.tsv", detector=patched_detector) as res:
        assert res.dialect == {"delimiter": "\t"}
        assert res.header == ["id", "name"]
        assert res.read_rows() == expected_rows
コード例 #16
0
def test_xls_parser_merged_cells_fill():
    """Read an ``.xls`` fixture with ``fill_merged_cells`` on and the header off.

    Every cell of the three rows is expected to contain ``"data"``.
    """
    fill_dialect = ExcelDialect(fill_merged_cells=True)
    no_header = Layout(header=False)
    expected_rows = [
        {"field1": "data", "field2": "data"},
        {"field1": "data", "field2": "data"},
        {"field1": "data", "field2": "data"},
    ]
    with Resource(
        "data/merged-cells.xls",
        dialect=fill_dialect,
        layout=no_header,
    ) as res:
        assert res.read_rows() == expected_rows
コード例 #17
0
def test_analyze_resource_detailed_numeric_descriptive_statistics_with_missingValues():
    """Check the detailed statistics reported for the ``average_grades`` field.

    Runs ``analyze(detailed=True)`` on ``data/analysis-data.csv`` and compares
    each statistic, including a ``missingValues`` count of 2.
    """
    resource = Resource({"path": "data/analysis-data.csv"})
    analysis = resource.analyze(detailed=True)
    grades = analysis["fieldStats"]["average_grades"]

    assert grades["bounds"] == [81, 96]
    assert grades["max"] == 10000.0
    assert grades["mean"] == 1503.28
    assert grades["median"] == 86.91
    assert grades["min"] == 84.65
    assert grades["missingValues"] == 2
    assert grades["mode"] == 86.79
    assert grades["quantiles"] == [86.79, 86.91, 90.39]
    # Spread measures are compared after rounding to the nearest integer.
    assert round(grades["stdev"]) == 3747
    assert grades["uniqueValues"] == 6
    assert round(grades["variance"]) == 14037774
    assert grades["outliers"] == [10000.0]
コード例 #18
0
ファイル: test_resource.py プロジェクト: kant/frictionless-py
def test_resource_source_multipart():
    """Describe a resource whose ``path`` is a list of two chunk files.

    The resource is expected to be flagged as multipart and tabular (not
    inline), and to yield the two rows asserted below.
    """
    resource = Resource(
        {
            "path": ["chunk1.csv", "chunk2.csv"],
            "schema": "resource-schema.json",
        },
        basepath="data",
    )
    assert resource.inline is False
    assert resource.multipart is True
    assert resource.tabular is True

    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    assert resource.read_rows() == expected_rows
コード例 #19
0
def test_sql_parser_headers_false(database_url):
    """Read the SQL table named ``table`` with ``Layout(header=False)``.

    The header is still expected to be ``["id", "name"]``, and the first
    data row is expected to be ``{"id": None, "name": "name"}``.
    """
    table_dialect = SqlDialect(table="table")
    no_header = Layout(header=False)
    expected_rows = [
        {"id": None, "name": "name"},
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    with Resource(database_url, dialect=table_dialect, layout=no_header) as res:
        assert res.header == ["id", "name"]
        assert res.read_rows() == expected_rows
コード例 #20
0
def test_step_cell_interpolate():
    """Interpolate every cell into the template ``"Prefix: %s"``.

    ``id`` and ``population`` are retyped to string first so that all three
    columns are expected to hold prefixed text.
    """
    source = Resource(path="data/transform.csv")
    pipeline = [
        steps.field_update(name="id", type="string"),
        steps.field_update(name="population", type="string"),
        steps.cell_interpolate(template="Prefix: %s"),
    ]
    target = transform(source, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "string"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": "Prefix: 1", "name": "Prefix: germany", "population": "Prefix: 83"},
        {"id": "Prefix: 2", "name": "Prefix: france", "population": "Prefix: 66"},
        {"id": "Prefix: 3", "name": "Prefix: spain", "population": "Prefix: 47"},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #21
0
def test_step_row_subset_distinct_with_duplicates():
    """Set ``id`` to 1 on every row, then keep the ``distinct`` subset on ``id``.

    Only a single row (the ``germany`` one) is expected in the result.
    """
    source = Resource("data/transform.csv")
    pipeline = [
        steps.field_update(name="id", value=1),
        steps.row_subset(subset="distinct", field_name="id"),
    ]
    target = transform(source, steps=pipeline)

    column_types = [("id", "integer"), ("name", "string"), ("population", "integer")]
    expected_schema = {
        "fields": [{"name": name, "type": kind} for name, kind in column_types]
    }
    expected_rows = [{"id": 1, "name": "germany", "population": 83}]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #22
0
def test_step_table_melt_with_to_field_names():
    """Melt ``population`` around ``name`` with custom output field names.

    The melted columns are expected to be called ``key`` and ``val`` and to
    appear in the schema without a ``type`` entry.
    """
    source = Resource("data/transform.csv")
    melt_step = steps.table_melt(
        field_name="name",
        variables=["population"],
        to_field_names=["key", "val"],
    )
    target = transform(source, steps=[steps.table_normalize(), melt_step])

    expected_schema = {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "key"},
            {"name": "val"},
        ]
    }
    expected_rows = [
        {"name": "germany", "key": "population", "val": 83},
        {"name": "france", "key": "population", "val": 66},
        {"name": "spain", "key": "population", "val": 47},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #23
0
def test_step_field_pack_object_907():
    """Pack ``name`` and ``population`` into an object field named ``details``.

    With ``preserve=True`` the source fields are expected to remain next to
    the new field. Only the first row is checked; its packed ``population``
    is expected to be the string ``"83"``.
    """
    source = Resource("data/transform.csv")
    pack_step = steps.field_pack(
        name="details",
        from_names=["name", "population"],
        field_type="object",
        preserve=True,
    )
    target = transform(source, steps=[pack_step])

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "details", "type": "object"},
        ]
    }
    expected_first_row = {
        "id": 1,
        "name": "germany",
        "population": 83,
        "details": {"name": "germany", "population": "83"},
    }
    assert target.schema == expected_schema
    assert target.read_rows()[0] == expected_first_row
コード例 #24
0
ファイル: test_row.py プロジェクト: yyht/frictionless-py
def test_step_row_filter_petl_selectisinstance():
    """Filter rows with a Python predicate that checks ``id`` is an ``int``.

    All three rows are expected to pass the filter after normalization.
    """

    def has_int_id(row):
        return isinstance(row["id"], int)

    source = Resource("data/transform.csv")
    pipeline = [
        steps.table_normalize(),
        steps.row_filter(function=has_int_id),
    ]
    target = transform(source, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 2, "name": "france", "population": 66},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #25
0
def test_step_row_split():
    """Split rows on the pattern ``"a"`` in the ``name`` field.

    The target schema is expected to equal the inferred source schema, and
    each source row is expected to produce two rows carrying the name parts.
    """
    source = Resource(path="data/transform.csv")
    source.infer()
    split_step = steps.row_split(field_name="name", pattern="a")
    target = transform(source, steps=[split_step])
    assert target.schema == source.schema

    expected_rows = [
        {"id": 1, "name": "germ", "population": 83},
        {"id": 1, "name": "ny", "population": 83},
        {"id": 2, "name": "fr", "population": 66},
        {"id": 2, "name": "nce", "population": 66},
        {"id": 3, "name": "sp", "population": 47},
        {"id": 3, "name": "in", "population": 47},
    ]
    assert target.read_rows() == expected_rows
コード例 #26
0
def test_step_table_intersect_from_dict():
    """Intersect the source with an inline table passed as a plain dict.

    The inline table's ``france`` row has population 50, and the expected
    result contains only the ``germany`` and ``spain`` rows.
    """
    source = Resource("data/transform.csv")
    other_table = {
        "data": [
            ["id", "name", "population"],
            [1, "germany", 83],
            [2, "france", 50],
            [3, "spain", 47],
        ]
    }
    pipeline = [
        steps.table_normalize(),
        steps.table_intersect(resource=other_table),
    ]
    target = transform(source, steps=pipeline)

    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "germany", "population": 83},
        {"id": 3, "name": "spain", "population": 47},
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows
コード例 #27
0
def test_resource_layout_header_xlsx_multiline():
    """Use rows 1-5 of an Excel sheet as header rows, with merged cells filled.

    The header is expected to contain the four labels below, and the single
    data row is expected to map them to ``"A"`` through ``"D"``.
    """
    expected_labels = [
        "Region",
        "Caloric contribution (%)",
        "Cumulative impact of changes on cost of food basket from previous quarter",
        "Cumulative impact of changes on cost of food basket from baseline (%)",
    ]
    fill_dialect = ExcelDialect(fill_merged_cells=True)
    multi_row_layout = Layout(header_rows=[1, 2, 3, 4, 5])
    with Resource(
        "data/multiline-headers.xlsx",
        dialect=fill_dialect,
        layout=multi_row_layout,
    ) as resource:
        header = resource.header
        assert header == expected_labels
        # Key the expected row by the labels actually read from the file.
        expected_row = {
            header[0]: "A",
            header[1]: "B",
            header[2]: "C",
            header[3]: "D",
        }
        assert resource.read_rows() == [expected_row]
コード例 #28
0
def test_resource_layout_header_inline_keyed_headers_is_none():
    """Read inline keyed rows (a list of dicts) with the header disabled.

    Labels are expected to be empty, the header to fall back to ``field1`` /
    ``field2``, and the dict keys to show up as the first data row.
    """
    keyed_rows = [{"id": "1", "name": "english"}, {"id": "2", "name": "中国人"}]
    no_header = Layout(header=False)
    expected_rows = [
        {"field1": "id", "field2": "name"},
        {"field1": "1", "field2": "english"},
        {"field1": "2", "field2": "中国人"},
    ]
    with Resource(keyed_rows, layout=no_header) as res:
        assert res.labels == []
        assert res.header == ["field1", "field2"]
        assert res.read_rows() == expected_rows
コード例 #29
0
def test_resource_open_read_rows():
    """Open ``data/table.csv`` and inspect the header and both rows.

    Checks content, field positions, row position/number, and that neither
    the header nor the rows carry errors.
    """
    with Resource("data/table.csv") as resource:
        header = resource.header
        first, second = resource.read_rows()

        assert header == ["id", "name"]
        assert header.field_positions == [1, 2]
        assert header.errors == []
        assert header.valid is True

        # (row, expected cells, expected row_position, expected row_number)
        expectations = [
            (first, {"id": 1, "name": "english"}, 2, 1),
            (second, {"id": 2, "name": "中国人"}, 3, 2),
        ]
        for row, cells, position, number in expectations:
            assert row.to_dict() == cells
            assert row.field_positions == [1, 2]
            assert row.row_position == position
            assert row.row_number == number
            assert row.errors == []
            assert row.valid is True
コード例 #30
0
def test_step_row_subset_distinct():
    """Keep the ``distinct`` subset of rows on the ``id`` field.

    All three rows are expected to be kept.
    """
    source = Resource("data/transform.csv")
    subset_step = steps.row_subset(subset="distinct", field_name="id")
    target = transform(source, steps=[subset_step])

    column_types = [("id", "integer"), ("name", "string"), ("population", "integer")]
    expected_schema = {
        "fields": [{"name": name, "type": kind} for name, kind in column_types]
    }
    columns = ("id", "name", "population")
    expected_rows = [
        dict(zip(columns, values))
        for values in [(1, "germany", 83), (2, "france", 66), (3, "spain", 47)]
    ]
    assert target.schema == expected_schema
    assert target.read_rows() == expected_rows