import os
from decimal import Decimal

import pytest

from frictionless import (
    Detector,
    Field,
    Layout,
    Resource,
    Schema,
    exceptions,
    steps,
    transform,
)
from frictionless.plugins.csv import CsvDialect
from frictionless.plugins.excel import ExcelDialect
from frictionless.plugins.inline import InlineDialect

# BASEURL / BASE_URL (the remote fixtures base url) and IS_UNIX are assumed to
# be provided by the suite's conftest, as in the upstream test layout.


def test_resource_write(tmpdir):
    path1 = "data/table.csv"
    path2 = str(tmpdir.join("table.csv"))
    source = Resource(path=path1)
    source.write(path2)
    target = Resource(path=path2, trusted=True)
    assert target.read_header() == ["id", "name"]
    assert target.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]


def test_xls_parser_sheet_by_index():
    source = "data/sheet2.xls"
    dialect = ExcelDialect(sheet=2)
    with Resource(source, dialect=dialect) as resource:
        assert resource.header == ["id", "name"]
        assert resource.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]


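# Hedged companion sketch (not from the original suite): ExcelDialect.sheet is
# assumed to also accept a sheet name; "Sheet2" is a hypothetical name for the
# second sheet of data/sheet2.xls.
def sketch_xls_parser_sheet_by_name():
    source = "data/sheet2.xls"
    dialect = ExcelDialect(sheet="Sheet2")  # name instead of a 1-based index
    with Resource(source, dialect=dialect) as resource:
        assert resource.header == ["id", "name"]

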
def test_resource_expand_with_dialect():
    dialect = {"delimiter": "custom"}
    resource = Resource({"name": "name", "path": "data/table.csv", "dialect": dialect})
    resource.expand()
    assert resource == {
        "name": "name",
        "path": "data/table.csv",
        "profile": "data-resource",
        "dialect": {
            "header": True,
            "headerRows": [1],
            "headerJoin": " ",
            "headerCase": True,
            "delimiter": "custom",
            "lineTerminator": "\r\n",
            "doubleQuote": True,
            "quoteChar": '"',
            "skipInitialSpace": False,
        },
    }


def test_step_field_add_with_formula():
    source = Resource(path="data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.field_add(name="calc", formula="id * 100 + population"),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "calc"},
        ]
    }
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83, "calc": 183},
        {"id": 2, "name": "france", "population": 66, "calc": 266},
        {"id": 3, "name": "spain", "population": 47, "calc": 347},
    ]


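# Hedged companion sketch: field_add is assumed to also accept a Python
# callable via `function`, receiving the row and returning the new value
# (equivalent to the string formula above).
def sketch_step_field_add_with_function():
    source = Resource(path="data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.field_add(
                name="calc",
                function=lambda row: row["id"] * 100 + row["population"],
            ),
        ],
    )
    assert target.read_rows()[0]["calc"] == 183

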
def test_step_field_add():
    source = Resource(path="data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.field_add(name="note", type="string", value="eu"),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "note", "type": "string"},
        ]
    }
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83, "note": "eu"},
        {"id": 2, "name": "france", "population": 66, "note": "eu"},
        {"id": 3, "name": "spain", "population": 47, "note": "eu"},
    ]


def test_step_row_ungroup_last():
    source = Resource("data/transform-groups.csv")
    target = transform(
        source,
        steps=[
            steps.row_ungroup(group_name="name", selection="last"),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "year", "type": "integer"},
        ]
    }
    assert target.read_rows() == [
        {"id": 4, "name": "france", "population": 54, "year": 1920},
        {"id": 2, "name": "germany", "population": 77, "year": 1920},
        {"id": 6, "name": "spain", "population": 33, "year": 1920},
    ]


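# Hedged companion sketch: `selection` is assumed to also accept "first" (and
# "min"/"max" together with a value field); the expected rows depend on
# data/transform-groups.csv, so no row assertion is made here.
def sketch_step_row_ungroup_first():
    source = Resource("data/transform-groups.csv")
    target = transform(
        source,
        steps=[steps.row_ungroup(group_name="name", selection="first")],
    )
    print(target.read_rows())  # the first row of each "name" group

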
def test_resource_source_non_tabular_remote():
    path = BASEURL % "data/text.txt"
    with Resource(path) as resource:
        assert resource.path == path
        assert resource.data is None
        assert resource.memory is False
        assert resource.tabular is False
        assert resource.multipart is False
        assert resource.basepath == ""
        assert resource.fullpath == path
        if IS_UNIX:
            assert resource.read_bytes() == b"text\n"
            assert resource.stats == {
                "hash": "e1cbb0c3879af8347246f12c559a86b5",
                "bytes": 5,
            }


def test_resource_relative_parent_path_with_trusted_option_issue_171():
    # trusted=false (default)
    resource = Resource(path="data/../data/table.csv")
    with pytest.raises(exceptions.FrictionlessException) as excinfo:
        resource.read_rows()
    error = excinfo.value.error
    assert error.code == "resource-error"
    assert error.note.count("data/table.csv")
    # trusted=true
    resource = Resource(path="data/../data/table.csv", trusted=True)
    assert resource.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]


def test_resource_to_zip(tmpdir):
    # Write
    target = os.path.join(tmpdir, "dataresource.zip")
    resource = Resource("data/resource.json")
    resource.to_zip(target)

    # Read
    resource = Resource(target)
    assert resource == {"name": "name", "path": "table.csv"}
    assert resource.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]


def test_xlsx_parser_preserve_formatting_number_multicode():
    source = "data/number-format-multicode.xlsx"
    dialect = ExcelDialect(preserve_formatting=True)
    layout = Layout(skip_fields=["<blank>"])
    with Resource(source, dialect=dialect, layout=layout) as resource:
        assert resource.read_rows() == [
            {"col1": Decimal("4.5")},
            {"col1": Decimal("-9.032")},
            {"col1": Decimal("15.8")},
        ]


def test_type_custom(custom_plugin):
    schema = Schema(
        fields=[
            Field(name="integer", type="integer"),
            Field(name="custom", type="custom"),
        ]
    )
    with Resource(path="data/table.csv", schema=schema) as resource:
        assert resource.read_rows() == [
            {"integer": 1, "custom": ["english"]},
            {"integer": 2, "custom": ["中国人"]},
        ]


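# Hypothetical sketch of the `custom_plugin` fixture (the real one lives
# elsewhere in the suite): it is assumed to register a plugin whose "custom"
# type wraps every cell in a list, matching the ["english"] / ["中国人"]
# values asserted above.
@pytest.fixture
def custom_plugin():
    from frictionless import Plugin, Type, system

    class CustomType(Type):
        def read_cell(self, cell):
            return [cell]

    class CustomPlugin(Plugin):
        def create_type(self, field):
            if field.type == "custom":
                return CustomType(field)

    system.register("custom", CustomPlugin())
    yield
    system.deregister("custom")  # assumed cleanup hook

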
def test_csv_parser_excel_tab():
    source = "header1\theader2\nvalue1\tvalue2\nvalue3\tvalue4"
    dialect = CsvDialect(delimiter="\t")
    with Resource(source, scheme="text", format="csv", dialect=dialect) as resource:
        assert resource.header == ["header1", "header2"]
        assert resource.read_rows() == [
            {"header1": "value1", "header2": "value2"},
            {"header1": "value3", "header2": "value4"},
        ]


def test_csv_parser_skipinitialspace():
    source = "header1, header2\nvalue1, value2\nvalue3, value4"
    dialect = CsvDialect(skip_initial_space=False)
    with Resource(source, scheme="text", format="csv", dialect=dialect) as resource:
        assert resource.header == ["header1", "header2"]
        assert resource.read_rows() == [
            {"header1": "value1", "header2": " value2"},
            {"header1": "value3", "header2": " value4"},
        ]


def test_csv_parser_delimiter():
    source = '"header1";"header2"\n"value1";"value2"\n"value3";"value4"'
    dialect = CsvDialect(delimiter=";")
    with Resource(source, scheme="text", format="csv", dialect=dialect) as resource:
        assert resource.header == ["header1", "header2"]
        assert resource.read_rows() == [
            {"header1": "value1", "header2": "value2"},
            {"header1": "value3", "header2": "value4"},
        ]


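# Hedged companion sketch: CsvDialect is assumed to accept quote_char the same
# way it accepts delimiter and skip_initial_space above.
def sketch_csv_parser_quotechar():
    source = "'header1','header2'\n'value1','value2'"
    dialect = CsvDialect(quote_char="'")
    with Resource(source, scheme="text", format="csv", dialect=dialect) as resource:
        assert resource.header == ["header1", "header2"]
        assert resource.read_rows() == [{"header1": "value1", "header2": "value2"}]

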
def test_inline_parser_keyed_with_keys_provided():
    source = [{"id": "1", "name": "english"}, {"id": "2", "name": "中国人"}]
    dialect = InlineDialect(keys=["name", "id"])
    with Resource(source, format="inline", dialect=dialect) as resource:
        assert resource.dialect.keyed is True
        assert resource.header == ["name", "id"]
        assert resource.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]


def test_json_parser_from_buffer_keyed():
    source = '[{"id": 1, "name": "english" }, {"id": 2, "name": "中国人" }]'.encode("utf-8")
    with Resource(source, format="json") as resource:
        assert resource.dialect.keyed is True
        assert resource.header == ["id", "name"]
        assert resource.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]


def test_resource_layout_header_csv_multiline_headers_duplicates():
    source = b"k1\nk1\nv1\nv2\nv3"
    layout = Layout(header_rows=[1, 2])
    with Resource(source, format="csv", layout=layout) as resource:
        assert resource.header == ["k1"]
        assert resource.read_rows() == [
            {"k1": "v1"},
            {"k1": "v2"},
            {"k1": "v3"},
        ]


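# Hedged companion sketch: Layout is assumed to also accept header_join to
# control how non-duplicate multiline header rows are concatenated (the
# default join is a single space).
def sketch_layout_multiline_headers_join():
    source = b"k1\nk2\nv1"
    layout = Layout(header_rows=[1, 2], header_join="/")
    with Resource(source, format="csv", layout=layout) as resource:
        assert resource.header == ["k1/k2"]

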
def test_step_row_subset_conflicts():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.row_subset(subset="conflicts", field_name="id"),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert target.read_rows() == []


def test_resource_official_hash_bytes_rows_with_hashing_algorithm():
    resource = Resource({"path": "path", "hash": "sha256:hash", "bytes": 1, "rows": 1})
    assert resource == {
        "path": "path",
        "hashing": "sha256",
        "stats": {
            "hash": "hash",
            "bytes": 1,
            "rows": 1,
        },
    }


def test_resource_open_without_rows():
    with Resource("data/without-rows.csv") as resource:
        assert resource.header == ["id", "name"]
        assert resource.read_rows() == []
        assert resource.schema == {
            "fields": [
                {"name": "id", "type": "any"},
                {"name": "name", "type": "any"},
            ]
        }


def test_step_row_subset_conflicts_from_descriptor_issue_996():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.row_subset({"subset": "conflicts", "fieldName": "id"}),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert target.read_rows() == []


def test_step_field_unpack():
    source = Resource(path="data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.field_update(name="id", type="array", value=[1, 1]),
            steps.field_unpack(name="id", to_names=["id2", "id3"]),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "id2"},
            {"name": "id3"},
        ]
    }
    assert target.read_rows() == [
        {"name": "germany", "population": 83, "id2": 1, "id3": 1},
        {"name": "france", "population": 66, "id2": 1, "id3": 1},
        {"name": "spain", "population": 47, "id2": 1, "id3": 1},
    ]


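# Hedged companion sketch: field_unpack is assumed to accept preserve=True to
# keep the source field alongside the unpacked ones.
def sketch_step_field_unpack_with_preserve():
    source = Resource(path="data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.field_update(name="id", type="array", value=[1, 1]),
            steps.field_unpack(name="id", to_names=["id2", "id3"], preserve=True),
        ],
    )
    assert target.read_rows()[0] == {
        "id": [1, 1], "name": "germany", "population": 83, "id2": 1, "id3": 1
    }

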
def test_step_field_add_with_incremental():
    source = Resource(path="data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.field_add(name="number", incremental=True),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "number"},
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert target.read_rows() == [
        {"number": 1, "id": 1, "name": "germany", "population": 83},
        {"number": 2, "id": 2, "name": "france", "population": 66},
        {"number": 3, "id": 3, "name": "spain", "population": 47},
    ]


def test_csv_parser_detect_delimiter_tab():
    source = "a1\tb1\tc1A,c1B\na2\tb2\tc2\n"
    layout = Layout(header=False)
    with Resource(source, scheme="text", format="csv", layout=layout) as resource:
        assert resource.read_rows() == [
            {"field1": "a1", "field2": "b1", "field3": "c1A,c1B"},
            {"field1": "a2", "field2": "b2", "field3": "c2"},
        ]


def test_resource_schema_from_path_remote():
    resource = Resource(BASE_URL % "data/resource-with-dereferencing.json")
    assert resource == {
        "name": "name",
        "path": "table.csv",
        "dialect": "dialect.json",
        "schema": "schema-simple.json",
    }
    assert resource.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ]
    }


def test_resource_schema_unique_error():
    source = [["name"], [1], [2], [2]]
    detector = Detector(
        schema_patch={"fields": {"name": {"constraints": {"unique": True}}}}
    )
    with Resource(source, detector=detector) as resource:
        for row in resource:
            if row.row_number == 3:
                assert row.valid is False
                assert row.errors[0].code == "unique-error"
                continue
            assert row.valid


def test_step_table_join_from_dict():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.table_join(
                resource=dict(data=[["id", "note"], [1, "beer"], [2, "vine"]]),
                field_name="id",
            ),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
            {"name": "note", "type": "string"},
        ]
    }
    assert target.read_rows() == [
        {"id": 1, "name": "germany", "population": 83, "note": "beer"},
        {"id": 2, "name": "france", "population": 66, "note": "vine"},
    ]


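# Hedged companion sketch: table_join is assumed to accept a `mode` argument
# ("inner" by default, also "left", "right", ...); with mode="left" the
# unmatched third row is kept with a missing note.
def sketch_step_table_join_mode_left():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.table_normalize(),
            steps.table_join(
                resource=dict(data=[["id", "note"], [1, "beer"], [2, "vine"]]),
                field_name="id",
                mode="left",
            ),
        ],
    )
    assert target.read_rows()[2] == {
        "id": 3, "name": "spain", "population": 47, "note": None
    }

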
def test_resource_to_zip_source_remote(tmpdir):
    # Write
    path = BASE_URL % "data/table.csv"
    target = os.path.join(tmpdir, "datapackage.zip")
    resource = Resource(name="name", path=path)
    resource.to_zip(target)

    # Read
    resource = Resource(target)
    assert resource == {"name": "name", "path": path}
    assert resource.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]


def test_resource_to_zip_source_inline(tmpdir):
    # Write
    target = os.path.join(tmpdir, "dataresource.zip")
    data = [["id", "name"], ["1", "english"], ["2", "中国人"]]
    resource = Resource(name="name", data=data)
    resource.to_zip(target)

    # Read
    resource = Resource(target)
    assert resource == {"name": "name", "data": data}
    assert resource.read_rows() == [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]


def test_step_row_subset_unique_with_name():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.field_update(name="id", value=1),
            steps.row_subset(subset="unique", field_name="id"),
        ],
    )
    assert target.schema == {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
            {"name": "population", "type": "integer"},
        ]
    }
    assert target.read_rows() == []


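# Hedged companion sketch: `subset` is assumed to also accept "distinct" (and
# "duplicates"); with every id forced to 1, only the first row is distinct.
def sketch_step_row_subset_distinct_with_name():
    source = Resource("data/transform.csv")
    target = transform(
        source,
        steps=[
            steps.field_update(name="id", value=1),
            steps.row_subset(subset="distinct", field_name="id"),
        ],
    )
    assert target.read_rows() == [{"id": 1, "name": "germany", "population": 83}]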