def test_resource_source_multipart_error_bad_path():
    """Expect a source-error when reading a multipart resource with missing chunks."""
    descriptor = {"name": "name", "path": ["chunk1.csv", "chunk2.csv"]}
    expected_note = "[Errno 2] No such file or directory: 'chunk1.csv'"
    resource = Resource(descriptor)
    with pytest.raises(exceptions.FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "source-error"
    assert failure.note == expected_note
def test_resource_source_path_error_bad_path():
    """Expect a scheme-error whose note mentions errno 2 and the missing file."""
    descriptor = {"name": "name", "path": "table.csv"}
    resource = Resource(descriptor)
    with pytest.raises(FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "scheme-error"
    note = failure.note
    assert note.count("[Errno 2]") and note.count("table.csv")
def test_resource_integrity_onerror_row_warn():
    """Expect a UserWarning when reading rows with ``onerror="warn"``."""
    # Integer cells are read against a schema that declares a string field.
    resource = Resource(
        data=[["name"], [1], [2], [3]],
        schema={"fields": [{"name": "name", "type": "string"}]},
        onerror="warn",
    )
    assert resource.onerror == "warn"
    with pytest.warns(UserWarning):
        resource.read_rows()
def test_resource_schema_from_path_error_bad_path():
    """Expect a schema-error naming the file when the schema path is bad."""
    descriptor = {"name": "name", "path": "path", "schema": "data/bad.json"}
    resource = Resource(descriptor)
    with pytest.raises(exceptions.FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "schema-error"
    assert failure.note.count("bad.json")
def test_resource_source_path_error_bad_path_not_safe_traversing():
    """Expect a resource-error when the path contains a ``..`` segment."""
    traversing_path = "data/../data/table.csv"
    resource = Resource(path=traversing_path)
    with pytest.raises(exceptions.FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "resource-error"
    assert failure.note.count("data/table.csv")
def test_resource_integrity_onerror_row_raise():
    """Expect an exception when reading rows with ``onerror="raise"``."""
    # Integer cells are read against a schema that declares a string field.
    resource = Resource(
        data=[["name"], [1], [2], [3]],
        schema={"fields": [{"name": "name", "type": "string"}]},
        onerror="raise",
    )
    assert resource.onerror == "raise"
    with pytest.raises(exceptions.FrictionlessException):
        resource.read_rows()
def test_resource_source_path_error_bad_path():
    """Expect a scheme-error with the exact OS message for a missing file."""
    descriptor = {"name": "name", "path": "table.csv"}
    expected_note = "[Errno 2] No such file or directory: 'table.csv'"
    resource = Resource(descriptor)
    with pytest.raises(exceptions.FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "scheme-error"
    assert failure.note == expected_note
def test_resource_onerror_header_raise():
    """Expect an exception under ``onerror="raise"`` when the schema field differs from the header."""
    # The data header is "name" while the schema declares a field "bad".
    resource = Resource(
        data=[["name"], [1], [2], [3]],
        schema={"fields": [{"name": "bad", "type": "integer"}]},
        onerror="raise",
    )
    assert resource.onerror == "raise"
    with pytest.raises(FrictionlessException):
        resource.read_rows()
def test_multipart_loader_resource_error_bad_path():
    """Expect a scheme-error mentioning errno 2 and the first missing chunk."""
    descriptor = {"name": "name", "path": ["chunk1.csv", "chunk2.csv"]}
    resource = Resource(descriptor)
    with pytest.raises(FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "scheme-error"
    note = failure.note
    assert note.count("[Errno 2]") and note.count("chunk1.csv")
def test_resource_source_multipart_error_bad_path_not_safe_traversing():
    """Expect a "not safe" resource-error when one multipart chunk is an absolute path."""
    unsafe_chunk = os.path.abspath("data/../chunk2.csv")
    descriptor = {"name": "name", "path": ["data/chunk1.csv", unsafe_chunk]}
    resource = Resource(descriptor)
    with pytest.raises(exceptions.FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "resource-error"
    assert failure.note.count("not safe")
def test_resource_schema_from_path_error_path_not_safe():
    """Expect a resource-error naming the file when the schema is an absolute path."""
    absolute_schema = os.path.abspath("data/schema.json")
    descriptor = {"name": "name", "path": "path", "schema": absolute_schema}
    resource = Resource(descriptor)
    with pytest.raises(exceptions.FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "resource-error"
    assert failure.note.count("schema.json")
def test_resource_reset_on_close_issue_190():
    """Check that opening an already-open resource yields the same first row again.

    The resource is opened manually (no ``with`` block), so the body is
    wrapped in ``try``/``finally`` to guarantee ``close()`` runs even when
    an assertion fails; otherwise a failing run would leave it open.
    """
    layout = Layout(header=False, limit_rows=1)
    source = [["1", "english"], ["2", "中国人"]]
    expected = [{"field1": 1, "field2": "english"}]
    resource = Resource(source, layout=layout)
    resource.open()
    try:
        assert resource.read_rows() == expected
        # Second open happens without an explicit close in between, on purpose.
        resource.open()
        assert resource.read_rows() == expected
    finally:
        resource.close()
def test_resource_source_no_path_and_no_data():
    """Check an empty descriptor: no path, empty data, and a resource-error on read."""
    empty_descriptor = {}
    resource = Resource(empty_descriptor)
    assert resource.path is None
    assert resource.data == []
    assert resource.fullpath is None
    with pytest.raises(FrictionlessException) as caught:
        resource.read_rows()
    failure = caught.value.error
    assert failure.code == "resource-error"
    assert failure.note.count("is not valid")
def test_resource_relative_parent_path_with_trusted_option_issue_171():
    """Check that a ``..`` path fails by default but reads with ``trusted=True``."""
    traversing_path = "data/../data/table.csv"

    # trusted=false (default)
    untrusted = Resource(path=traversing_path)
    with pytest.raises(exceptions.FrictionlessException) as caught:
        untrusted.read_rows()
    failure = caught.value.error
    assert failure.code == "resource-error"
    assert failure.note.count("data/table.csv")

    # trusted=true
    trusted = Resource(path=traversing_path, trusted=True)
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    assert trusted.read_rows() == expected_rows
def test_resource_source_data():
    """Check the properties and read methods of a resource built from inline data."""
    data = [["id", "name"], ["1", "english"], ["2", "中国人"]]
    body = data[1:]
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    expected_stats = {
        "hash": "",
        "bytes": 0,
        "fields": 2,
        "rows": 2,
    }

    resource = Resource({"data": data})

    # Descriptor-level properties
    assert resource.path is None
    assert resource.data == data
    assert resource.source == data
    assert resource.basepath == ""
    assert resource.inline is True
    assert resource.tabular is True
    assert resource.multipart is False

    # Read methods, called in the same order as before
    assert resource.read_bytes() == b""
    assert resource.read_data() == body
    assert resource.read_rows() == expected_rows
    assert resource.read_header() == ["id", "name"]
    assert resource.read_sample() == body
    assert resource.read_stats() == expected_stats
def test_resource_read_rows():
    """Check the rows read from ``data/table.json``."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    resource = Resource(path="data/table.json")
    assert resource.read_rows() == expected_rows
def test_resource_source_path():
    """Check the properties and read methods of a resource built from a local CSV path."""
    path = "data/table.csv"
    raw_bytes = b"id,name\n1,english\n2,\xe4\xb8\xad\xe5\x9b\xbd\xe4\xba\xba\n"
    body = [["1", "english"], ["2", "中国人"]]
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    expected_stats = {
        "hash": "6c2c61dd9b0e9c6876139a449ed87933",
        "bytes": 30,
        "fields": 2,
        "rows": 2,
    }

    resource = Resource({"path": path})

    # Descriptor-level properties
    assert resource.path == path
    assert resource.data is None
    assert resource.source == path
    assert resource.basepath == ""
    assert resource.inline is False
    assert resource.tabular is True
    assert resource.multipart is False

    # Read methods, called in the same order as before
    assert resource.read_bytes() == raw_bytes
    assert resource.read_data() == body
    assert resource.read_rows() == expected_rows
    assert resource.read_header() == ["id", "name"]
    assert resource.read_sample() == [["1", "english"], ["2", "中国人"]]
    assert resource.read_stats() == expected_stats
def test_resource_integrity_foreign_keys_invalid():
    """Check that only the fifth row of the invalid file reports a foreign-key-error."""
    resource = Resource(DESCRIPTOR_FK, path="data/nested-invalid.csv")
    rows = resource.read_rows()

    # The first four rows are valid.
    for index in range(4):
        assert rows[index].valid

    # The fifth row carries the foreign key violation as its first error.
    first_error = rows[4].errors[0]
    assert first_error.code == "foreign-key-error"

    expected_rows = [
        {"id": 1, "cat": None, "name": "England"},
        {"id": 2, "cat": None, "name": "France"},
        {"id": 3, "cat": 1, "name": "London"},
        {"id": 4, "cat": 2, "name": "Paris"},
        {"id": 5, "cat": 6, "name": "Rome"},
    ]
    assert rows == expected_rows
def test_resource_integrity_foreign_keys():
    """Check that the four rows read from ``DESCRIPTOR_FK`` are valid and as expected."""
    resource = Resource(DESCRIPTOR_FK)
    rows = resource.read_rows()

    # The first four rows are valid.
    for index in range(4):
        assert rows[index].valid

    expected_rows = [
        {"id": 1, "cat": None, "name": "England"},
        {"id": 2, "cat": None, "name": "France"},
        {"id": 3, "cat": 1, "name": "London"},
        {"id": 4, "cat": 2, "name": "Paris"},
    ]
    assert rows == expected_rows
def test_resource_source_path_remote_and_basepath_remote():
    """Check that a ``BASE_URL`` path is kept as the source when basepath is also a ``BASE_URL`` value."""
    remote_path = BASE_URL % "data/table.csv"
    remote_base = BASE_URL % "data"
    resource = Resource(path=remote_path, basepath=remote_base)
    assert resource.source == BASE_URL % "data/table.csv"
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    assert resource.read_rows() == expected_rows
def test_resource_from_zip():
    """Check the path and rows of a resource built from ``data/resource.zip``."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    resource = Resource("data/resource.zip")
    assert resource.path == "table.csv"
    assert resource.read_rows() == expected_rows
def test_step_table_write(tmpdir):
    """Check that the ``table_write`` step writes the transformed table to ``path``."""
    path = str(tmpdir.join("table.json"))

    # Write
    pipeline = [
        steps.cell_set(field_name="population", value=100),
        steps.table_write(path=path),
    ]
    source = Resource("data/transform.csv")
    transform(source, steps=pipeline)

    # Read
    expected_rows = [
        {"id": 1, "name": "germany", "population": 100},
        {"id": 2, "name": "france", "population": 100},
        {"id": 3, "name": "spain", "population": 100},
    ]
    written = Resource(path=path)
    assert written.read_rows() == expected_rows
def test_resource_schema_source_remote():
    """Check the schema and rows when path and schema are relative to a ``BASE_URL`` basepath."""
    descriptor = {
        "name": "name",
        "profile": "tabular-data-resource",
        "path": "table.csv",
        "schema": "resource_schema.json",
    }
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    resource = Resource(descriptor, basepath=BASE_URL % "data")
    assert resource.schema == expected_schema
    assert resource.read_rows() == expected_rows
def test_resource_dialect():
    """Check that a custom dialect is exposed unchanged and the rows read as expected."""
    dialect = {
        "delimiter": "|",
        "quoteChar": "#",
        "escapeChar": "-",
        "doubleQuote": False,
        "skipInitialSpace": False,
    }
    descriptor = {
        "name": "name",
        "profile": "tabular-data-resource",
        "path": "dialect.csv",
        "schema": "resource-schema.json",
        "dialect": dialect,
    }
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": " |##"},
    ]
    resource = Resource(descriptor, basepath="data")
    assert resource.dialect == dialect
    assert resource.read_rows() == expected_rows
def test_resource_schema_source_data():
    """Check the schema and rows when inline data is paired with a schema file path."""
    inline_data = [["id", "name"], ["1", "english"], ["2", "中国人"]]
    descriptor = {
        "name": "name",
        "profile": "tabular-data-resource",
        "data": inline_data,
        "schema": "resource-schema.json",
    }
    expected_schema = {
        "fields": [
            {"name": "id", "type": "integer"},
            {"name": "name", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    resource = Resource(descriptor, basepath="data")
    assert resource.schema == expected_schema
    assert resource.read_rows() == expected_rows
def test_stream_loader_without_open():
    """Check that rows can be read from an open binary file object without ``open()``."""
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
    ]
    with open("data/table.csv", mode="rb") as stream:
        resource = Resource(stream, format="csv")
        # Read while the file handle is still open.
        assert resource.read_rows() == expected_rows
def test_spss_parser_write_timezone(tmpdir):
    """Check the datetime and time values read back after writing ``timezone.csv`` to a ``.sav`` file."""
    sav_path = str(tmpdir.join("table.sav"))

    # Write
    source = Resource("data/timezone.csv")
    target = Resource(sav_path)
    source.write(target)

    # Read
    expected_rows = [
        {
            "datetime": datetime.datetime(2020, 1, 1, 15),
            "time": datetime.time(15),
        }
        for _ in range(4)
    ]
    with target:
        assert target.read_rows() == expected_rows
def test_decimal_to_json():
    """Check that a row holding a ``Decimal`` can be dumped to JSON."""
    table = [["dec1"], [Decimal("53.940135311587831")]]
    resource = Resource(data=table)
    row = resource.read_rows()[0]

    # all we really want to 'assert' here is that these methods run without throwing
    # TypeError: Object of type 'Decimal' is not JSON serializable
    dumped_dict = json.dumps(row.to_dict(json=True))
    assert isinstance(dumped_dict, str)
    dumped_list = json.dumps(row.to_list(json=True))
    assert isinstance(dumped_list, str)
def test_resource_layout_header_false():
    """Check that a headerless CSV is read with the schema's field names."""
    layout = {"header": False}
    descriptor = {
        "name": "name",
        "profile": "tabular-data-resource",
        "path": "without-headers.csv",
        "layout": layout,
        "schema": "resource-schema.json",
    }
    expected_rows = [
        {"id": 1, "name": "english"},
        {"id": 2, "name": "中国人"},
        {"id": 3, "name": "german"},
    ]
    resource = Resource(descriptor, basepath="data")
    assert resource.layout == layout
    assert resource.read_rows() == expected_rows
def test_resource_detector_field_type():
    """Check that ``Detector(field_type="string")`` makes every inferred field a string."""
    expected_schema = {
        "fields": [
            {"name": "id", "type": "string"},
            {"name": "name", "type": "string"},
        ]
    }
    expected_rows = [
        {"id": "1", "name": "english"},
        {"id": "2", "name": "中国人"},
    ]
    string_detector = Detector(field_type="string")
    resource = Resource(path="data/table.csv", detector=string_detector)
    resource.infer(stats=True)
    assert resource.schema == expected_schema
    assert resource.header == ["id", "name"]
    assert resource.read_rows() == expected_rows