Example 1
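These snippets are drawn from the frictionless-py test suite for its BigQuery plugin. Each test receives an options fixture carrying an authenticated BigQuery service client and, where relevant, a generated table prefix. The code assumes roughly the following imports; this is a sketch based on the frictionless 4.x module layout (an assumption, exact paths vary between releases), and the Table-based examples come from an older release that still exported Table from the top-level package:

import datetime

import pytest
from frictionless import FrictionlessException, Package, Resource
from frictionless.plugins.bigquery import BigqueryDialect, BigqueryStorage

Example 1 writes data/timezone.csv into BigQuery and asserts that the datetime and time values are read back as naive (timezone-free) values.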
def test_bigquery_parser_write_timezone(options):
    prefix = options.pop("prefix")
    service = options.pop("service")
    dialect = BigqueryDialect(table=prefix, **options)
    source = Resource("data/timezone.csv")
    target = source.write(service, dialect=dialect)
    with target:
        assert target.read_rows() == [
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
        ]
Example 2
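This test pushes a package through BigQuery storage and back, checking which schema constraints survive the round trip: the required flag on the first field is preserved, while minLength, maxLength, pattern, enum, minimum, and maximum are dropped.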
def test_bigquery_storage_constraints(options):
    prefix = options.pop("prefix")
    service = options.pop("service")
    dialect = BigqueryDialect(table=prefix, **options)
    source = Package("data/storage/constraints.json")
    storage = source.to_bigquery(service, dialect=dialect)
    target = Package.from_bigquery(service, dialect=dialect)

    # Assert metadata
    assert target.get_resource("constraints").schema == {
        "fields": [
            {"name": "required", "type": "string", "constraints": {"required": True}},
            {"name": "minLength", "type": "string"},  # constraint removal
            {"name": "maxLength", "type": "string"},  # constraint removal
            {"name": "pattern", "type": "string"},  # constraint removal
            {"name": "enum", "type": "string"},  # constraint removal
            {"name": "minimum", "type": "integer"},  # constraint removal
            {"name": "maximum", "type": "integer"},  # constraint removal
        ],
    }

    # Assert data
    assert target.get_resource("constraints").read_rows() == [
        {
            "required": "passing",
            "minLength": "passing",
            "maxLength": "passing",
            "pattern": "passing",
            "enum": "passing",
            "minimum": 5,
            "maximum": 5,
        },
    ]

    # Cleanup storage
    storage.delete_package(target.resource_names)
Example 3
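The same timezone round trip as Example 1, written against the older Table API.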
def test_bigquery_parser_write_timezone(options):
    prefix = options.pop("prefix")
    service = options.pop("service")
    dialect = BigqueryDialect(table=prefix, **options)
    with Table("data/timezone.csv") as table:
        table.write(service, dialect=dialect)
    with Table(service, dialect=dialect) as table:
        assert table.read_rows() == [
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
            {"datetime": datetime.datetime(2020, 1, 1, 15), "time": datetime.time(15)},
        ]
Example 4
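Writes a large in-memory resource to exercise bulk writes, checks the row count on read, and finally drops every table the storage knows about.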
def test_storage_big_file(options):
    service = options.pop("service")
    dialect = BigqueryDialect(**options)
    storage = BigqueryStorage(service, dialect=dialect)
    resource = Resource(name="table", data=[[1]] * 1500)
    storage.write_resource(resource, force=True)
    target = storage.read_resource("table")
    assert len(target.read_rows()) == 1500
    storage.delete_package(list(storage))
Example 5
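Deleting a resource that does not exist raises a FrictionlessException whose error carries the storage-error code and a "does not exist" note.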
def test_bigquery_storage_delete_resource_not_existent_error(options):
    service = options.pop("service")
    dialect = BigqueryDialect(**options)
    storage = BigqueryStorage(service, dialect=dialect)
    with pytest.raises(FrictionlessException) as excinfo:
        storage.delete_resource("bad")
    error = excinfo.value.error
    assert error.code == "storage-error"
    assert error.note.count("does not exist")
Example 6
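Writing a resource whose table already exists, without force=True, raises a storage-error with an "already exists" note.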
def test_bigquery_storage_write_resource_existent_error(options):
    service = options.pop("service")
    dialect = BigqueryDialect(**options)
    storage = BigqueryStorage(service, dialect=dialect)
    resource = Resource(path="data/table.csv")
    storage.write_resource(resource, force=True)
    with pytest.raises(FrictionlessException) as excinfo:
        storage.write_resource(resource)
    error = excinfo.value.error
    assert error.code == "storage-error"
    assert error.note.count("already exists")
    # Cleanup storage
    storage.delete_package(list(storage))
Example 7
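A basic write/read round trip of data/table.csv through BigQuery using the Table API, asserting both the header and the rows.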
def test_table_bigquery(options):
    prefix = options.pop("prefix")
    service = options.pop("service")
    dialect = BigqueryDialect(table=prefix, **options)

    # Write
    with Table("data/table.csv") as table:
        table.write(service, dialect=dialect)

    # Read
    with Table(service, dialect=dialect) as table:
        assert table.header == ["id", "name"]
        assert table.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]
Example 8
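The same round trip expressed with the Resource API: the source CSV is written into a BigQuery-backed target resource, which is then opened and read back.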
def test_bigquery_parser(options):
    prefix = options.pop("prefix")
    service = options.pop("service")
    dialect = BigqueryDialect(table=prefix, **options)

    # Write
    source = Resource("data/table.csv")
    target = Resource(service, dialect=dialect)
    source.write(target)

    # Read
    with target:
        assert target.header == ["id", "name"]
        assert target.read_rows() == [
            {"id": 1, "name": "english"},
            {"id": 2, "name": "中国人"},
        ]
Example 9
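Round-trips a package covering every field type and documents how each type maps to BigQuery: unsupported types (any, array, duration, geojson, geopoint, object, yearmonth) fall back to string, year is downgraded to integer, custom date formats are removed, and datetimes come back converted to UTC.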
def test_bigquery_storage_types(options):
    prefix = options.pop("prefix")
    service = options.pop("service")
    dialect = BigqueryDialect(table=prefix, **options)
    source = Package("data/storage/types.json")
    storage = source.to_bigquery(service, dialect=dialect)
    target = Package.from_bigquery(service, dialect=dialect)

    # Assert metadata
    assert target.get_resource("types").schema == {
        "fields": [
            {"name": "any", "type": "string"},  # type fallback
            {"name": "array", "type": "string"},  # type fallback
            {"name": "boolean", "type": "boolean"},
            {"name": "date", "type": "date"},
            {"name": "date_year", "type": "date"},  # format removal
            {"name": "datetime", "type": "datetime"},
            {"name": "duration", "type": "string"},  # type fallback
            {"name": "geojson", "type": "string"},  # type fallback
            {"name": "geopoint", "type": "string"},  # type fallback
            {"name": "integer", "type": "integer"},
            {"name": "number", "type": "number"},
            {"name": "object", "type": "string"},  # type fallback
            {"name": "string", "type": "string"},
            {"name": "time", "type": "time"},
            {"name": "year", "type": "integer"},  # type downgrade
            {"name": "yearmonth", "type": "string"},  # type fallback
        ],
    }

    # Assert data
    assert target.get_resource("types").read_rows() == [
        {
            "any": "中国人",
            "array": '["Mike", "John"]',
            "boolean": True,
            "date": datetime.date(2015, 1, 1),
            "date_year": datetime.date(2015, 1, 1),
            # converted into UTC
            "datetime": datetime.datetime(2015, 1, 1, 3, 0),
            "duration": "P1Y1M",
            "geojson": '{"type": "Point", "coordinates": [33, 33.33]}',
            "geopoint": "30,70",
            "integer": 1,
            "number": 7,
            "object": '{"chars": 560}',
            "string": "english",
            "time": datetime.time(3, 0),
            "year": 2015,
            "yearmonth": "2015-01",
        },
    ]

    # Cleanup storage
    storage.delete_package(target.resource_names)
Example 10
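Round-trips a package that declares primary and foreign keys: the fields and data survive, but the primary key, foreign keys, and field constraints are not preserved by the BigQuery storage.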
def test_bigquery_storage_integrity(options):
    prefix = options.pop("prefix")
    service = options.pop("service")
    dialect = BigqueryDialect(table=prefix, **options)
    source = Package("data/storage/integrity.json")
    storage = source.to_bigquery(service, dialect=dialect)
    target = Package.from_bigquery(service, dialect=dialect)

    # Assert metadata (main)
    assert target.get_resource("integrity_main").schema == {
        "fields": [
            # added required
            {"name": "id", "type": "integer"},
            {"name": "parent", "type": "integer"},
            {"name": "description", "type": "string"},
        ],
        # primary key removal
        # foreign keys removal
    }

    # Assert metadata (link)
    assert target.get_resource("integrity_link").schema == {
        "fields": [
            {"name": "main_id", "type": "integer"},
            {"name": "some_id", "type": "integer"},  # constraint removal
            {"name": "description", "type": "string"},  # constraint removal
        ],
        # primary key removal
        # foreign keys removal
    }

    # Assert data (main)
    assert target.get_resource("integrity_main").read_rows() == [
        {"id": 1, "parent": None, "description": "english"},
        {"id": 2, "parent": 1, "description": "中国人"},
    ]

    # Assert data (link)
    assert target.get_resource("integrity_link").read_rows() == [
        {"main_id": 1, "some_id": 1, "description": "note1"},
        {"main_id": 2, "some_id": 2, "description": "note2"},
    ]

    # Cleanup storage
    storage.delete_package(target.resource_names)