Exemple #1
0
    def list_files_in_table(
        self,
        table: Table,
        *,
        predicateHints: Optional[Sequence[str]] = None,
        limitHint: Optional[int] = None,
    ) -> ListFilesInTableResponse:
        """Query the files of ``table`` and parse the line-delimited reply.

        The request body only carries the hints that were actually supplied;
        a hint left as ``None`` is omitted from the payload entirely.

        The reply is consumed as an iterator of JSON lines: the first line
        holds the ``protocol`` record, the second the ``metaData`` record and
        every remaining line a ``file`` record.

        Args:
            table: Table whose ``share``, ``schema`` and ``name`` build the
                request path.
            predicateHints: Optional predicate strings forwarded under the
                ``predicateHints`` key.
            limitHint: Optional integer forwarded under the ``limitHint`` key.

        Returns:
            A ``ListFilesInTableResponse`` with the parsed protocol, metadata
            and list of files.
        """
        optional_hints = (
            ("predicateHints", predicateHints),
            ("limitHint", limitHint),
        )
        data: Dict = {
            key: value for key, value in optional_hints if value is not None
        }

        endpoint = f"/shares/{table.share}/schemas/{table.schema}/tables/{table.name}/query"
        with self._post_internal(endpoint, data=data) as lines:
            # The two header lines are read and decoded before anything is
            # converted, mirroring the order in which the reply is streamed.
            protocol_record = json.loads(next(lines))
            metadata_record = json.loads(next(lines))

            protocol = Protocol.from_json(protocol_record["protocol"])
            metadata = Metadata.from_json(metadata_record["metaData"])

            # Whatever is left in the iterator is one file record per line.
            add_files = []
            for raw_line in lines:
                file_record = json.loads(raw_line)
                add_files.append(AddFile.from_json(file_record["file"]))

            return ListFilesInTableResponse(
                protocol=protocol,
                metadata=metadata,
                add_files=add_files,
            )
Exemple #2
0
def test_protocol():
    """Check ``Protocol.from_json`` for an accepted and a rejected version.

    A payload with ``minReaderVersion`` 1 must parse to ``Protocol(1)``; a
    payload with ``minReaderVersion`` 100 must raise ``ValueError`` with the
    "newer version" message.
    """
    supported_payload = """
        {
            "minReaderVersion" : 1
        }
        """
    parsed = Protocol.from_json(supported_payload)
    assert parsed == Protocol(1)

    unsupported_payload = """
        {
            "minReaderVersion" : 100
        }
        """
    expected_message = "The table requires a newer version 100 to read."
    with pytest.raises(ValueError, match=expected_message):
        Protocol.from_json(unsupported_payload)
Exemple #3
0
 def query_table_metadata(self, table: Table) -> QueryTableMetadataResponse:
     """Fetch and parse the protocol and metadata records of ``table``.

     The reply is consumed as an iterator of JSON lines: the first line
     holds the ``protocol`` record and the second the ``metaData`` record.

     Args:
         table: Table whose ``share``, ``schema`` and ``name`` build the
             request path.

     Returns:
         A ``QueryTableMetadataResponse`` with the parsed protocol and
         metadata.
     """
     endpoint = f"/shares/{table.share}/schemas/{table.schema}/tables/{table.name}/metadata"
     with self._get_internal(endpoint) as lines:
         # Both lines are read and decoded before either is converted.
         protocol_record = json.loads(next(lines))
         metadata_record = json.loads(next(lines))

         protocol = Protocol.from_json(protocol_record["protocol"])
         metadata = Metadata.from_json(metadata_record["metaData"])
         return QueryTableMetadataResponse(
             protocol=protocol,
             metadata=metadata,
         )
Exemple #4
0
def test_list_files_in_table_partitioned_different_schemas(
        rest_client: DataSharingRestClient):
    """List files of ``share1.default.table3`` and check the whole response.

    No hints are passed. The expected protocol, metadata and three files are
    compared against what the client returns.
    """
    table = Table(name="table3", share="share1", schema="default")
    response = rest_client.list_files_in_table(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}},'
        '{"name":"type","type":"string","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="7ba6d727-a578-4234-a138-953f790b427c",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata

    # One (id, size, stats) entry per expected file, in response order.
    expected_specs = [
        (
            "db213271abffec6fd6c7fc2aad9d4b3f",
            778,
            (r'{"numRecords":1,'
             r'"minValues":{"eventTime":"2021-04-28T23:36:51.945Z","type":"bar"},'
             r'"maxValues":{"eventTime":"2021-04-28T23:36:51.945Z","type":"bar"},'
             r'"nullCount":{"eventTime":0,"type":0}}'),
        ),
        (
            "f1f8be229d8b18eb6d6a34255f2d7089",
            778,
            (r'{"numRecords":1,'
             r'"minValues":{"eventTime":"2021-04-28T23:36:47.599Z","type":"foo"},'
             r'"maxValues":{"eventTime":"2021-04-28T23:36:47.599Z","type":"foo"},'
             r'"nullCount":{"eventTime":0,"type":0}}'),
        ),
        (
            "a892a55d770ee70b34ffb2ebf7dc2fd0",
            573,
            (r'{"numRecords":1,'
             r'"minValues":{"eventTime":"2021-04-28T23:35:53.156Z"},'
             r'"maxValues":{"eventTime":"2021-04-28T23:35:53.156Z"},'
             r'"nullCount":{"eventTime":0}}'),
        ),
    ]
    # The url of each expected file is copied from the response itself, so
    # the comparison is driven by the remaining fields.
    expected_files = [
        AddFile(
            url=response.add_files[position].url,
            id=file_id,
            partition_values={"date": "2021-04-28"},
            size=file_size,
            stats=file_stats,
        )
        for position, (file_id, file_size, file_stats)
        in enumerate(expected_specs)
    ]
    assert response.add_files == expected_files
Exemple #5
0
def test_query_table_metadata_partitioned(rest_client: DataSharingRestClient):
    """Query metadata of ``share2.default.table2`` and check the response.

    The expected metadata declares ``date`` as the only partition column.
    """
    table = Table(name="table2", share="share2", schema="default")
    response = rest_client.query_table_metadata(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="f8d5c169-3d01-4ca3-ad9e-7dc3355aedb2",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata
Exemple #6
0
def test_query_table_metadata_non_partitioned(
        rest_client: DataSharingRestClient):
    """Query metadata of ``share1.default.table1`` and check the response.

    The expected metadata has an empty list of partition columns.
    """
    table = Table(name="table1", share="share1", schema="default")
    response = rest_client.query_table_metadata(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="ed96aa41-1d81-4b7f-8fb5-846878b4b0cf",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=[],
    )
    assert response.metadata == expected_metadata
Exemple #7
0
def test_query_table_metadata_partitioned_different_schemas(
        rest_client: DataSharingRestClient):
    """Query metadata of ``share1.default.table3`` and check the response.

    The expected schema string lists three fields (``eventTime``, ``date``
    and ``type``) and ``date`` is the only partition column.
    """
    table = Table(name="table3", share="share1", schema="default")
    response = rest_client.query_table_metadata(table)

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}},'
        '{"name":"type","type":"string","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="7ba6d727-a578-4234-a138-953f790b427c",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata
Exemple #8
0
def test_list_files_in_table_non_partitioned(
        rest_client: DataSharingRestClient):
    """List files of ``share1.default.table1`` with a predicate hint.

    The expected protocol, metadata (no partition columns) and two files are
    compared against what the client returns.
    """
    table = Table(name="table1", share="share1", schema="default")
    response = rest_client.list_files_in_table(
        table,
        predicateHints=["date = '2021-01-31'"],
    )

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="ed96aa41-1d81-4b7f-8fb5-846878b4b0cf",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=[],
    )
    assert response.metadata == expected_metadata

    # One (id, stats) entry per expected file, in response order; both files
    # share the same size and have no partition values.
    expected_specs = [
        (
            "061cb3683a467066995f8cdaabd8667d",
            (r'{"numRecords":1,'
             r'"minValues":{"eventTime":"2021-04-28T06:32:22.421Z","date":"2021-04-28"},'
             r'"maxValues":{"eventTime":"2021-04-28T06:32:22.421Z","date":"2021-04-28"},'
             r'"nullCount":{"eventTime":0,"date":0}}'),
        ),
        (
            "e268cbf70dbaa6143e7e9fa3e2d3b00e",
            (r'{"numRecords":1,'
             r'"minValues":{"eventTime":"2021-04-28T06:32:02.070Z","date":"2021-04-28"},'
             r'"maxValues":{"eventTime":"2021-04-28T06:32:02.070Z","date":"2021-04-28"},'
             r'"nullCount":{"eventTime":0,"date":0}}'),
        ),
    ]
    # The url of each expected file is copied from the response itself, so
    # the comparison is driven by the remaining fields.
    expected_files = [
        AddFile(
            url=response.add_files[position].url,
            id=file_id,
            partition_values={},
            size=781,
            stats=file_stats,
        )
        for position, (file_id, file_stats) in enumerate(expected_specs)
    ]
    assert response.add_files == expected_files
Exemple #9
0
def test_list_files_in_table_partitioned(rest_client: DataSharingRestClient):
    """List files of ``share2.default.table2`` with both hints supplied.

    A predicate hint and a limit hint are passed. The expected protocol,
    metadata (partitioned by ``date``) and two files are compared against
    what the client returns.
    """
    table = Table(name="table2", share="share2", schema="default")
    response = rest_client.list_files_in_table(
        table,
        predicateHints=["date = '2021-01-31'"],
        limitHint=123,
    )

    assert response.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    expected_metadata = Metadata(
        id="f8d5c169-3d01-4ca3-ad9e-7dc3355aedb2",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
    assert response.metadata == expected_metadata

    # One (id, stats) entry per expected file, in response order; both files
    # share the same size and partition values.
    expected_specs = [
        (
            "9f1a49539c5cffe1ea7f9e055d5c003c",
            (r'{"numRecords":1,'
             r'"minValues":{"eventTime":"2021-04-28T23:33:57.955Z"},'
             r'"maxValues":{"eventTime":"2021-04-28T23:33:57.955Z"},'
             r'"nullCount":{"eventTime":0}}'),
        ),
        (
            "cd2209b32f5ed5305922dd50f5908a75",
            (r'{"numRecords":1,'
             r'"minValues":{"eventTime":"2021-04-28T23:33:48.719Z"},'
             r'"maxValues":{"eventTime":"2021-04-28T23:33:48.719Z"},'
             r'"nullCount":{"eventTime":0}}'),
        ),
    ]
    # The url of each expected file is copied from the response itself, so
    # the comparison is driven by the remaining fields.
    expected_files = [
        AddFile(
            url=response.add_files[position].url,
            id=file_id,
            partition_values={"date": "2021-04-28"},
            size=573,
            stats=file_stats,
        )
        for position, (file_id, file_stats) in enumerate(expected_specs)
    ]
    assert response.add_files == expected_files