def list_files_in_table(
    self,
    table: Table,
    *,
    predicateHints: Optional[Sequence[str]] = None,
    limitHint: Optional[int] = None,
) -> ListFilesInTableResponse:
    """Post a file-listing query for ``table`` and parse the response.

    Args:
        table: Table whose ``share``, ``schema`` and ``name`` attributes are
            interpolated into the request path.
        predicateHints: Optional predicate strings; included in the request
            body under ``"predicateHints"`` only when not ``None``.
        limitHint: Optional integer; included in the request body under
            ``"limitHint"`` only when not ``None``.

    Returns:
        A ``ListFilesInTableResponse`` whose protocol comes from the first
        item yielded by the response, whose metadata comes from the second,
        and whose ``add_files`` holds one entry per remaining item.
    """
    # Only hints that were actually supplied are sent in the request body.
    candidate_hints = (
        ("predicateHints", predicateHints),
        ("limitHint", limitHint),
    )
    data: Dict = {
        key: value for key, value in candidate_hints if value is not None
    }

    endpoint = (
        f"/shares/{table.share}/schemas/{table.schema}/tables/{table.name}/query"
    )
    with self._post_internal(endpoint, data=data) as lines:
        # The first two items are consumed as protocol and metadata, in that order.
        protocol_json = json.loads(next(lines))
        metadata_json = json.loads(next(lines))

        protocol = Protocol.from_json(protocol_json["protocol"])
        metadata = Metadata.from_json(metadata_json["metaData"])

        # Every remaining item is parsed as one file entry.
        add_files = []
        for raw_entry in lines:
            add_files.append(AddFile.from_json(json.loads(raw_entry)["file"]))

        return ListFilesInTableResponse(
            protocol=protocol,
            metadata=metadata,
            add_files=add_files,
        )
def test_protocol():
    """Check that ``Protocol.from_json`` parses version 1 and rejects version 100."""
    supported_payload = """ { "minReaderVersion" : 1 } """
    parsed = Protocol.from_json(supported_payload)
    assert parsed == Protocol(1)

    unsupported_payload = """ { "minReaderVersion" : 100 } """
    expected_message = "The table requires a newer version 100 to read."
    with pytest.raises(ValueError, match=expected_message):
        Protocol.from_json(unsupported_payload)
def query_table_metadata(self, table: Table) -> QueryTableMetadataResponse:
    """Fetch the protocol and metadata entries for ``table``.

    Args:
        table: Table whose ``share``, ``schema`` and ``name`` attributes are
            interpolated into the request path.

    Returns:
        A ``QueryTableMetadataResponse`` built from the first item yielded by
        the response (protocol) and the second item (metadata).
    """
    endpoint = (
        f"/shares/{table.share}/schemas/{table.schema}/tables/{table.name}/metadata"
    )
    with self._get_internal(endpoint) as lines:
        # The first two items are consumed as protocol and metadata, in that order.
        protocol_json = json.loads(next(lines))
        metadata_json = json.loads(next(lines))

        protocol = Protocol.from_json(protocol_json["protocol"])
        metadata = Metadata.from_json(metadata_json["metaData"])
        return QueryTableMetadataResponse(protocol=protocol, metadata=metadata)
def test_list_files_in_table_partitioned_different_schemas(
    rest_client: DataSharingRestClient,
):
    """List files of ``share1.default.table3`` and compare against fixed expectations.

    File URLs are not pinned: each expected ``AddFile`` reuses the URL from
    the response entry at the same index.
    """
    table = Table(name="table3", share="share1", schema="default")
    result = rest_client.list_files_in_table(table)

    assert result.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}},'
        '{"name":"type","type":"string","nullable":true,"metadata":{}}'
        "]}"
    )
    assert result.metadata == Metadata(
        id="7ba6d727-a578-4234-a138-953f790b427c",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )

    first_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:36:51.945Z","type":"bar"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:36:51.945Z","type":"bar"},'
        r'"nullCount":{"eventTime":0,"type":0}}'
    )
    second_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:36:47.599Z","type":"foo"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:36:47.599Z","type":"foo"},'
        r'"nullCount":{"eventTime":0,"type":0}}'
    )
    third_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:35:53.156Z"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:35:53.156Z"},'
        r'"nullCount":{"eventTime":0}}'
    )

    assert result.add_files == [
        AddFile(
            url=result.add_files[0].url,
            id="db213271abffec6fd6c7fc2aad9d4b3f",
            partition_values={"date": "2021-04-28"},
            size=778,
            stats=first_stats,
        ),
        AddFile(
            url=result.add_files[1].url,
            id="f1f8be229d8b18eb6d6a34255f2d7089",
            partition_values={"date": "2021-04-28"},
            size=778,
            stats=second_stats,
        ),
        AddFile(
            url=result.add_files[2].url,
            id="a892a55d770ee70b34ffb2ebf7dc2fd0",
            partition_values={"date": "2021-04-28"},
            size=573,
            stats=third_stats,
        ),
    ]
def test_query_table_metadata_partitioned(rest_client: DataSharingRestClient):
    """Query metadata of ``share2.default.table2`` and compare against fixed expectations."""
    table = Table(name="table2", share="share2", schema="default")
    result = rest_client.query_table_metadata(table)

    assert result.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    assert result.metadata == Metadata(
        id="f8d5c169-3d01-4ca3-ad9e-7dc3355aedb2",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
def test_query_table_metadata_non_partitioned(
    rest_client: DataSharingRestClient,
):
    """Query metadata of ``share1.default.table1`` and compare against fixed expectations."""
    table = Table(name="table1", share="share1", schema="default")
    result = rest_client.query_table_metadata(table)

    assert result.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    assert result.metadata == Metadata(
        id="ed96aa41-1d81-4b7f-8fb5-846878b4b0cf",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=[],
    )
def test_query_table_metadata_partitioned_different_schemas(
    rest_client: DataSharingRestClient,
):
    """Query metadata of ``share1.default.table3`` and compare against fixed expectations."""
    table = Table(name="table3", share="share1", schema="default")
    result = rest_client.query_table_metadata(table)

    assert result.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}},'
        '{"name":"type","type":"string","nullable":true,"metadata":{}}'
        "]}"
    )
    assert result.metadata == Metadata(
        id="7ba6d727-a578-4234-a138-953f790b427c",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )
def test_list_files_in_table_non_partitioned(
    rest_client: DataSharingRestClient,
):
    """List files of ``share1.default.table1`` with a predicate hint and check the result.

    File URLs are not pinned: each expected ``AddFile`` reuses the URL from
    the response entry at the same index.
    """
    table = Table(name="table1", share="share1", schema="default")
    result = rest_client.list_files_in_table(
        table,
        predicateHints=["date = '2021-01-31'"],
    )

    assert result.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    assert result.metadata == Metadata(
        id="ed96aa41-1d81-4b7f-8fb5-846878b4b0cf",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=[],
    )

    first_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T06:32:22.421Z","date":"2021-04-28"},'
        r'"maxValues":{"eventTime":"2021-04-28T06:32:22.421Z","date":"2021-04-28"},'
        r'"nullCount":{"eventTime":0,"date":0}}'
    )
    second_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T06:32:02.070Z","date":"2021-04-28"},'
        r'"maxValues":{"eventTime":"2021-04-28T06:32:02.070Z","date":"2021-04-28"},'
        r'"nullCount":{"eventTime":0,"date":0}}'
    )

    assert result.add_files == [
        AddFile(
            url=result.add_files[0].url,
            id="061cb3683a467066995f8cdaabd8667d",
            partition_values={},
            size=781,
            stats=first_stats,
        ),
        AddFile(
            url=result.add_files[1].url,
            id="e268cbf70dbaa6143e7e9fa3e2d3b00e",
            partition_values={},
            size=781,
            stats=second_stats,
        ),
    ]
def test_list_files_in_table_partitioned(rest_client: DataSharingRestClient):
    """List files of ``share2.default.table2`` with both hints and check the result.

    File URLs are not pinned: each expected ``AddFile`` reuses the URL from
    the response entry at the same index.
    """
    table = Table(name="table2", share="share2", schema="default")
    result = rest_client.list_files_in_table(
        table,
        predicateHints=["date = '2021-01-31'"],
        limitHint=123,
    )

    assert result.protocol == Protocol(min_reader_version=1)

    expected_schema = (
        '{"type":"struct","fields":['
        '{"name":"eventTime","type":"timestamp","nullable":true,"metadata":{}},'
        '{"name":"date","type":"date","nullable":true,"metadata":{}}'
        "]}"
    )
    assert result.metadata == Metadata(
        id="f8d5c169-3d01-4ca3-ad9e-7dc3355aedb2",
        format=Format(provider="parquet", options={}),
        schema_string=expected_schema,
        partition_columns=["date"],
    )

    first_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:33:57.955Z"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:33:57.955Z"},'
        r'"nullCount":{"eventTime":0}}'
    )
    second_stats = (
        r'{"numRecords":1,'
        r'"minValues":{"eventTime":"2021-04-28T23:33:48.719Z"},'
        r'"maxValues":{"eventTime":"2021-04-28T23:33:48.719Z"},'
        r'"nullCount":{"eventTime":0}}'
    )

    assert result.add_files == [
        AddFile(
            url=result.add_files[0].url,
            id="9f1a49539c5cffe1ea7f9e055d5c003c",
            partition_values={"date": "2021-04-28"},
            size=573,
            stats=first_stats,
        ),
        AddFile(
            url=result.add_files[1].url,
            id="cd2209b32f5ed5305922dd50f5908a75",
            partition_values={"date": "2021-04-28"},
            size=573,
            stats=second_stats,
        ),
    ]