def test_quick_query_iter_records_with_json_serialization_setting(
        self, datalake_storage_account_name, datalake_storage_account_key):
    """Iterate query records from a newline-delimited JSON file using a JSON
    output dialect with ';' as the record delimiter; no errors expected."""
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)

    # Arrange: three JSON records; the middle one has no 'name' field, so the
    # projection should yield an empty object for it.
    record_with_name = b'{"name": "owner", "id": 1}'
    record_without_name = b'{"name2": "owner2"}'
    payload = b'\n'.join((record_with_name, record_without_name, record_with_name))

    # Upload the JSON file.
    target_name = self._get_file_reference()
    file_client = self.dsc.get_file_client(self.filesystem_name, target_name)
    file_client.upload_data(payload, overwrite=True)

    errors = []

    def on_error(error):
        errors.append(error)

    input_format = DelimitedJsonDialect(delimiter='\n')
    output_format = DelimitedJsonDialect(delimiter=';')
    resp = file_client.query_file(
        "SELECT name from BlobStorage",
        on_error=on_error,
        file_format=input_format,
        output_format=output_format)
    records = list(resp.records())

    # Assert: no per-record errors, full input consumed, expected projections.
    self.assertEqual(len(errors), 0)
    self.assertEqual(len(resp), len(payload))
    self.assertEqual(
        records,
        [b'{"name":"owner"}', b'{}', b'{"name":"owner"}', b''])
def test_quick_query_readall_with_json_serialization_setting(self):
    """readall() over a JSON query, emitting JSON records joined by ';'."""
    # NOTE(review): unlike the sibling tests, this one takes no account
    # credentials and never calls self._setUp -- confirm the fixture is
    # prepared elsewhere before this runs.

    # Arrange: three JSON records; the middle one lacks a 'name' field.
    record_with_name = b'{"name": "owner", "id": 1}'
    record_without_name = b'{"name2": "owner2"}'
    payload = b'\n'.join((record_with_name, record_without_name, record_with_name))

    # Upload the JSON file.
    target_name = self._get_file_reference()
    file_client = self.dsc.get_file_client(self.filesystem_name, target_name)
    file_client.upload_data(payload, overwrite=True)

    errors = []

    def on_error(error):
        errors.append(error)

    input_format = DelimitedJsonDialect(delimiter='\n')
    output_format = DelimitedJsonDialect(delimiter=';')
    resp = file_client.query_file(
        "SELECT name from BlobStorage",
        on_error=on_error,
        file_format=input_format,
        output_format=output_format)
    query_result = resp.readall()

    # Assert: no per-record errors, full input consumed, expected output blob.
    self.assertEqual(len(errors), 0)
    self.assertEqual(len(resp), len(payload))
    self.assertEqual(query_result, b'{"name":"owner"};{};{"name":"owner"};')
def test_quick_query_iter_records_with_fatal_error_ignore(
        self, datalake_storage_account_name, datalake_storage_account_key):
    """Iterate records of a query over malformed (unquoted) JSON with no
    on_error handler: fatal parse errors are silently ignored.

    Fix: removed the large unused local ``data3`` -- it was never included in
    the uploaded payload and served no purpose in this test.
    """
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)

    # Arrange: intentionally invalid JSON (keys/values unquoted) so the
    # service reports a fatal parse error for each record.
    data1 = b'{name: owner}'
    data2 = b'{name2: owner2}'
    data = data1 + b'\n' + data2 + b'\n' + data1

    # Upload the (malformed) json file.
    file_name = self._get_file_reference()
    file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
    file_client.upload_data(data, overwrite=True)

    input_format = DelimitedJsonDialect()
    output_format = DelimitedTextDialect(
        delimiter=';',
        quotechar="'",
        lineterminator='.',
        escapechar='\\')
    # No on_error handler: errors are ignored and iteration completes.
    resp = file_client.query_file(
        "SELECT * from BlobStorage",
        file_format=input_format,
        output_format=output_format)
    for record in resp.records():
        print(record)
def test_quick_query_readall_with_fatal_error_handler_raise(self):
    """readall() over malformed JSON with an on_error handler that raises:
    the exception must propagate out of readall().

    Fixes: removed the large unused local ``data3`` (never part of the
    uploaded payload) and the unused ``errors`` list (the handler raises
    instead of appending, so the list was dead).
    """
    # NOTE(review): unlike the sibling tests, this one takes no account
    # credentials and never calls self._setUp -- confirm the fixture is
    # prepared elsewhere before this runs.

    # Arrange: intentionally invalid JSON (keys/values unquoted) so the
    # service reports a fatal parse error for each record.
    data1 = b'{name: owner}'
    data2 = b'{name2: owner2}'
    data = data1 + b'\n' + data2 + b'\n' + data1

    # Upload the (malformed) json file.
    file_name = self._get_file_reference()
    file_client = self.dsc.get_file_client(self.filesystem_name, file_name)
    file_client.upload_data(data, overwrite=True)

    def on_error(error):
        # Escalate the first fatal error instead of collecting it.
        raise Exception(error.description)

    input_format = DelimitedJsonDialect()
    output_format = DelimitedTextDialect(
        delimiter=';',
        quotechar="'",
        lineterminator='.',
        escapechar='\\'
    )
    resp = file_client.query_file(
        "SELECT * from BlobStorage",
        on_error=on_error,
        file_format=input_format,
        output_format=output_format)
    with pytest.raises(Exception):
        resp.readall()
def main():
    """Sample: upload a CSV to Data Lake and run a quick query over it.

    Requires the AZURE_STORAGE_CONNECTION_STRING environment variable.

    Fix: the bare ``except:`` around create_file_system also swallowed
    SystemExit/KeyboardInterrupt; narrowed to ``except Exception`` so only
    service errors (e.g. "filesystem already exists") are ignored.
    """
    try:
        CONNECTION_STRING = os.environ['AZURE_STORAGE_CONNECTION_STRING']
    except KeyError:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    datalake_service_client = DataLakeServiceClient.from_connection_string(
        CONNECTION_STRING)
    filesystem_name = "quickqueryfilesystem"
    filesystem_client = datalake_service_client.get_file_system_client(
        filesystem_name)
    try:
        filesystem_client.create_file_system()
    except Exception:
        # Best-effort: the filesystem may already exist from a previous run.
        pass

    # [START query]
    errors = []

    def on_error(error):
        errors.append(error)

    # upload the csv file
    file_client = datalake_service_client.get_file_client(
        filesystem_name, "csvfile")
    file_client.upload_data(CSV_DATA, overwrite=True)

    # select the second column of the csv file
    query_expression = "SELECT _2 from DataLakeStorage"
    input_format = DelimitedTextDialect(
        delimiter=',',
        quotechar='"',
        lineterminator='\n',
        escapechar="",
        has_header=False)
    output_format = DelimitedJsonDialect(delimiter='\n')
    reader = file_client.query_file(
        query_expression,
        on_error=on_error,
        file_format=input_format,
        output_format=output_format)
    content = reader.readall()
    # [END query]

    print(content)
    filesystem_client.delete_file_system()
def test_quick_query_readall_with_fatal_error_ignore(
        self, datalake_storage_account_name, datalake_storage_account_key):
    """readall() over malformed (unquoted) JSON without an on_error handler:
    fatal parse errors are ignored and the call completes."""
    self._setUp(datalake_storage_account_name, datalake_storage_account_key)

    # Arrange: intentionally invalid JSON (keys/values unquoted).
    bad_record_a = b'{name: owner}'
    bad_record_b = b'{name2: owner2}'
    payload = b'\n'.join((bad_record_a, bad_record_b, bad_record_a))

    # Upload the (malformed) json file.
    target_name = self._get_file_reference()
    file_client = self.dsc.get_file_client(self.filesystem_name, target_name)
    file_client.upload_data(payload, overwrite=True)

    input_format = DelimitedJsonDialect()
    output_format = DelimitedTextDialect(
        delimiter=';',
        quotechar="'",
        lineterminator='.',
        escapechar='\\')
    # No on_error handler supplied: errors are silently ignored.
    resp = file_client.query_file(
        "SELECT * from BlobStorage",
        file_format=input_format,
        output_format=output_format)
    query_result = resp.readall()