def test_quick_query_iter_records_with_json_serialization_setting(self, resource_group, location, storage_account, storage_account_key):
    """Iterate query records when input and output both use JSON dialects.

    Uploads three newline-separated JSON documents, runs
    ``SELECT name from BlobStorage`` with a newline-delimited input dialect
    and a ``;``-delimited output dialect, and checks the records produced
    by ``resp.records()``.

    ``resource_group`` and ``location`` are not used in the body; they are
    presumably injected by the test preparer (not visible here).
    """
    # Arrange
    bsc = BlobServiceClient(
        self.account_url(storage_account, "blob"),
        credential=storage_account_key)
    self._setup(bsc)

    try:
        data1 = b'{\"name\": \"owner\", \"id\": 1}'
        data2 = b'{\"name2\": \"owner2\"}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        # Collect every non-fatal error reported by the query.
        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect(delimiter='\n')
        output_format = DelimitedJsonDialect(delimiter=';')

        # Act
        resp = blob_client.query_blob(
            "SELECT name from BlobStorage",
            on_error=on_error,
            blob_format=input_format,
            output_format=output_format)
        listdata = list(resp.records())

        # Assert
        self.assertEqual(len(errors), 0)
        self.assertEqual(resp._size, len(data))
        # The second document has no "name" key, hence the empty object; the
        # expected list also ends with an empty record.
        self.assertEqual(listdata, [b'{"name":"owner"}', b'{}', b'{"name":"owner"}', b''])
    finally:
        # Clean up even when the query or an assertion fails, so a failing
        # run does not skip the teardown step.
        self._teardown(bsc)
def test_quick_query_readall_with_fatal_error_ignore(self, resource_group, location, storage_account, storage_account_key):
    """Read a whole query result when no ``on_error`` callback is supplied.

    The uploaded records use unquoted keys and values, so they are not
    valid JSON. No error callback is passed to ``query_blob`` and nothing is
    asserted on the output: the test passes as long as ``readall()``
    completes without raising.

    ``resource_group`` and ``location`` are not used in the body; they are
    presumably injected by the test preparer (not visible here).
    """
    # Arrange
    bsc = BlobServiceClient(
        self.account_url(storage_account, "blob"),
        credential=storage_account_key)
    self._setup(bsc)

    try:
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\'
        )

        # Act: the result is intentionally discarded; only the absence of an
        # exception matters here.
        resp = blob_client.query_blob(
            "SELECT * from BlobStorage",
            blob_format=input_format,
            output_format=output_format)
        resp.readall()
    finally:
        # Clean up even when the query raises.
        self._teardown(bsc)
def test_quick_query_iter_records_with_fatal_error_ignore(
        self, resource_group, location, storage_account, storage_account_key):
    """Iterate query records when no ``on_error`` callback is supplied.

    The uploaded records use unquoted keys and values, so they are not
    valid JSON. No error callback is passed to ``query_blob`` and nothing is
    asserted on the output: the test passes as long as iterating
    ``resp.records()`` completes without raising. Each record is printed.

    ``resource_group`` and ``location`` are not used in the body; they are
    presumably injected by the test preparer (not visible here).
    """
    # Arrange
    bsc = BlobServiceClient(self.account_url(storage_account, "blob"), credential=storage_account_key)
    self._setup(bsc)

    try:
        data1 = b'{name: owner}'
        data2 = b'{name2: owner2}'
        data = data1 + b'\n' + data2 + b'\n' + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        input_format = DelimitedJsonDialect()
        output_format = DelimitedTextDialect(
            delimiter=';',
            quotechar="'",
            lineterminator='.',
            escapechar='\\')

        # Act: drain the record iterator; only the absence of an exception
        # matters here.
        resp = blob_client.query_blob(
            "SELECT * from BlobStorage",
            blob_format=input_format,
            output_format=output_format)
        for record in resp.records():
            print(record)
    finally:
        # Clean up even when the query or the iteration raises.
        self._teardown(bsc)
def test_quick_query_with_only_input_json_serialization_setting(self, storage_account_name, storage_account_key):
    """Query a JSON blob when only the input dialect is specified.

    Uploads three JSON documents concatenated without any separator, runs
    ``SELECT name from BlobStorage`` with a newline-delimited input dialect
    and ``output_format=None``, and checks that the result is three
    newline-terminated JSON records.
    """
    # Arrange
    bsc = BlobServiceClient(
        self.account_url(storage_account_name, "blob"),
        credential=storage_account_key)
    self._setup(bsc)

    try:
        data1 = b'{\"name\": \"owner\", \"id\": 1}'
        data2 = b'{\"name2\": \"owner2\"}'
        # The documents are joined back to back, with no delimiter between them.
        data = data1 + data2 + data1

        # upload the json file
        blob_name = self._get_blob_reference()
        blob_client = bsc.get_blob_client(self.container_name, blob_name)
        blob_client.upload_blob(data, overwrite=True)

        # Collect every non-fatal error reported by the query.
        errors = []

        def on_error(error):
            errors.append(error)

        input_format = DelimitedJsonDialect(delimiter='\n')
        # Passed explicitly as None: this test covers the input-only case.
        output_format = None

        # Act
        resp = blob_client.query_blob(
            "SELECT name from BlobStorage",
            on_error=on_error,
            blob_format=input_format,
            output_format=output_format)
        query_result = resp.readall()

        # Assert
        self.assertEqual(len(errors), 0)
        self.assertEqual(resp._size, len(data))
        self.assertEqual(query_result, b'{"name":"owner"}\n{}\n{"name":"owner"}\n')
    finally:
        # Clean up even when the query or an assertion fails, so a failing
        # run does not skip the teardown step.
        self._teardown(bsc)
def query(a_query=store_conn['query_sql'], a_blob_url=store_conn['file_csv'], a_sas_key=store_conn['access_key']):
    """Helper to query json and/or csv files on a Blob/Datalake.

    The file type is taken from the extension reported by ``get_ext`` for
    ``a_blob_url``. ``.csv`` and ``.json`` blobs are queried; ``.parquet``
    and any other extension only print a message.

    :param a_query: Query expression passed to ``query_blob``.
    :param a_blob_url: URL of the blob to query.
    :param a_sas_key: String appended verbatim to ``a_blob_url`` to build
        the URL handed to ``BlobClient.from_blob_url``.
    :return: List of the non-empty records yielded by the query reader, or
        an empty list when the file type is not queried.
    """
    start = time.perf_counter()

    # Get the file extension/type (the file name part is not needed).
    _, a_file_type = get_ext(a_blob_url)
    blob_client = BlobClient.from_blob_url(blob_url=a_blob_url + a_sas_key)

    if a_file_type == '.csv':
        qa_reader = blob_client.query_blob(
            a_query,
            blob_format=DelimitedTextDialect(has_header=True),
            encoding='utf-8')
    elif a_file_type == '.json':
        # NOTE: the keyword is ``delimiter``. This call previously used the
        # misspelling ``delimeter=' '``, which the dialect does not read, so
        # the newline default was what actually applied. The keyword is now
        # spelled correctly and that effective newline separator is kept.
        qa_reader = blob_client.query_blob(
            a_query,
            blob_format=DelimitedJsonDialect(delimiter='\n'),
            encoding='utf-8',
            output_format=DelimitedJsonDialect(delimiter='\n'))
    elif a_file_type == '.parquet':
        qa_reader = None
        print("We'll do something about this")
    else:
        qa_reader = None
        print(f"Sorry, can't query a {a_file_type} file type")

    # The clock stops here, before the records are iterated below.
    end = time.perf_counter()

    # Show (sarcastic voice) *usefully accurate* elapsed seconds and return records
    print(f"Time taken to get results {end - start} seconds")

    if qa_reader is None:
        # The backslash is escaped so the literal is valid; the printed text
        # is unchanged.
        print("No result found. Sorry human, better luck nextime ¯\\_(ツ)_/¯")
        return []

    # Skip empty records.
    return [row for row in qa_reader.records() if row]
def main():
    """Run the quick-query sample against a storage account.

    Reads the connection string from ``AZURE_STORAGE_CONNECTION_STRING``
    (exits with status 1 when it is not set), creates the
    ``quickquerycontainer`` container, uploads a CSV file, selects its second
    column as newline-delimited JSON, prints the result and deletes the
    container.
    """
    try:
        connection_string = os.environ['AZURE_STORAGE_CONNECTION_STRING']
    except KeyError:
        print("AZURE_STORAGE_CONNECTION_STRING must be set.")
        sys.exit(1)

    blob_service_client = BlobServiceClient.from_connection_string(connection_string)
    container_name = "quickquerycontainer"
    container_client = blob_service_client.get_container_client(container_name)

    try:
        container_client.create_container()
    except Exception:
        # Best effort: presumably the container may already exist from an
        # earlier run. Unlike a bare ``except``, this does not swallow
        # KeyboardInterrupt or SystemExit.
        pass
    # [START query]
    errors = []

    def on_error(error):
        errors.append(error)

    # upload the csv file
    blob_client = blob_service_client.get_blob_client(container_name, "csvfile")
    with open("./sample-blobs/quick_query.csv", "rb") as stream:
        blob_client.upload_blob(stream, overwrite=True)

    # select the second column of the csv file
    query_expression = "SELECT _2 from BlobStorage"
    input_format = DelimitedTextDialect(delimiter=',', quotechar='"', lineterminator='\n', escapechar="", has_header=False)
    output_format = DelimitedJsonDialect(delimiter='\n')
    reader = blob_client.query_blob(query_expression, on_error=on_error, blob_format=input_format, output_format=output_format)
    content = reader.readall()
    # [END query]
    print(content)

    container_client.delete_container()