def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid, mock_put_message_in_queue, mock_create_blob_from_path):
    """Ingest a small DataFrame and verify both the queued aggregation message
    and the temp-file blob upload performed by the client.

    The mock fixtures pin the pid/time/uuid so the generated blob name
    ("df_100_64.csv.gz") and ingestion-source id are deterministic.
    """
    responses.add_callback(
        responses.POST,
        "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
        callback=request_callback,
        content_type="application/json",
    )
    ingest_client = KustoIngestClient("https://ingest-somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", dataFormat=DataFormat.csv)

    from pandas import DataFrame

    fields = ["id", "name", "value"]
    rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
    df = DataFrame(data=rows, columns=fields)

    ingest_client.ingest_from_dataframe(df, ingestion_properties=ingestion_properties)

    # Exactly one message must be enqueued for aggregation.
    assert mock_put_message_in_queue.call_count == 1
    put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]
    assert put_message_in_queue_mock_kwargs["queue_name"] == "readyforaggregation-secured"

    # The queued content is base64-encoded JSON; decode it before inspecting.
    queued_message = base64.b64decode(put_message_in_queue_mock_kwargs["content"].encode("utf-8")).decode("utf-8")
    queued_message_json = json.loads(queued_message)

    # mock_create_blob_from_stream
    assert (
        queued_message_json["BlobPath"]
        == "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_100_64.csv.gz?sas"
    )
    assert queued_message_json["DatabaseName"] == "database"
    assert queued_message_json["IgnoreSizeLimit"] is False  # `is` for bool checks (PEP 8 / E712)
    assert queued_message_json["AdditionalProperties"]["format"] == "csv"
    assert queued_message_json["FlushImmediately"] is False
    assert queued_message_json["TableName"] == "table"
    assert queued_message_json["RawDataSize"] > 0
    assert queued_message_json["RetainBlobOnSuccess"] is True

    # The DataFrame is spilled to a gzipped CSV in the temp directory,
    # then uploaded to the "tempstorage" container under the composed blob name.
    create_blob_from_path_mock_kwargs = mock_create_blob_from_path.call_args_list[0][1]
    import tempfile

    assert create_blob_from_path_mock_kwargs["container_name"] == "tempstorage"
    assert create_blob_from_path_mock_kwargs["file_path"] == os.path.join(tempfile.gettempdir(), "df_100_64.csv.gz")
    assert create_blob_from_path_mock_kwargs["blob_name"] == "database__table__1111-111111-111111-1111__df_100_64.csv.gz"
def test_simple_ingest_from_dataframe(self, mock_pid, mock_time, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream):
    """Ingest a small DataFrame and verify the queued message and the
    stream-based blob upload (newer SDK path using upload_blob_from_stream).

    Mocked pid/time/uuid make the blob name deterministic except for
    ``id(df)``, which is interpolated into the expected URL at runtime.
    """
    responses.add_callback(
        responses.POST,
        "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
        callback=request_callback,
        content_type="application/json",
    )
    ingest_client = KustoIngestClient("https://ingest-somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    from pandas import DataFrame

    fields = ["id", "name", "value"]
    rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
    df = DataFrame(data=rows, columns=fields)

    ingest_client.ingest_from_dataframe(df, ingestion_properties=ingestion_properties)

    # Exactly one message must be enqueued for aggregation.
    assert mock_put_message_in_queue.call_count == 1
    put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]
    queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])

    expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__df_{0}_100_64.csv.gz?".format(
        id(df)
    )

    # mock_upload_blob_from_stream
    # Not checking the query string because its parameter order can change —
    # just check the URL prefix and that *some* SAS query string follows it.
    assert queued_message_json["BlobPath"].startswith(expected_url)
    assert len(queued_message_json["BlobPath"]) > len(expected_url)
    assert queued_message_json["DatabaseName"] == "database"
    assert queued_message_json["IgnoreSizeLimit"] is False
    assert queued_message_json["AdditionalProperties"]["format"] == "csv"
    assert queued_message_json["FlushImmediately"] is False
    assert queued_message_json["TableName"] == "table"
    assert queued_message_json["RawDataSize"] > 0
    assert queued_message_json["RetainBlobOnSuccess"] is True

    # The upload must receive the exact temp-file reader type, not a subclass,
    # so pin it with `is` (PEP 8 / E721) rather than `==`.
    upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]
    assert type(upload_blob_kwargs["data"]) is io.BufferedReader
# ingest from a local file
client.ingest_from_file("(unknown).csv", ingestion_properties=ingestion_props)

# ingest from blob
# 10 is the raw (uncompressed) size of the data in bytes.
blob_descriptor = BlobDescriptor("https://{path_to_blob}.csv.gz?sas", 10)
client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props)

# ingest from dataframe
import pandas

fields = ["id", "name", "value"]
rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
df = pandas.DataFrame(data=rows, columns=fields)
client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)

# ingest a whole folder.
import os

path = "folder/path"
# NOTE: os.listdir yields bare file names, so each entry must be joined back
# onto `path` — otherwise the client would look for the files in the CWD.
for name in os.listdir(path):
    client.ingest_from_file(os.path.join(path, name), ingestion_properties=ingestion_props)

##################################################################
##                        INGESTION STATUS                      ##
##################################################################

# if status updates are required, something like this can be done
import pprint
import time
from azure.kusto.ingest.status import KustoIngestStatusQueues