def test_sanity_ingest_from_file(self, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream, mock_aad):
    responses.add_callback(
        responses.POST, "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_callback, content_type="application/json"
    )

    ingest_client = QueuedIngestClient("https://ingest-somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    # ensure the test works regardless of the directory it is executed from
    current_dir = os.getcwd()
    path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
    missing_path_parts = []
    for path_part in path_parts:
        if path_part not in current_dir:
            missing_path_parts.append(path_part)

    file_path = os.path.join(current_dir, *missing_path_parts)

    ingest_client.ingest_from_file(file_path, ingestion_properties=ingestion_properties)

    # mock_put_message_in_queue
    assert mock_put_message_in_queue.call_count == 1

    put_message_in_queue_mock_kwargs = mock_put_message_in_queue.call_args_list[0][1]

    queued_message_json = json.loads(put_message_in_queue_mock_kwargs["content"])
    expected_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__1111-111111-111111-1111__dataset.csv.gz?"

    # mock_upload_blob_from_stream
    # not checking the query string because its parameter order can change; just check that it is present
    assert queued_message_json["BlobPath"].startswith(expected_url) is True
    assert len(queued_message_json["BlobPath"]) > len(expected_url)
    assert queued_message_json["DatabaseName"] == "database"
    assert queued_message_json["IgnoreSizeLimit"] is False
    assert queued_message_json["AdditionalProperties"]["format"] == "csv"
    assert queued_message_json["FlushImmediately"] is False
    assert queued_message_json["TableName"] == "table"
    assert queued_message_json["RawDataSize"] > 0
    assert queued_message_json["RetainBlobOnSuccess"] is True

    upload_blob_kwargs = mock_upload_blob_from_stream.call_args_list[0][1]
    assert type(upload_blob_kwargs["data"]) == io.BytesIO
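# Sketch (not part of the original test file): the mock_* parameters above are typically injected
# by stacked unittest.mock @patch decorators, where the bottom-most decorator supplies the first
# mock argument. The patch targets below are illustrative assumptions only; the real suite may
# patch different attributes.
import uuid
import unittest
from unittest.mock import patch

MOCKED_UUID = uuid.UUID("11111111-1111-1111-1111-111111111111")  # hypothetical fixed UUID for deterministic blob names


class ExampleMockWiring(unittest.TestCase):
    @patch("azure.kusto.data.security._AadHelper.acquire_authorization_header", return_value="Bearer token")  # -> mock_aad (assumed target)
    @patch("azure.storage.blob.BlobClient.upload_blob")  # -> mock_upload_blob_from_stream (assumed target)
    @patch("azure.storage.queue.QueueClient.send_message")  # -> mock_put_message_in_queue (assumed target)
    @patch("uuid.uuid4", return_value=MOCKED_UUID)  # -> mock_uuid (bottom decorator maps to the first parameter)
    def test_example(self, mock_uuid, mock_put_message_in_queue, mock_upload_blob_from_stream, mock_aad):
        # the injected mocks can then be asserted on, exactly as in the test above
        self.assertEqual(mock_put_message_in_queue.call_count, 0)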
def test_ingest_from_file_wrong_endpoint(self):
    responses.add_callback(
        responses.POST, "https://somecluster.kusto.windows.net/v1/rest/mgmt", callback=request_error_callback, content_type="application/json"
    )

    ingest_client = QueuedIngestClient("https://somecluster.kusto.windows.net")
    ingestion_properties = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    # ensure the test works regardless of the directory it is executed from
    current_dir = os.getcwd()
    path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
    missing_path_parts = []
    for path_part in path_parts:
        if path_part not in current_dir:
            missing_path_parts.append(path_part)

    file_path = os.path.join(current_dir, *missing_path_parts)

    with self.assertRaises(KustoInvalidEndpointError) as ex:
        ingest_client.ingest_from_file(file_path, ingestion_properties=ingestion_properties)

    self.assertEqual(
        ex.exception.args[0],
        "You are using 'DataManagement' client type, but the provided endpoint is of ServiceType 'Engine'. Initialize the client with the appropriate endpoint URI: 'https://ingest-somecluster.kusto.windows.net'",
        "Expected exception was not raised",
    )
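# Sketch (assumption, not from the original file): the request_callback / request_error_callback
# helpers referenced by both tests follow the `responses` library contract -- a callable that
# receives the prepared request and returns a (status, headers, body) tuple registered via
# responses.add_callback. The response bodies below are hypothetical placeholders, not the real
# Kusto management payloads.
import json


def example_request_callback(request):
    # succeed with a minimal JSON body so the client can proceed
    return 200, {"Content-Type": "application/json"}, json.dumps({"Tables": []})


def example_request_error_callback(request):
    # mimic an engine endpoint rejecting a data-management request
    return 400, {"Content-Type": "application/json"}, json.dumps({"error": {"code": "WrongEndpoint"}})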
# there are a lot of useful properties, make sure to go over the docs and check them out
ingestion_props = IngestionProperties(
    database="{database_name}",
    table="{table_name}",
    data_format=DataFormat.CSV,
    # in case status updates for successes are also required
    # report_level=ReportLevel.FailuresAndSuccesses,
    # in case a mapping is required
    # ingestion_mapping_reference="{json_mapping_that_already_exists_on_table}",
    # ingestion_mapping_type=IngestionMappingType.JSON,
)

# ingest from file
file_descriptor = FileDescriptor("{filename}.csv", 3333)  # 3333 is the raw size of the data in bytes.
client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props)
client.ingest_from_file("{filename}.csv", ingestion_properties=ingestion_props)

# ingest from blob
blob_descriptor = BlobDescriptor(
    "https://{path_to_blob}.csv.gz?sp=rl&st=2020-05-20T13:38:37Z&se=2020-05-21T13:38:37Z&sv=2019-10-10&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
    10,
)  # 10 is the raw size of the data in bytes.
client.ingest_from_blob(blob_descriptor, ingestion_properties=ingestion_props)

# ingest from dataframe
import pandas

fields = ["id", "name", "value"]
rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
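# The dataframe snippet above stops before the ingest call. A minimal completion sketch, assuming
# the QueuedIngestClient.ingest_from_dataframe(df, ingestion_properties=...) API:
df = pandas.DataFrame(data=rows, columns=fields)
client.ingest_from_dataframe(df, ingestion_properties=ingestion_props)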