def test_blob_ingestion(self, mock_uuid, mock_put_message_in_queue,
                            mock_aad):
        """Ingest from a blob URL and expect the request to be queued.

        The DM management endpoint is mocked with ``queued_request_callback``.
        After ingesting, the result status must be ``QUEUED`` and
        ``assert_queued_upload`` is invoked with the blob URL stripped of its
        SAS query string and with no upload mock (``None``).

        ``mock_uuid`` and ``mock_aad`` are not used in the body; presumably
        they are injected by patch decorators outside this view.
        """
        mgmt_url = "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt"
        # Blob location up to and including the "?" that starts the SAS query.
        blob_url = "https://storageaccount.blob.core.windows.net/tempstorage/database__table__11111111-1111-1111-1111-111111111111__tmpbvk40leg?"
        sas_query = (
            "sp=rl&st=2020-05-20T13"
            "%3A38%3A37Z&se=2020-05-21T13%3A38%3A37Z&sv=2019-10-10&sr=c&sig=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx "
        )

        responses.add_callback(responses.POST,
                               mgmt_url,
                               callback=queued_request_callback,
                               content_type="application/json")

        managed_client = ManagedStreamingIngestClient.from_dm_kcsb(
            "https://ingest-somecluster.kusto.windows.net")
        props = IngestionProperties(database="database", table="table")

        descriptor = BlobDescriptor(blob_url + sas_query, 1)
        result = managed_client.ingest_from_blob(descriptor,
                                                 ingestion_properties=props)

        assert result.status == IngestionStatus.QUEUED

        assert_queued_upload(
            mock_put_message_in_queue,
            mock_upload_blob_from_stream=None,
            expected_url=blob_url,
        )
    def test_fallback_big_file(self, mock_uuid, mock_put_message_in_queue,
                               mock_upload_blob_from_stream, mock_aad):
        """Ingest a 5 MiB file through the managed client and expect it to be queued.

        Both the DM management endpoint and the engine streaming endpoint are
        mocked. The upload mock verifies that it receives the exact bytes
        written to the file together with a 10-minute timeout. Afterwards the
        result status must be ``QUEUED`` and the upload mock must have been
        called.

        ``mock_uuid`` and ``mock_aad`` are not used in the body; presumably
        they are injected by patch decorators outside this view.
        """
        responses.add_callback(
            responses.POST,
            "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt",
            callback=queued_request_callback,
            content_type="application/json")
        responses.add_callback(
            responses.POST,
            "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
            callback=streaming_request_callback,
            content_type="application/json",
        )

        data_format = DataFormat.ORC  # Using orc to avoid compression
        ingest_client = ManagedStreamingIngestClient.from_dm_kcsb(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   data_format=data_format)

        initial_bytes = bytearray(os.urandom(5 * 1024 * 1024))

        def check_bytes(data, **kwargs):
            # Runs in place of the real blob upload.
            assert kwargs["timeout"] == 10 * 60
            assert data.read() == initial_bytes

        mock_upload_blob_from_stream.side_effect = check_bytes

        # delete=False keeps the file on disk after it is closed so that it
        # can be ingested by path; the finally clause removes it.
        f = NamedTemporaryFile(dir=".", mode="wb", delete=False)
        try:
            # Close the handle even if the write fails, so the unlink below
            # never runs against a still-open file.
            with f:
                f.write(initial_bytes)
            result = ingest_client.ingest_from_file(
                f.name, ingestion_properties=ingestion_properties)
        finally:
            os.unlink(f.name)

        assert result.status == IngestionStatus.QUEUED

        assert_queued_upload(
            mock_put_message_in_queue,
            mock_upload_blob_from_stream,
            "https://storageaccount.blob.core.windows.net/tempstorage/database__table__11111111-1111-1111-1111-111111111111__{}?"
            .format(os.path.basename(f.name)),
            format=data_format.kusto_value,
        )

        mock_upload_blob_from_stream.assert_called()
    def test_fallback_transient_errors_limit(self, mock_uuid,
                                             mock_put_message_in_queue,
                                             mock_upload_blob_from_stream,
                                             mock_aad):
        """Expect a queued result after three transient streaming failures.

        The engine streaming endpoint is answered by
        ``transient_error_callback`` through a ``TransientResponseHelper``
        configured with ``times_to_fail=3``. After ingesting the CSV dataset
        the result must be ``QUEUED`` and the helper must have recorded
        exactly three calls.

        ``mock_uuid`` and ``mock_aad`` are not used in the body; presumably
        they are injected by patch decorators outside this view.
        """
        total_attempts = 3
        mgmt_url = "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt"
        streaming_url = "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table"

        responses.add_callback(responses.POST,
                               mgmt_url,
                               callback=queued_request_callback,
                               content_type="application/json")

        managed_client = ManagedStreamingIngestClient.from_engine_kcsb(
            "https://somecluster.kusto.windows.net")
        managed_client._set_retry_settings(0)
        props = IngestionProperties(database="database", table="table")

        helper = TransientResponseHelper(times_to_fail=total_attempts)

        def respond_with_transient_error(request):
            return transient_error_callback(helper, request)

        responses.add_callback(
            responses.POST,
            streaming_url,
            callback=respond_with_transient_error,
            content_type="application/json",
        )

        # Let the test run from the repository root or from any directory on
        # the way to the dataset: append only the path components that do not
        # already occur in the working directory string.
        working_dir = os.getcwd()
        dataset_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
        absent_parts = [
            part for part in dataset_parts if part not in working_dir
        ]
        dataset_path = os.path.join(working_dir, *absent_parts)

        result = managed_client.ingest_from_file(dataset_path,
                                                 ingestion_properties=props)

        assert result.status == IngestionStatus.QUEUED

        assert_queued_upload(
            mock_put_message_in_queue,
            mock_upload_blob_from_stream,
            "https://storageaccount.blob.core.windows.net/tempstorage/database__table__11111111-1111-1111-1111-111111111111__dataset.csv.gz?",
        )

        assert helper.total_calls == total_attempts
    def test_fallback_big_stream(self, mock_uuid, mock_put_message_in_queue,
                                 mock_upload_blob_from_stream, mock_aad):
        """Ingest a 5 MiB in-memory stream and expect it to be queued.

        Both the DM management endpoint and the engine streaming endpoint are
        mocked. The upload mock verifies that it receives the original bytes
        together with a 10-minute timeout. Afterwards the result status must
        be ``QUEUED`` and the upload mock must have been called.

        ``mock_uuid`` and ``mock_aad`` are not used in the body; presumably
        they are injected by patch decorators outside this view.
        """
        mgmt_url = "https://ingest-somecluster.kusto.windows.net/v1/rest/mgmt"
        streaming_url = "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table"

        responses.add_callback(responses.POST,
                               mgmt_url,
                               callback=queued_request_callback,
                               content_type="application/json")
        responses.add_callback(responses.POST,
                               streaming_url,
                               callback=streaming_request_callback,
                               content_type="application/json")

        data_format = DataFormat.ORC  # Using orc to avoid compression
        managed_client = ManagedStreamingIngestClient.from_engine_kcsb(
            "https://somecluster.kusto.windows.net")
        props = IngestionProperties(database="database",
                                    table="table",
                                    data_format=data_format)

        payload = bytearray(os.urandom(5 * 1024 * 1024))
        payload_stream = io.BytesIO(payload)

        def verify_upload(data, **kwargs):
            # Runs in place of the real blob upload.
            assert kwargs["timeout"] == 10 * 60
            assert data.read() == payload

        mock_upload_blob_from_stream.side_effect = verify_upload

        result = managed_client.ingest_from_stream(
            payload_stream, ingestion_properties=props)

        assert result.status == IngestionStatus.QUEUED

        assert_queued_upload(
            mock_put_message_in_queue,
            mock_upload_blob_from_stream,
            "https://storageaccount.blob.core.windows.net/tempstorage/database__table__11111111-1111-1111-1111-111111111111__stream?",
            format=data_format.kusto_value,
            check_raw_data=False,
        )

        mock_upload_blob_from_stream.assert_called()
    def test_permanent_error(self):
        """Expect ``KustoApiError`` when streaming ingest returns a permanent error.

        The engine streaming endpoint is mocked to answer HTTP 400 with an
        error payload whose ``@permanent`` flag is true. Ingesting the CSV
        dataset must raise ``KustoApiError``, and the API error attached to
        the exception must be reported as permanent.
        """
        responses.add(
            responses.POST,
            "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
            status=400,
            json={
                "error": {
                    "code": "BadRequest",
                    "message": "Request is invalid and cannot be executed.",
                    "@type": "Kusto.Common.Svc.Exceptions.AdminCommandWrongEndpointException",
                    "@message":
                    "Cannot get ingestion resources from this service endpoint. The appropriate endpoint is most likely "
                    "'https://ingest-somecluster.kusto.windows.net/'.",
                    "@context": {
                        "timestamp": "2021-10-12T06:05:35.6602087Z",
                        "serviceAlias": "SomeCluster",
                        "machineName": "KEngine000000",
                        "processName": "Kusto.WinSvc.Svc",
                        "processId": 2648,
                        "threadId": 472,
                        "appDomainName": "Kusto.WinSvc.Svc.exe",
                        "clientRequestId": "KPC.execute;a3dfb878-9d2b-49d6-89a5-e9b3a9f1f674",
                        "activityId": "87eb8fc9-78b3-4580-bcc8-6c90482f9118",
                        "subActivityId": "bbfb038b-4467-4f96-afd4-945904fc6278",
                        "activityType": "DN.AdminCommand.IngestionResourcesGetCommand",
                        "parentActivityId": "00e678e9-4204-4143-8c94-6afd94c27430",
                        "activityStack":
                        "(Activity stack: CRID=KPC.execute;a3dfb878-9d2b-49d6-89a5-e9b3a9f1f674 ARID=87eb8fc9-78b3-4580-bcc8-6c90482f9118 > DN.Admin.Client.ExecuteControlCommand/833dfb85-5d67-44b7-882d-eb2283e65780 > P.WCF.Service.ExecuteControlCommand..IInterNodeCommunicationAdminContract/3784e74f-1d89-4c15-adef-0a360c4c431e > DN.FE.ExecuteControlCommand/00e678e9-4204-4143-8c94-6afd94c27430 > DN.AdminCommand.IngestionResourcesGetCommand/bbfb038b-4467-4f96-afd4-945904fc6278)",
                    },
                    "@permanent": True,
                }
            },
            content_type="application/json",
        )

        ingest_client = ManagedStreamingIngestClient.from_dm_kcsb(
            "https://ingest-somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   data_format=DataFormat.CSV)

        # Let the test run from the repository root or from any directory on
        # the way to the dataset: append only the path components that do not
        # already occur in the working directory string.
        current_dir = os.getcwd()
        path_parts = ["azure-kusto-ingest", "tests", "input", "dataset.csv"]
        missing_path_parts = [
            path_part for path_part in path_parts
            if path_part not in current_dir
        ]
        file_path = os.path.join(current_dir, *missing_path_parts)

        with pytest.raises(KustoApiError) as ex:
            ingest_client.ingest_from_file(
                file_path, ingestion_properties=ingestion_properties)

        # Must sit outside the `with` block: statements after the raising call
        # inside it never execute, and `ex.value` is only populated once the
        # context manager has caught the exception.
        assert ex.value.get_api_error().permanent
Beispiel #6
0
# Wrap the text in an in-memory text stream and ingest it.
# (`str_sequence`, `client` and `ingestion_properties` are defined earlier in the file, outside this view.)
str_stream = io.StringIO(str_sequence)
client.ingest_from_stream(str_stream,
                          ingestion_properties=ingestion_properties)

##################################################################
##                MANAGED STREAMING INGEST                      ##
##################################################################

# Managed streaming ingest client will try to use streaming ingestion for performance, but will fall back to queued ingestion if unable.
# NOTE(review): "{cluster_name}" looks like a placeholder to be filled in before running; confirm.
dm_cluster = "https://ingest-{cluster_name}.kusto.windows.net"

# `client_id`, `client_secret` and `authority_id` are defined outside this view.
kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
    dm_cluster, client_id, client_secret, authority_id)

# Create it from a dm connection string
# (this rebinds `client`, which the statements above used with its earlier value)
client = ManagedStreamingIngestClient.from_dm_kcsb(kcsb)
# or an engine connection string, like a streaming ingestion client with `from_engine_kcsb`
# or provide both: `ManagedStreamingIngestClient(engine_kcsb, dm_kcsb)`

# use client as you would a streaming or queued ingestion client

# Ingest raw bytes through an in-memory binary stream.
byte_sequence = b"56,56,56"
bytes_stream = io.BytesIO(byte_sequence)
client.ingest_from_stream(bytes_stream,
                          ingestion_properties=ingestion_properties)

# Rebind `ingestion_properties`; the call above still used the previous value.
# NOTE(review): "{database_name}" / "{table_name}" look like placeholders; confirm.
ingestion_properties = IngestionProperties(database="{database_name}",
                                           table="{table_name}",
                                           data_format=DataFormat.CSV)

# ingest from file