예제 #1
0
    def test_pop_unbalanced_queues(self):
        """Pop from two failure queues when only one of them yields messages.

        The fake ``receive_messages`` hands back failure messages only for
        queues whose name contains ``"1"``; any other queue returns nothing.
        """
        client = QueuedIngestClient("some-cluster")

        def receive_only_from_first(queue_name, messages_per_page=1):
            # Queues without "1" in their name behave as if they were empty.
            if "1" not in queue_name:
                return []
            return [
                mock_message(success=False)
                for _ in range(0, messages_per_page)
            ]

        fake_receive = fake_receive_factory(receive_only_from_first)

        # Patchers are only created here; nothing is patched until the
        # ``with`` statement below enters them.
        patch_success_qs = mock.patch.object(
            client._resource_manager, "get_successful_ingestions_queues")
        patch_failed_qs = mock.patch.object(
            client._resource_manager, "get_failed_ingestions_queues")
        patch_receive = mock.patch.object(
            QueueClient,
            "receive_messages",
            autospec=True,
            side_effect=fake_receive,
        )
        patch_delete = mock.patch.object(
            QueueClient, "delete_message", return_value=None)

        with patch_success_qs, \
                patch_failed_qs as mocked_get_failed_qs, \
                patch_receive as q_receive_mock, \
                patch_delete:

            first_failed_q = _ResourceUri(
                "mocked_storage_account_f1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_1_name",
                ENDPOINT_SUFFIX,
            )
            second_failed_q = _ResourceUri(
                "mocked_storage_account_f2",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_2_name",
                ENDPOINT_SUFFIX,
            )
            mocked_get_failed_qs.return_value = [
                first_failed_q, second_failed_q
            ]

            status_queues = KustoIngestStatusQueues(client)
            popped_failures = status_queues.failure.pop(6)

            assert len(popped_failures) == 6
            assert all(
                isinstance(msg, FailureMessage) for msg in popped_failures)

            assert q_receive_mock.call_count == 3

            # Sum the page sizes requested from each queue over all calls.
            requested_per_queue = {}
            for recorded_call in q_receive_mock.call_args_list:
                queue_name = recorded_call[0][0].queue_name
                requested_per_queue[queue_name] = (
                    requested_per_queue.get(queue_name, 0) +
                    recorded_call[1]["messages_per_page"])

            total_requested = (
                requested_per_queue[second_failed_q.object_name] +
                requested_per_queue[first_failed_q.object_name])
            assert total_requested == (4 + 4 + 6)
예제 #2
0
def getStatusQueue(client):
    """Create the ingestion status queues for *client*, best-effort.

    Any exception raised while constructing ``KustoIngestStatusQueues`` is
    logged and swallowed, so callers must be prepared for a ``None`` result.

    Args:
        client: Passed unchanged to ``KustoIngestStatusQueues``.

    Returns:
        The new ``KustoIngestStatusQueues`` instance, or ``None`` when its
        construction raised.
    """
    try:
        statusQueue = KustoIngestStatusQueues(client)
    except Exception as e:
        # Deliberately broad: initialization failure is reported, not raised.
        # The exception is passed as a logging argument so the message is
        # only formatted when the record is actually emitted.
        logging.error("Error initializing status queue:%s", e)
        return None
    logging.info("Initialized status queue successfully.")
    return statusQueue
예제 #3
0
    def test_isempty(self):
        """Check ``is_empty`` for a populated success queue and an empty failure queue.

        The fake peek returns success messages only for queues whose name
        contains ``"qs"``; every other queue returns an empty list.
        """
        client = QueuedIngestClient("some-cluster")

        def peek_success_queue_only(queue_name, num_messages=1):
            if "qs" not in queue_name:
                return []
            return [mock_message(success=True) for _ in range(0, num_messages)]

        fake_peek = fake_peek_factory(peek_success_queue_only)

        # Patchers are only created here; nothing is patched until the
        # ``with`` statement below enters them.
        patch_success_qs = mock.patch.object(
            client._resource_manager, "get_successful_ingestions_queues")
        patch_failed_qs = mock.patch.object(
            client._resource_manager, "get_failed_ingestions_queues")
        patch_peek = mock.patch.object(
            QueueClient,
            "peek_messages",
            autospec=True,
            side_effect=fake_peek,
        )

        with patch_success_qs as mocked_get_success_qs, \
                patch_failed_qs as mocked_get_failed_qs, \
                patch_peek as q_mock:
            failed_queue = _ResourceUri(
                "mocked_storage_account1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_name",
                ENDPOINT_SUFFIX,
            )
            success_queue = _ResourceUri(
                "mocked_storage_account2",
                OBJECT_TYPE,
                "queue",
                "mocked_qs_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_success_qs.return_value = [success_queue]
            mocked_get_failed_qs.return_value = [failed_queue]

            status_queues = KustoIngestStatusQueues(client)

            assert status_queues.success.is_empty() is False
            assert status_queues.failure.is_empty() is True

            # Exactly two peeks were recorded, each asking for two messages.
            assert q_mock.call_count == 2
            requested_sizes = [
                recorded_call[1]["max_messages"]
                for recorded_call in q_mock.call_args_list
            ]
            assert requested_sizes == [2, 2]
    def test_isempty(self, mocked_q_peek_messages):
        """Check ``is_empty`` against one populated and one empty status queue.

        Args:
            mocked_q_peek_messages: Mock standing in for the queue peek call;
                presumably injected by a patch decorator that is outside
                this view -- confirm against the full file.
        """
        client = KustoIngestClient("some-cluster")
        with mock.patch.object(
                client._resource_manager, "get_successful_ingestions_queues"
        ) as mocked_get_success_qs, mock.patch.object(
                client._resource_manager,
                "get_failed_ingestions_queues") as mocked_get_failed_qs:

            fake_failed_queue = _ResourceUri("mocked_storage_account1",
                                             "queue", "mocked_qf_name",
                                             "mocked_sas")
            fake_success_queue = _ResourceUri("mocked_storage_account2",
                                              "queue", "mocked_qs_name",
                                              "mocked_sas")

            mocked_get_success_qs.return_value = [fake_success_queue]
            mocked_get_failed_qs.return_value = [fake_failed_queue]

            def fake_peek(queue_name, num_messages=1):
                # Only the failure queue is empty; any other queue returns
                # as many blank messages as were requested.
                if queue_name == fake_failed_queue.object_name:
                    return []
                return [QueueMessage() for _ in range(0, num_messages)]

            mocked_q_peek_messages.side_effect = fake_peek

            qs = KustoIngestStatusQueues(client)

            # Identity checks instead of ``== False`` / ``== True``: they
            # require a real bool and match the sibling test's style.
            assert qs.success.is_empty() is False
            assert qs.failure.is_empty() is True

            assert mocked_q_peek_messages.call_count == 2

            # First peek: success queue, two messages requested.
            success_call = mocked_q_peek_messages.call_args_list[0]
            assert success_call[0][0] == fake_success_queue.object_name
            assert success_call[1]["num_messages"] == 2

            # Second peek: failure queue, two messages requested.
            failure_call = mocked_q_peek_messages.call_args_list[1]
            assert failure_call[0][0] == fake_failed_queue.object_name
            assert failure_call[1]["num_messages"] == 2
예제 #5
0
                              jsonPath="$.xtime",
                              cslDataType="timespan"))
        mappings.append(
            JsonColumnMapping(columnName="xtextWithNulls",
                              jsonPath="$.xtextWithNulls",
                              cslDataType="string"))
        mappings.append(
            JsonColumnMapping(columnName="xdynamicWithNulls",
                              jsonPath="$.xdynamicWithNulls",
                              cslDataType="dynamic"))
        return mappings


# Module-level clients, created as a side effect of importing this module.
# `client` targets the query endpoint; `ingest_client` targets the matching
# "ingest-" endpoint and also backs the ingestion status queues.
client = KustoClient("https://toshetah.kusto.windows.net")
ingest_client = KustoIngestClient("https://ingest-toshetah.kusto.windows.net")
ingest_status_q = KustoIngestStatusQueues(ingest_client)
# Issue a drop-if-exists command for table "Deft" in database "PythonTest"
# (presumably so the tests start from a clean state -- confirm).
client.execute("PythonTest", ".drop table Deft ifexists")


@pytest.mark.run(order=1)
def test_csv_ingest_non_existing_table():
    csv_ingest_props = IngestionProperties(
        "PythonTest",
        "Deft",
        dataFormat=DataFormat.csv,
        mapping=Helpers.create_deft_table_csv_mappings(),
        reportLevel=ReportLevel.FailuresAndSuccesses,
    )
    csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests",
                                 "input", "dataset.csv")
    zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest",
예제 #6
0
# ingest a whole folder.
import os

path = "folder/path"
# os.listdir() returns bare entry names, so each one is joined back onto
# `path`; passing the bare name would resolve it against the current working
# directory instead of the folder being ingested. A plain loop is used because
# the calls are made purely for their side effect.
for entry_name in os.listdir(path):
    client.ingest_from_file(
        os.path.join(path, entry_name), ingestion_properties=ingestion_props)

##################################################################
##                        INGESTION STATUS                      ##
##################################################################

# if status updates are required, something like this can be done
import pprint
import time
from azure.kusto.ingest.status import KustoIngestStatusQueues

# Status queues built on top of the ingest client.
qs = KustoIngestStatusQueues(client)

# Upper bound, in seconds, for the delay between two polls.
MAX_BACKOFF = 180

# Current delay in seconds; doubled after every poll that finds no messages.
backoff = 1
while True:
    ################### NOTICE ####################
    # In order to get success status updates,
    # make sure the ingestion properties set
    # reportLevel=ReportLevel.FailuresAndSuccesses.
    if qs.success.is_empty() and qs.failure.is_empty():
        # Both queues are empty: wait, then double the delay (capped).
        time.sleep(backoff)
        backoff = min(backoff * 2, MAX_BACKOFF)
        # NOTE(review): the value printed is the delay for the NEXT empty
        # poll, not the one that was just slept.
        print("No new messages. backing off for {} seconds".format(backoff))
        continue
예제 #7
0
def update_ADX_ingest_status(tc):
    """Drain the ADX ingestion status queues and record every result.

    Polls the success and failure status queues. While both are empty the
    wait doubles (1, 2, 4 ... seconds); as soon as the doubled wait reaches
    ``MAX_BACKOFF`` the function returns. Whenever messages are available, up
    to 15 are popped from each queue and processed:

    * success message: trace + event, ``update_COSMOS_status`` with
      ``SUCCESS_STATUS``, then the source blob is moved under
      ``PROCESSED_TELEMETRY_FOLDER`` via ``move_processed_file``;
    * failure message: trace + event, ``update_COSMOS_status`` with
      ``FAILURE_STATUS``.

    Args:
        tc: Telemetry client. Only ``track_trace``, ``track_event`` and
            ``flush`` are called on it here; it is also handed to
            ``update_COSMOS_status`` and ``move_processed_file``.
    """
    KCSB_INGEST = KustoConnectionStringBuilder.with_aad_device_authentication(
        DATA_INGESTION_URI)
    KCSB_INGEST.authority_id = APP_AAD_TENANT_ID
    INGESTION_CLIENT = KustoIngestClient(KCSB_INGEST)
    qs = KustoIngestStatusQueues(INGESTION_CLIENT)

    # Short tag (last five characters of a UUID4) that correlates the log
    # lines of a single run.
    run_id = (str(uuid.uuid4()))[31:].upper()
    MAX_BACKOFF = 8
    backoff = 1

    total_queue_success_messages = 0
    while True:
        ################### NOTICE ####################
        # In order to get success status updates, make sure the ingestion
        # properties set reportLevel=ReportLevel.FailuresAndSuccesses.
        if qs.success.is_empty() and qs.failure.is_empty():
            time.sleep(backoff)

            # backoff == 1 means the previous iteration processed messages,
            # so this is the first idle poll after a batch.
            if backoff == 1 and total_queue_success_messages != 0:
                print(
                    "{} RUN_ID:{}  Processed {} message in this batch ".format(
                        LOG_MESSAGE_HEADER, run_id,
                        total_queue_success_messages))

            backoff = min(backoff * 2, MAX_BACKOFF)
            if backoff < MAX_BACKOFF:
                continue
            # min() caps the value, so reaching this point means the backoff
            # hit MAX_BACKOFF: stop polling.
            break

        backoff = 1

        # Normalise to lists so a None result cannot break the len() calls
        # and the loops below.
        success_messages = qs.success.pop(15) or []
        failure_messages = qs.failure.pop(15) or []

        total_success = len(success_messages)
        total_failure = len(failure_messages)
        if success_messages:
            tc.track_trace("{} Get {} success ingest messages ".format(
                LOG_MESSAGE_HEADER, total_success))
        if failure_messages:
            tc.track_trace("{} Get {} failure  ingest messages ".format(
                LOG_MESSAGE_HEADER, total_failure))
        tc.flush()
        total_queue_success_messages += total_success

        if success_messages:
            # Built once per batch instead of once per message; the
            # constructor arguments never change.
            telemetry_block_blob_service = BlockBlobService(
                account_name=SOURCE_TELEMETRY_BLOB_ACCOUNT,
                account_key=SOURCE_TELEMETRY_FILE_BLOB_KEY)

        for count_success, smsg in enumerate(success_messages, start=1):
            file_path = get_file_path(smsg.IngestionSourcePath)
            container_name = get_container_name(smsg.IngestionSourcePath)
            log_msg = "{} SUCCESS TO INGEST TO ADX <{}> -[{}/{}/{}] , Time: {}, vm_uuid: {}, source_id:{},  file path: {}".format(
                LOG_MESSAGE_HEADER, run_id, count_success, total_success,
                total_queue_success_messages, smsg.SucceededOn,
                get_vm_uuid_from_filename(file_path), smsg.IngestionSourceId,
                file_path)
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME, {
                    'MESSAGE': 'SUCCESS TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': smsg.IngestionSourceId
                }, {})
            tc.flush()
            update_COSMOS_status(COSMOS_CLIENT, file_path, smsg.SucceededOn,
                                 SUCCESS_STATUS, str(smsg),
                                 get_vm_uuid_from_filename(file_path),
                                 smsg.IngestionSourceId, container_name, tc,
                                 count_success, run_id)

            # Destination path of the processed blob; avoid a doubled "/"
            # when the configured folder already ends with one.
            if PROCESSED_TELEMETRY_FOLDER.endswith('/'):
                target_file_path = PROCESSED_TELEMETRY_FOLDER + file_path
            else:
                target_file_path = PROCESSED_TELEMETRY_FOLDER + '/' + file_path

            # Source and target container are the same; only the path changes.
            move_processed_file(telemetry_block_blob_service, container_name,
                                file_path, container_name, target_file_path,
                                tc)
            tc.track_trace(
                '{} DONE ADX INGESTION PROCESS <{}> -[{}/{}/{}], File Moved to processed folder {} , vm_uuid: {}, file path: {}'
                .format(LOG_MESSAGE_HEADER, run_id, count_success,
                        total_success, total_queue_success_messages,
                        target_file_path,
                        get_vm_uuid_from_filename(file_path), file_path))
            tc.track_event(
                APP_INSIGHT_INGEST_SUCCESS_EVENT_NAME, {
                    'MESSAGE': 'DONE ADX INGESTION PROCESS',
                    'moved_file_path': target_file_path,
                    'source_file_path': file_path
                }, {})
            tc.flush()
            print("{} IngestionSourcePath: {}".format(
                LOG_MESSAGE_HEADER, smsg.IngestionSourcePath))
            print(smsg.SucceededOn)

        for count_failure, fmsg in enumerate(failure_messages, start=1):
            container_name = get_container_name(fmsg.IngestionSourcePath)
            file_path = get_file_path(fmsg.IngestionSourcePath)
            log_msg = "{} FAILED TO INGEST TO ADX <{}> -[{}/{}] , Time: {}, vm_uuid: {}, source_id:{}, container:{},  file path: {}, message: {}".format(
                LOG_MESSAGE_HEADER, run_id, count_failure, total_failure,
                fmsg.FailedOn, get_vm_uuid_from_filename(file_path),
                fmsg.IngestionSourceId, container_name, file_path, str(fmsg))
            tc.track_trace(log_msg)
            tc.track_event(
                APP_INSIGHT_INGEST_FAILURE_EVENT_NAME, {
                    'MESSAGE': 'FAILED TO Ingest ADX',
                    'file_path': file_path,
                    'source_id': fmsg.IngestionSourceId
                }, {})
            tc.flush()
            update_COSMOS_status(COSMOS_CLIENT, file_path, fmsg.FailedOn,
                                 FAILURE_STATUS, str(fmsg),
                                 get_vm_uuid_from_filename(file_path),
                                 fmsg.IngestionSourceId, container_name, tc,
                                 count_failure, run_id)
예제 #8
0
        """Queued blob '{FILE_NAME}' ({FILE_SIZE} bytes) for ingestion into ADX table '{DESTINATION_TABLE}'"""
        .format(FILE_NAME=file_name,
                FILE_SIZE=file_size,
                DESTINATION_TABLE=destination_table))

    # query = """{} | count""".format(destination_table)

    # response = kusto_client.execute_query(kusto_database, query)

    # count_query_df = dataframe_from_result_table(response.primary_results[0])
    # print(count_query_df)
    #break

#NOTE: uncomment this to check the status message logs

# Status queues built on top of the ingestion client.
qs = KustoIngestStatusQueues(ingestion_client)

# Upper bound, in seconds, for the delay between two polls.
MAX_BACKOFF = 180

# Current delay in seconds; doubled after every poll that finds no messages.
backoff = 1
while True:
    ################### NOTICE ####################
    # In order to get success status updates,
    # make sure the ingestion properties set
    # reportLevel=ReportLevel.FailuresAndSuccesses.
    if qs.success.is_empty() and qs.failure.is_empty():
        # Both queues are empty: wait, then double the delay (capped).
        time.sleep(backoff)
        backoff = min(backoff * 2, MAX_BACKOFF)
        # NOTE(review): the value printed is the delay for the NEXT empty
        # poll, not the one that was just slept.
        print("No new messages. Backing off for {} seconds".format(backoff))
        continue
예제 #9
0
    def test_init(self):
        """A new status-queues object pairs each queue with its message class."""
        status_queues = KustoIngestStatusQueues(
            QueuedIngestClient("some-cluster"))

        assert status_queues.success.message_cls == SuccessMessage
        assert status_queues.failure.message_cls == FailureMessage
예제 #10
0
    def test_pop(self):
        """Pop one success message and six failure messages across three queues.

        The fake ``receive_messages`` produces success messages for queues
        whose name contains ``"qs"`` and failure messages for all others.
        """
        client = QueuedIngestClient("some-cluster")

        def receive_by_queue_kind(queue_name, num_messages=1):
            is_success_queue = "qs" in queue_name
            return [
                mock_message(success=is_success_queue)
                for _ in range(0, num_messages)
            ]

        fake_receive = fake_receive_factory(receive_by_queue_kind)

        # Patchers are only created here; nothing is patched until the
        # ``with`` statement below enters them.
        patch_success_qs = mock.patch.object(
            client._resource_manager, "get_successful_ingestions_queues")
        patch_failed_qs = mock.patch.object(
            client._resource_manager, "get_failed_ingestions_queues")
        patch_receive = mock.patch.object(
            QueueClient,
            "receive_messages",
            autospec=True,
            side_effect=fake_receive,
        )
        patch_delete = mock.patch.object(
            QueueClient, "delete_message", return_value=None)

        with patch_success_qs as mocked_get_success_qs, \
                patch_failed_qs as mocked_get_failed_qs, \
                patch_receive as q_receive_mock, \
                patch_delete as q_del_mock:

            first_failed_q = _ResourceUri(
                "mocked_storage_account_f1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_1_name",
                ENDPOINT_SUFFIX,
            )
            second_failed_q = _ResourceUri(
                "mocked_storage_account_f2",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_2_name",
                ENDPOINT_SUFFIX,
            )
            success_q = _ResourceUri(
                "mocked_storage_account2",
                OBJECT_TYPE,
                "queue",
                "mocked_qs_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_success_qs.return_value = [success_q]
            mocked_get_failed_qs.return_value = [
                first_failed_q, second_failed_q
            ]

            status_queues = KustoIngestStatusQueues(client)

            popped_success = status_queues.success.pop()
            popped_failure = status_queues.failure.pop(6)

            assert len(popped_success) == 1
            assert len(popped_failure) == 6

            assert all(
                isinstance(msg, FailureMessage) for msg in popped_failure)
            assert all(
                isinstance(msg, SuccessMessage) for msg in popped_success)

            # One receive for the success pop, two for the failure pop, and
            # one delete per popped message.
            assert q_receive_mock.call_count == 3
            assert q_del_mock.call_count == (
                len(popped_success) + len(popped_failure))

            recorded_calls = q_receive_mock.call_args_list
            assert recorded_calls[0][1]["messages_per_page"] == 2

            # Page size requested from each failure queue (calls 2 and 3).
            requested_per_queue = {
                recorded_call[0][0].queue_name:
                recorded_call[1]["messages_per_page"]
                for recorded_call in recorded_calls[1:3]
            }

            assert requested_per_queue[second_failed_q.object_name] == 4
            assert requested_per_queue[first_failed_q.object_name] == 4
예제 #11
0
    def test_peek(self):
        """Peek one success message and six failure messages across three queues.

        The fake ``peek_messages`` produces success messages for queues whose
        name contains ``"qs"`` and failure messages for all others.
        """
        client = QueuedIngestClient("some-cluster")

        def peek_by_queue_kind(queue_name, num_messages=1):
            is_success_queue = "qs" in queue_name
            return [
                mock_message(success=is_success_queue)
                for _ in range(0, num_messages)
            ]

        fake_peek = fake_peek_factory(peek_by_queue_kind)

        # Patchers are only created here; nothing is patched until the
        # ``with`` statement below enters them.
        patch_success_qs = mock.patch.object(
            client._resource_manager, "get_successful_ingestions_queues")
        patch_failed_qs = mock.patch.object(
            client._resource_manager, "get_failed_ingestions_queues")
        patch_peek = mock.patch.object(
            QueueClient,
            "peek_messages",
            autospec=True,
            side_effect=fake_peek,
        )

        with patch_success_qs as mocked_get_success_qs, \
                patch_failed_qs as mocked_get_failed_qs, \
                patch_peek as q_mock:

            first_failed_q = _ResourceUri(
                "mocked_storage_account_f1",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_1_name",
                ENDPOINT_SUFFIX,
            )
            second_failed_q = _ResourceUri(
                "mocked_storage_account_f2",
                OBJECT_TYPE,
                "queue",
                "mocked_qf_2_name",
                ENDPOINT_SUFFIX,
            )
            success_q = _ResourceUri(
                "mocked_storage_account2",
                OBJECT_TYPE,
                "queue",
                "mocked_qs_name",
                ENDPOINT_SUFFIX,
            )

            mocked_get_success_qs.return_value = [success_q]
            mocked_get_failed_qs.return_value = [
                first_failed_q, second_failed_q
            ]

            status_queues = KustoIngestStatusQueues(client)

            peeked_success = status_queues.success.peek()
            peeked_failure = status_queues.failure.peek(6)

            assert len(peeked_success) == 1

            assert all(
                isinstance(msg, FailureMessage) for msg in peeked_failure)
            assert all(
                isinstance(msg, SuccessMessage) for msg in peeked_success)

            assert len(peeked_failure) == 6

            assert len(QueueClient.peek_messages.call_args_list) == 3

            # Sum the message counts requested from each queue over all calls.
            requested_per_queue = {}
            for recorded_call in q_mock.call_args_list:
                queue_name = recorded_call[0][0].queue_name
                requested_per_queue[queue_name] = (
                    requested_per_queue.get(queue_name, 0) +
                    recorded_call[1]["max_messages"])

            assert requested_per_queue[second_failed_q.object_name] == 4
            assert requested_per_queue[first_failed_q.object_name] == 4
            assert requested_per_queue[success_q.object_name] == 2
    def test_peek(self, mocked_q_peek_messages):
        """Peek one success message and six failure messages across three queues.

        Args:
            mocked_q_peek_messages: Mock standing in for the queue peek call;
                presumably injected by a patch decorator that is outside
                this view -- confirm against the full file.
        """
        client = KustoIngestClient("some-cluster")
        with mock.patch.object(
                client._resource_manager, "get_successful_ingestions_queues"
        ) as mocked_get_success_qs, mock.patch.object(
                client._resource_manager,
                "get_failed_ingestions_queues") as mocked_get_failed_qs:

            fake_failed_queue1 = _ResourceUri("mocked_storage_account_f1",
                                              "queue", "mocked_qf_1_name",
                                              "mocked_sas")
            fake_failed_queue2 = _ResourceUri("mocked_storage_account_f2",
                                              "queue", "mocked_qf_2_name",
                                              "mocked_sas")
            fake_success_queue = _ResourceUri("mocked_storage_account2",
                                              "queue", "mocked_qs_name",
                                              "mocked_sas")

            mocked_get_success_qs.return_value = [fake_success_queue]
            mocked_get_failed_qs.return_value = [
                fake_failed_queue1, fake_failed_queue2
            ]

            def mock_message(success):
                """Build a QueueMessage whose content is a base64-encoded JSON status payload."""
                m = QueueMessage()
                m.id = uuid4()
                m.insertion_time = time.time()
                m.expiration_time = None
                m.dequeue_count = None

                # Fields present in both success and failure payloads.
                content = {
                    "OperationId": str(m.id),
                    "Database": "db1",
                    "Table": "table1",
                    "IngestionSourceId": str(m.id),
                    "IngestionSourcePath": "blob/path",
                    "RootActivityId": "1",
                }
                if success:
                    content["SucceededOn"] = time.time()
                else:
                    content.update({
                        "FailedOn": time.time(),
                        "Details": "",
                        "ErrorCode": "1",
                        "FailureStatus": "",
                        "OriginatesFromUpdatePolicy": "",
                        "ShouldRetry": False,
                    })

                m.content = str(
                    base64.b64encode(
                        json.dumps(content).encode("utf-8")).decode("utf-8"))
                m.pop_receipt = None
                m.time_next_visible = None

                return m

            def fake_peek(queue_name, num_messages=1):
                # Success payloads for the success queue, failure payloads
                # for every other queue.
                is_success_queue = (
                    queue_name == fake_success_queue.object_name)
                return [
                    mock_message(success=is_success_queue)
                    for _ in range(0, num_messages)
                ]

            mocked_q_peek_messages.side_effect = fake_peek

            qs = KustoIngestStatusQueues(client)

            peek_success_actual = qs.success.peek()
            peek_failure_actual = qs.failure.peek(6)

            assert len(peek_success_actual) == 1

            # isinstance() already returns a bool; no ``== True`` needed.
            for m in peek_failure_actual:
                assert isinstance(m, FailureMessage)

            for m in peek_success_actual:
                assert isinstance(m, SuccessMessage)

            assert len(peek_failure_actual) == 6

            assert len(mocked_q_peek_messages.call_args_list) == 3

            # Sum the message counts requested from each queue over all calls.
            actual = {}
            for call_args in mocked_q_peek_messages.call_args_list:
                queue_name = call_args[0][0]
                actual[queue_name] = (
                    actual.get(queue_name, 0) + call_args[1]["num_messages"])

            assert actual[fake_failed_queue2.object_name] == 4
            assert actual[fake_failed_queue1.object_name] == 4
            assert actual[fake_success_queue.object_name] == 2