def test_blob_info_json_mapping(self):
    """Verify that a JSON-ingestion blob info serializes to the expected JSON payload."""
    policy = ValidationPolicy(ValidationOptions.ValidateCsvInputConstantColumns, ValidationImplications.BestEffort)
    props = IngestionProperties(
        database="database",
        table="table",
        dataFormat=DataFormat.json,
        mapping=[JsonColumnMapping("ColumnName", "jsonpath", "datatype")],
        additionalTags=["tag"],
        ingestIfNotExists=["ingestIfNotExistTags"],
        ingestByTags=["ingestByTags"],
        dropByTags=["dropByTags"],
        flushImmediately=True,
        reportLevel=ReportLevel.DoNotReport,
        reportMethod=ReportMethod.QueueAndTable,
        validationPolicy=policy,
    )
    descriptor = BlobDescriptor("somepath", 10)
    info = _IngestionBlobInfo(descriptor, props, deleteSourcesOnSuccess=True, authContext="authorizationContextText")
    self._verify_ingestion_blob_info_result(info.to_json())
def test_blob_info_csv_mapping(self):
    """Verify that a CSV-ingestion blob info serializes to the expected JSON payload."""
    policy = ValidationPolicy(ValidationOptions.ValidateCsvInputConstantColumns, ValidationImplications.BestEffort)
    mapping = ColumnMapping("ColumnName", "cslDataType", ordinal=1)
    props = IngestionProperties(
        database="database",
        table="table",
        dataFormat=DataFormat.CSV,
        ingestionMapping=[mapping],
        additionalTags=["tag"],
        ingestIfNotExists=["ingestIfNotExistTags"],
        ingestByTags=["ingestByTags"],
        dropByTags=["dropByTags"],
        flushImmediately=True,
        reportLevel=ReportLevel.DoNotReport,
        reportMethod=ReportMethod.Queue,
        validationPolicy=policy,
    )
    descriptor = BlobDescriptor("somepath", 10)
    info = _IngestionBlobInfo(descriptor, props, auth_context="authorizationContextText")
    self._verify_ingestion_blob_info_result(info.to_json())
def test_blob_info_json_mapping(self):
    """Verify that a JSON-ingestion blob info serializes to the expected JSON payload."""
    policy = ValidationPolicy(ValidationOptions.ValidateCsvInputConstantColumns, ValidationImplications.BestEffort)
    mappings = [ColumnMapping("ColumnName", "datatype", path="jsonpath")]
    props = IngestionProperties(
        database="database",
        table="table",
        data_format=DataFormat.JSON,
        column_mappings=mappings,
        additional_tags=["tag"],
        ingest_if_not_exists=["ingestIfNotExistTags"],
        ingest_by_tags=["ingestByTags"],
        drop_by_tags=["dropByTags"],
        flush_immediately=True,
        report_level=ReportLevel.DoNotReport,
        report_method=ReportMethod.Queue,
        validation_policy=policy,
    )
    descriptor = BlobDescriptor("somepath", 10)
    info = IngestionBlobInfo(descriptor, props, auth_context="authorizationContextText")
    self._verify_ingestion_blob_info_result(info.to_json())
def test_ingest_complicated_props():
    """Ingest two JSON files using a fully-populated IngestionProperties and check the row count."""
    policy = ValidationPolicy(
        validation_options=ValidationOptions.ValidateCsvInputConstantColumns,
        validation_implications=ValidationImplications.Fail,
    )
    props = IngestionProperties(
        test_db,
        test_table,
        data_format=DataFormat.JSON,
        ingestion_mapping=TestData.test_table_json_mappings(),
        additional_tags=["a", "b"],
        ingest_if_not_exists=["aaaa", "bbbb"],
        ingest_by_tags=["ingestByTag"],
        drop_by_tags=["drop", "drop-by"],
        flush_immediately=False,
        report_level=ReportLevel.FailuresAndSuccesses,
        report_method=ReportMethod.Queue,
        validation_policy=policy,
    )
    # Ingest the plain and the gzipped variant of the same dataset.
    for path in (json_file_path, zipped_json_file_path):
        ingest_client.ingest_from_file(FileDescriptor(path, 0, uuid.uuid4()), props)
    assert_rows_added(4)
def test_ingest_complicated_props():
    """Ingest two JSON files using a fully-populated IngestionProperties and check status counts."""
    policy = ValidationPolicy(
        validationOptions=ValidationOptions.ValidateCsvInputConstantColumns,
        validationImplications=ValidationImplications.Fail,
    )
    props = IngestionProperties(
        db_name,
        table_name,
        dataFormat=DataFormat.JSON,
        ingestionMapping=Helpers.create_test_table_json_mappings(),
        additionalTags=["a", "b"],
        ingestIfNotExists=["aaaa", "bbbb"],
        ingestByTags=["ingestByTag"],
        dropByTags=["drop", "drop-by"],
        flushImmediately=False,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        reportMethod=ReportMethod.Queue,
        validationPolicy=policy,
    )
    # Ingest the plain and the gzipped variant of the same dataset.
    for path in (json_file_path, zipped_json_file_path):
        ingest_client.ingest_from_file(FileDescriptor(path, 0, uuid.uuid4()), props)
    assert_success_mesagges_count(2)
    assert_row_count(4)
def test_ingest_complicated_props():
    """Ingest two JSON files with complicated ingestion properties, then confirm success via the
    status queues (filtering on our own source ids) and finally via a row-count query."""
    policy = ValidationPolicy(
        validationOptions=ValidationOptions.ValidateCsvInputConstantColumns,
        validationImplications=ValidationImplications.Fail,
    )
    props = IngestionProperties(
        db_name,
        table_name,
        dataFormat=DataFormat.json,
        mapping=Helpers.create_deft_table_json_mappings(),
        additionalTags=["a", "b"],
        ingestIfNotExists=["aaaa", "bbbb"],
        ingestByTags=["ingestByTag"],
        dropByTags=["drop", "drop-by"],
        flushImmediately=False,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        reportMethod=ReportMethod.Queue,
        validationPolicy=policy,
    )
    descriptors = [FileDescriptor(path, 0, uuid.uuid4()) for path in (json_file_path, zipped_json_file_path)]
    # Remember our own source ids so messages from unrelated ingestions can be skipped.
    source_ids = ["{}".format(descriptor.source_id) for descriptor in descriptors]
    for descriptor in descriptors:
        ingest_client.ingest_from_file(descriptor, props)
    confirmed = 0
    remaining_seconds = 60
    # Poll the success queue (up to ~60s) until both of our ingestions are confirmed.
    while confirmed != 2 and remaining_seconds > 0:
        while ingest_status_q.success.is_empty() and remaining_seconds > 0:
            time.sleep(1)
            remaining_seconds -= 1
        message = ingest_status_q.success.pop()
        if message[0].IngestionSourceId in source_ids:
            assert message[0].Database == db_name
            assert message[0].Table == table_name
            confirmed += 1
    assert confirmed == 2
    # TODO: status queues only mark ingestion was successful, but takes time for data to become available
    time.sleep(20)
    response = client.execute(db_name, "{} | count".format(table_name))
    for row in response.primary_results[0]:
        assert int(row["Count"]) == 28, "{0} | count = {1}".format(table_name, text_type(row["Count"]))
def test_ingest_complicated_props():
    """Ingest two JSON files with complicated ingestion properties, then confirm success via the
    status queues and finally via a row-count query."""
    policy = ValidationPolicy(
        validationOptions=ValidationOptions.ValidateCsvInputConstantColumns,
        validationImplications=ValidationImplications.Fail,
    )
    props = IngestionProperties(
        "PythonTest",
        "Deft",
        dataFormat=DataFormat.json,
        mapping=Helpers.create_deft_table_json_mappings(),
        additionalTags=["a", "b"],
        ingestIfNotExists=["aaaa", "bbbb"],
        ingestByTags=["ingestByTag"],
        dropByTags=["drop", "drop-by"],
        flushImmediately=False,
        reportLevel=ReportLevel.FailuresAndSuccesses,
        reportMethod=ReportMethod.Queue,
        validationPolicy=policy,
    )
    # Ingest the plain and the gzipped variant of the same dataset.
    for path in (json_file_path, zipped_json_file_path):
        ingest_client.ingest_from_file(path, props)
    confirmed = 0
    remaining_seconds = 60
    # Poll the success queue (up to ~60s) until both ingestions are confirmed.
    while confirmed != 2 and remaining_seconds > 0:
        while ingest_status_q.success.is_empty() and remaining_seconds > 0:
            time.sleep(1)
            remaining_seconds -= 1
        message = ingest_status_q.success.pop()
        assert message[0].Database == "PythonTest"
        assert message[0].Table == "Deft"
        confirmed += 1
    assert confirmed == 2
    # TODO: status queues only mark ingestion was successful, but takes time for data to become available
    time.sleep(20)
    response = client.execute("PythonTest", "Deft | count")
    for row in response.primary_results[0]:
        assert int(row["Count"]) == 28, "Deft | count = " + str(row["Count"])
def test_blob_json_mapping_reference(self):
    """Verify serialization of a blob info that uses a pre-created JSON mapping reference."""
    policy = ValidationPolicy(ValidationOptions.ValidateCsvInputConstantColumns, ValidationImplications.BestEffort)
    props = IngestionProperties(
        database="database",
        table="table",
        dataFormat=DataFormat.JSON,
        mappingReference="jsonMappingReference",
        additionalTags=["tag"],
        ingestIfNotExists=["ingestIfNotExistTags"],
        ingestByTags=["ingestByTags"],
        dropByTags=["dropByTags"],
        flushImmediately=True,
        reportLevel=ReportLevel.DoNotReport,
        reportMethod=ReportMethod.Queue,
        validationPolicy=policy,
    )
    descriptor = BlobDescriptor("somepath", 10)
    info = _IngestionBlobInfo(descriptor, props, auth_context="authorizationContextText")
    self._verify_ingestion_blob_info_result(info.to_json())
"input", "dataset.json") ZIPPED_JSON_FILE_PATH = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.jsonz.gz") KUSTO_INGEST_CLIENT.ingest_from_multiple_files( [JSON_FILE_PATH, ZIPPED_JSON_FILE_PATH], False, JSON_INGESTION_PROPERTIES) time.sleep(60) RESPONSE = KUSTO_CLIENT.execute("PythonTest", "Deft | count") for row in RESPONSE.iter_all(): if int(row['Count']) == 24: print("Completed ingest from json mapping successfully.") else: print("Deft | count = " + str(row['Count'])) # Test ingest with complicated ingestion properties VALIDATION_POLICY = ValidationPolicy( validationOptions=ValidationOptions.ValidateCsvInputConstantColumns, validationImplications=ValidationImplications.Fail) JSON_INGESTION_PROPERTIES = IngestionProperties( "PythonTest", "Deft", dataFormat=DataFormat.json, mapping=Helpers.create_deft_table_json_mappings(), additionalTags=["a", "b"], ingestIfNotExists=["aaaa", "bbbb"], ingestByTags=["ingestByTag"], dropByTags=["drop", "drop-by"], flushImmediately=False, reportLevel=ReportLevel.FailuresAndSuccesses, reportMethod=ReportMethod.QueueAndTable, validationPolicy=VALIDATION_POLICY) KUSTO_INGEST_CLIENT.ingest_from_multiple_files(