def test_streaming_ingest_from_dataframe(self):
    """Stream a two-row pandas DataFrame through the mocked streaming ingest endpoint."""
    # Route POSTs for database/table on the fake cluster to the shared callback.
    responses.add_callback(
        responses.POST,
        "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
        callback=request_callback,
    )

    streaming_client = KustoStreamingIngestClient("https://somecluster.kusto.windows.net")
    csv_properties = IngestionProperties(
        database="database",
        table="table",
        dataFormat=DataFormat.CSV,
    )

    # pandas is imported inside the test body rather than at module level.
    from pandas import DataFrame

    frame = DataFrame(
        data=[[1, "abc", 15.3], [2, "cde", 99.9]],
        columns=["id", "name", "value"],
    )

    streaming_client.ingest_from_dataframe(frame, csv_properties)
def setup_class(cls):
    """Prepare the shared end-to-end fixtures on the test class.

    Reads the connection settings from environment variables, builds the
    query, queued-ingest and streaming-ingest clients, resolves the input
    dataset paths, and creates a uniquely named test table together with
    its 'JsonMapping' JSON ingestion mapping.

    Raises:
        unittest.SkipTest: if any required environment setting is missing.
    """
    cls.engine_cs = os.environ.get("ENGINE_CONNECTION_STRING")

    # DM CS can be composed from engine CS.
    # Only derive it when the engine CS is actually set: calling .replace()
    # on None would raise AttributeError and hide the SkipTest below.
    cls.dm_cs = os.environ.get("DM_CONNECTION_STRING")
    if not cls.dm_cs and cls.engine_cs:
        cls.dm_cs = cls.engine_cs.replace("//", "//ingest-")

    cls.app_id = os.environ.get("APP_ID")
    cls.app_key = os.environ.get("APP_KEY")
    cls.auth_id = os.environ.get("AUTH_ID")
    cls.test_db = os.environ.get("TEST_DATABASE")

    if not all([cls.engine_cs, cls.dm_cs, cls.app_id, cls.app_key, cls.auth_id, cls.test_db]):
        raise unittest.SkipTest("E2E environment is missing")

    # Init clients
    python_version = "_".join([str(v) for v in sys.version_info[:3]])
    # Interpreter version + timestamp + random suffix make up the table name.
    cls.test_table = "python_test_{0}_{1}_{2}".format(
        python_version, str(int(time.time())), random.randint(1, 100000)
    )
    cls.client = KustoClient(cls.engine_kcsb_from_env())
    cls.ingest_client = KustoIngestClient(cls.dm_kcsb_from_env())
    cls.streaming_ingest_client = KustoStreamingIngestClient(cls.engine_kcsb_from_env())

    # Input datasets used by the ingestion tests.
    cls.input_folder_path = cls.get_file_path()
    cls.csv_file_path = os.path.join(cls.input_folder_path, "dataset.csv")
    cls.tsv_file_path = os.path.join(cls.input_folder_path, "dataset.tsv")
    cls.zipped_csv_file_path = os.path.join(cls.input_folder_path, "dataset.csv.gz")
    cls.json_file_path = os.path.join(cls.input_folder_path, "dataset.json")
    cls.zipped_json_file_path = os.path.join(cls.input_folder_path, "dataset.jsonz.gz")

    cls.current_count = 0

    # Create the target table, then register its JSON ingestion mapping.
    cls.client.execute(
        cls.test_db,
        ".create table {0} (rownumber: int, rowguid: string, xdouble: real, xfloat: real, xbool: bool, xint16: int, xint32: int, xint64: long, xuint8: long, xuint16: long, xuint32: long, xuint64: long, xdate: datetime, xsmalltext: string, xtext: string, xnumberAsText: string, xtime: timespan, xtextWithNulls: string, xdynamicWithNulls: dynamic)".format(
            cls.test_table
        ),
    )
    cls.client.execute(
        cls.test_db,
        ".create table {0} ingestion json mapping 'JsonMapping' {1}".format(
            cls.test_table, cls.test_table_json_mapping_reference()
        ),
    )
def test_streaming_ingest_from_file(self):
    """Stream every sample dataset file through the mocked streaming ingest endpoint.

    Files are ingested in this order: plain CSV, gzipped CSV, plain JSON,
    gzipped JSON (both JSON files with the "JsonMapping" mapping reference),
    and finally TSV.
    """
    responses.add_callback(
        responses.POST,
        "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
        callback=request_callback,
    )
    ingest_client = KustoStreamingIngestClient("https://somecluster.kusto.windows.net")

    # ensure test can work when executed from within directories
    current_dir = os.getcwd()

    def input_file_path(file_name):
        # Append only the path parts that do not already occur in the working
        # directory string (substring check, same as the original inline code).
        path_parts = ["azure-kusto-ingest", "tests", "input", file_name]
        missing_path_parts = [part for part in path_parts if part not in current_dir]
        return os.path.join(current_dir, *missing_path_parts)

    csv_properties = IngestionProperties(
        database="database", table="table", data_format=DataFormat.CSV
    )
    # One JSON properties object is shared by the plain and the gzipped JSON file.
    json_properties = IngestionProperties(
        database="database",
        table="table",
        data_format=DataFormat.JSON,
        ingestion_mapping_reference="JsonMapping",
    )
    tsv_properties = IngestionProperties(
        database="database", table="table", data_format=DataFormat.TSV
    )

    cases = [
        ("dataset.csv", csv_properties),
        ("dataset.csv.gz", csv_properties),
        ("dataset.json", json_properties),
        ("dataset.jsonz.gz", json_properties),
        ("dataset.tsv", tsv_properties),
    ]
    for file_name, ingestion_properties in cases:
        ingest_client.ingest_from_file(
            input_file_path(file_name), ingestion_properties=ingestion_properties
        )
def test_streaming_ingest_from_stream(self):
    """Stream CSV and JSON payloads from in-memory byte and text streams."""
    responses.add_callback(
        responses.POST,
        "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
        callback=request_callback,
    )
    client = KustoStreamingIngestClient("https://somecluster.kusto.windows.net")
    props = IngestionProperties(database="database", table="table", data_format=DataFormat.CSV)

    # CSV payloads: first as a byte stream, then as a text stream.
    client.ingest_from_stream(io.BytesIO(b"56,56,56"), ingestion_properties=props)
    client.ingest_from_stream(io.StringIO(u"57,57,57"), ingestion_properties=props)

    # JSON payloads reuse the same properties object, switched to the JSON format.
    json_bytes = io.BytesIO(b'{"Name":"Ben","Age":"56","Weight":"75"}')
    props.format = DataFormat.JSON
    try:
        client.ingest_from_stream(json_bytes, ingestion_properties=props)
    except KustoMissingMappingReferenceError:
        # Tolerated: no mapping reference has been set on the properties yet.
        pass

    props.ingestion_mapping_reference = "JsonMapping"
    # The very same stream object is handed over again, now with the mapping set.
    client.ingest_from_stream(json_bytes, ingestion_properties=props)

    json_text = io.StringIO(u'{"Name":"Ben","Age":"56","Weight":"75"}')
    client.ingest_from_stream(json_text, ingestion_properties=props)
# Dump the collected status messages to log files.
# Each message is written via str() with no separator between entries.
with open("successes.log", "w+") as sf:
    for sm in success_messages:
        sf.write(str(sm))

with open("failures.log", "w+") as ff:
    for fm in failure_messages:
        ff.write(str(fm))

##################################################################
##                        STREAMING INGEST                      ##
##################################################################

# Authenticate against this cluster endpoint as shown in the Auth section
# NOTE(review): `cluster` is not referenced again in the visible code, and
# `kcsb` is defined outside this view — confirm it targets this endpoint.
cluster = "https://{cluster_name}.kusto.windows.net"

client = KustoStreamingIngestClient(kcsb)

ingestion_props = IngestionProperties(database="{database_name}", table="{table_name}", dataFormat=DataFormat.CSV)

# ingest from file
# The same file is ingested twice: once via a FileDescriptor, once via its path.
file_descriptor = FileDescriptor("{filename}.csv", 3333)  # 3333 is the raw size of the data in bytes.
client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props)
client.ingest_from_file("{filename}.csv", ingestion_properties=ingestion_props)

# ingest from dataframe
import pandas

fields = ["id", "name", "value"]
rows = [[1, "abc", 15.3], [2, "cde", 99.9]]
df = pandas.DataFrame(data=rows, columns=fields)
cluster = "Dadubovs1.westus" # "toshetah" db_name = "TestingDatabase" # "PythonTest" table_name = "Deft" engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://{}.kusto.windows.net".format(cluster) ) dm_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication( "https://ingest-{}.kusto.windows.net".format(cluster) ) client = KustoClient(engine_kcsb) ingest_client = KustoIngestClient(dm_kcsb) ingest_status_q = KustoIngestStatusQueues(ingest_client) streaming_ingest_client = KustoStreamingIngestClient(engine_kcsb) client.execute(db_name, ".drop table {} ifexists".format(table_name)) @pytest.mark.run(order=1) def test_csv_ingest_non_existing_table(): csv_ingest_props = IngestionProperties( db_name, table_name, dataFormat=DataFormat.CSV, mapping=Helpers.create_deft_table_csv_mappings(), reportLevel=ReportLevel.FailuresAndSuccesses, ) csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv") zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv.gz")
missing_path_parts = [] for path_part in path_parts: if path_part not in current_dir: missing_path_parts.append(path_part) return os.path.join(current_dir, *missing_path_parts) # Init clients test_db = os.environ.get("TEST_DATABASE") python_version = "_".join([str(v) for v in sys.version_info[:3]]) test_table = "python_test_{0}_{1}".format(python_version, str(int(time.time()))) client = KustoClient(engine_kcsb_from_env()) ingest_client = KustoIngestClient(dm_kcsb_from_env()) streaming_ingest_client = KustoStreamingIngestClient(engine_kcsb_from_env()) start_time = datetime.datetime.now(datetime.timezone.utc) clean_previous_tests(client, test_db, test_table) input_folder_path = get_file_path() csv_file_path = os.path.join(input_folder_path, "dataset.csv") tsv_file_path = os.path.join(input_folder_path, "dataset.tsv") zipped_csv_file_path = os.path.join(input_folder_path, "dataset.csv.gz") json_file_path = os.path.join(input_folder_path, "dataset.json") zipped_json_file_path = os.path.join(input_folder_path, "dataset.jsonz.gz") current_count = 0 client.execute(