def test_streaming_ingest_from_dataframe(self):
        """Stream a two-row pandas DataFrame to the mocked streaming-ingest URL."""
        responses.add_callback(
            responses.POST,
            "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
            callback=request_callback,
        )

        streaming_client = KustoStreamingIngestClient("https://somecluster.kusto.windows.net")
        props = IngestionProperties(database="database", table="table", dataFormat=DataFormat.CSV)

        from pandas import DataFrame

        # Two rows with an int, a string and a float column.
        frame = DataFrame(
            data=[[1, "abc", 15.3], [2, "cde", 99.9]],
            columns=["id", "name", "value"],
        )

        streaming_client.ingest_from_dataframe(frame, props)
예제 #2
0
    def setup_class(cls):
        """Read the E2E configuration from the environment and build shared fixtures.

        Connection strings, credentials and the target database come from
        environment variables. When any of them is missing the class is
        skipped. Otherwise the query, queued-ingest and streaming-ingest
        clients are created, the input dataset paths are resolved, and a test
        table (named after the Python version, the current time and a random
        number) is created together with its ``JsonMapping`` ingestion mapping.

        Raises:
            unittest.SkipTest: if the E2E environment is incomplete.
        """
        cls.engine_cs = os.environ.get("ENGINE_CONNECTION_STRING")
        cls.dm_cs = os.environ.get("DM_CONNECTION_STRING")
        if not cls.dm_cs and cls.engine_cs:
            # DM CS can be composed from engine CS. Only attempt this when the
            # engine CS exists, so a missing environment reaches the skip
            # check below instead of failing on ``None.replace``.
            cls.dm_cs = cls.engine_cs.replace("//", "//ingest-")
        cls.app_id = os.environ.get("APP_ID")
        cls.app_key = os.environ.get("APP_KEY")
        cls.auth_id = os.environ.get("AUTH_ID")
        cls.test_db = os.environ.get("TEST_DATABASE")

        required_settings = (
            cls.engine_cs,
            cls.dm_cs,
            cls.app_id,
            cls.app_key,
            cls.auth_id,
            cls.test_db,
        )
        if not all(required_settings):
            raise unittest.SkipTest("E2E environment is missing")

        # Init clients
        python_version = "_".join(str(v) for v in sys.version_info[:3])
        cls.test_table = "python_test_{0}_{1}_{2}".format(
            python_version, str(int(time.time())), random.randint(1, 100000))
        cls.client = KustoClient(cls.engine_kcsb_from_env())
        cls.ingest_client = KustoIngestClient(cls.dm_kcsb_from_env())
        cls.streaming_ingest_client = KustoStreamingIngestClient(
            cls.engine_kcsb_from_env())

        cls.input_folder_path = cls.get_file_path()

        cls.csv_file_path = os.path.join(cls.input_folder_path, "dataset.csv")
        cls.tsv_file_path = os.path.join(cls.input_folder_path, "dataset.tsv")
        cls.zipped_csv_file_path = os.path.join(cls.input_folder_path,
                                                "dataset.csv.gz")
        cls.json_file_path = os.path.join(cls.input_folder_path,
                                          "dataset.json")
        cls.zipped_json_file_path = os.path.join(cls.input_folder_path,
                                                 "dataset.jsonz.gz")

        cls.current_count = 0

        # Create the target table, then the JSON ingestion mapping on it.
        cls.client.execute(
            cls.test_db,
            ".create table {0} (rownumber: int, rowguid: string, xdouble: real, xfloat: real, xbool: bool, xint16: int, xint32: int, xint64: long, xuint8: long, xuint16: long, xuint32: long, xuint64: long, xdate: datetime, xsmalltext: string, xtext: string, xnumberAsText: string, xtime: timespan, xtextWithNulls: string, xdynamicWithNulls: dynamic)"
            .format(cls.test_table),
        )
        cls.client.execute(
            cls.test_db,
            ".create table {0} ingestion json mapping 'JsonMapping' {1}".
            format(cls.test_table, cls.test_table_json_mapping_reference()))
    def test_streaming_ingest_from_file(self):
        """Stream each input dataset file to the mocked streaming-ingest URL.

        Files are ingested in this order: ``dataset.csv`` and
        ``dataset.csv.gz`` as CSV, ``dataset.json`` and ``dataset.jsonz.gz``
        as JSON with the ``JsonMapping`` reference, then ``dataset.tsv`` as
        TSV.
        """
        responses.add_callback(
            responses.POST,
            "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
            callback=request_callback)

        ingest_client = KustoStreamingIngestClient(
            "https://somecluster.kusto.windows.net")

        # ensure test can work when executed from within directories
        current_dir = os.getcwd()

        def dataset_path(file_name):
            """Return the input file path, appending only parts the cwd lacks."""
            path_parts = ["azure-kusto-ingest", "tests", "input", file_name]
            missing_path_parts = [
                path_part for path_part in path_parts
                if path_part not in current_dir
            ]
            return os.path.join(current_dir, *missing_path_parts)

        def ingest_files(ingestion_properties, *file_names):
            """Ingest the named input files, in order, with one set of properties."""
            for file_name in file_names:
                ingest_client.ingest_from_file(
                    dataset_path(file_name),
                    ingestion_properties=ingestion_properties)

        ingest_files(
            IngestionProperties(database="database",
                                table="table",
                                data_format=DataFormat.CSV),
            "dataset.csv", "dataset.csv.gz")

        ingest_files(
            IngestionProperties(database="database",
                                table="table",
                                data_format=DataFormat.JSON,
                                ingestion_mapping_reference="JsonMapping"),
            "dataset.json", "dataset.jsonz.gz")

        ingest_files(
            IngestionProperties(database="database",
                                table="table",
                                data_format=DataFormat.TSV),
            "dataset.tsv")
    def test_streaming_ingest_from_stream(self):
        """Stream CSV and JSON payloads from byte and text streams.

        All requests go to the mocked streaming-ingest URL. The JSON payload
        is first sent without a mapping reference (a
        ``KustoMissingMappingReferenceError`` is tolerated), then again with
        the ``JsonMapping`` reference set.
        """
        responses.add_callback(
            responses.POST,
            "https://somecluster.kusto.windows.net/v1/rest/ingest/database/table",
            callback=request_callback)

        ingest_client = KustoStreamingIngestClient(
            "https://somecluster.kusto.windows.net")
        ingestion_properties = IngestionProperties(database="database",
                                                   table="table",
                                                   data_format=DataFormat.CSV)

        byte_sequence = b"56,56,56"
        bytes_stream = io.BytesIO(byte_sequence)
        ingest_client.ingest_from_stream(
            bytes_stream, ingestion_properties=ingestion_properties)

        str_sequence = u"57,57,57"
        str_stream = io.StringIO(str_sequence)
        ingest_client.ingest_from_stream(
            str_stream, ingestion_properties=ingestion_properties)

        byte_sequence = b'{"Name":"Ben","Age":"56","Weight":"75"}'
        bytes_stream = io.BytesIO(byte_sequence)
        ingestion_properties.format = DataFormat.JSON
        try:
            ingest_client.ingest_from_stream(
                bytes_stream, ingestion_properties=ingestion_properties)
        except KustoMissingMappingReferenceError:
            # JSON without a mapping reference may be rejected; tolerated here.
            pass

        ingestion_properties.ingestion_mapping_reference = "JsonMapping"
        # Rewind before reuse: the attempt above may already have read the
        # stream, which would make this call send an empty payload.
        bytes_stream.seek(0)
        ingest_client.ingest_from_stream(
            bytes_stream, ingestion_properties=ingestion_properties)

        str_sequence = u'{"Name":"Ben","Age":"56","Weight":"75"}'
        str_stream = io.StringIO(str_sequence)
        ingest_client.ingest_from_stream(
            str_stream, ingestion_properties=ingestion_properties)
예제 #5
0
    with open("successes.log", "w+") as sf:
        for sm in success_messages:
            sf.write(str(sm))

    with open("failures.log", "w+") as ff:
        for fm in failure_messages:
            ff.write(str(fm))

##################################################################
##                        STREAMING INGEST                      ##
##################################################################

# Authenticate against this cluster endpoint as shown in the Auth section
cluster = "https://{cluster_name}.kusto.windows.net"

# NOTE(review): `kcsb` is not defined in this section; presumably it is the
# connection-string builder created in the Auth section. Confirm.
client = KustoStreamingIngestClient(kcsb)

# CSV ingestion settings; the "{...}" values look like placeholders to fill in.
ingestion_props = IngestionProperties(database="{database_name}", table="{table_name}", dataFormat=DataFormat.CSV)

# ingest from file, once through a FileDescriptor and once through a plain path
file_descriptor = FileDescriptor("{filename}.csv", 3333)  # 3333 is the raw size of the data in bytes.
client.ingest_from_file(file_descriptor, ingestion_properties=ingestion_props)
client.ingest_from_file("{filename}.csv", ingestion_properties=ingestion_props)

# ingest from dataframe
import pandas

# Column names and two sample rows used to build the DataFrame below.
fields = ["id", "name", "value"]
rows = [[1, "abc", 15.3], [2, "cde", 99.9]]

df = pandas.DataFrame(data=rows, columns=fields)
예제 #6
0
# Target cluster, database and table for this module; the trailing comments
# hold alternative values.
cluster = "Dadubovs1.westus"  # "toshetah"
db_name = "TestingDatabase"  # "PythonTest"
table_name = "Deft"


# Connection-string builders using AAD device authentication: one for the
# cluster URL itself and one for the same URL with an "ingest-" host prefix.
engine_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    "https://{}.kusto.windows.net".format(cluster)
)
dm_kcsb = KustoConnectionStringBuilder.with_aad_device_authentication(
    "https://ingest-{}.kusto.windows.net".format(cluster)
)
# Query client on the engine builder; ingest client and its status queues on
# the "ingest-" builder.
client = KustoClient(engine_kcsb)
ingest_client = KustoIngestClient(dm_kcsb)
ingest_status_q = KustoIngestStatusQueues(ingest_client)

# The streaming ingest client is built from the engine builder, not the
# "ingest-" one.
streaming_ingest_client = KustoStreamingIngestClient(engine_kcsb)

# Runs at import time: issues ".drop table ... ifexists" for the target table,
# presumably so the tests start without it. Verify against the tests below.
client.execute(db_name, ".drop table {} ifexists".format(table_name))


@pytest.mark.run(order=1)
def test_csv_ingest_non_existing_table():
    csv_ingest_props = IngestionProperties(
        db_name,
        table_name,
        dataFormat=DataFormat.CSV,
        mapping=Helpers.create_deft_table_csv_mappings(),
        reportLevel=ReportLevel.FailuresAndSuccesses,
    )
    csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv")
    zipped_csv_file_path = os.path.join(os.getcwd(), "azure-kusto-ingest", "tests", "input", "dataset.csv.gz")
예제 #7
0
    missing_path_parts = []
    for path_part in path_parts:
        if path_part not in current_dir:
            missing_path_parts.append(path_part)
    return os.path.join(current_dir, *missing_path_parts)


# Init clients
# Target database name comes from the environment (None when unset).
test_db = os.environ.get("TEST_DATABASE")

# Table name embeds the interpreter version (major_minor_micro) and the
# current Unix time in whole seconds.
python_version = "_".join([str(v) for v in sys.version_info[:3]])
test_table = "python_test_{0}_{1}".format(python_version,
                                          str(int(time.time())))
# Query and streaming clients use the engine builder; the ingest client uses
# the DM builder. Both builder helpers are defined outside this section.
client = KustoClient(engine_kcsb_from_env())
ingest_client = KustoIngestClient(dm_kcsb_from_env())
streaming_ingest_client = KustoStreamingIngestClient(engine_kcsb_from_env())

# Timezone-aware UTC timestamp taken when this module-level setup runs.
start_time = datetime.datetime.now(datetime.timezone.utc)

# NOTE(review): presumably removes tables left over from earlier runs;
# confirm against the helper's definition.
clean_previous_tests(client, test_db, test_table)
input_folder_path = get_file_path()

# Paths of the input datasets inside the input folder.
csv_file_path = os.path.join(input_folder_path, "dataset.csv")
tsv_file_path = os.path.join(input_folder_path, "dataset.tsv")
zipped_csv_file_path = os.path.join(input_folder_path, "dataset.csv.gz")
json_file_path = os.path.join(input_folder_path, "dataset.json")
zipped_json_file_path = os.path.join(input_folder_path, "dataset.jsonz.gz")

# Counter initialised to zero; presumably tracks the expected row count as
# tests ingest data. Verify against its users.
current_count = 0

client.execute(