Example #1
def test_copy_into_operation_failure(mock_redshift):
    expected = [
        RedshiftOperation(
            extracted_schema={
                "s3://test/test.json.*": [
                    Column(
                        dataset_name="s3://test/test.json",
                        name="*",
                        alias="s3://test/test.json.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "copy into FAIL from s3://test/test.json CREDENTIALS = (AWS_KEY_ID = 'test' AWS_SECRET_KEY = 'test');",
            query_id=None,
            success=False,
            op_type=DbndTargetOperationType.read,
            error="Exception Mock",
            source_name="s3://test/test.json",
            target_name="FAIL",
        ),
        RedshiftOperation(
            extracted_schema={
                "FAIL.*": [
                    Column(
                        dataset_name="FAIL",
                        name="*",
                        alias="FAIL.*",
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "copy into FAIL from s3://test/test.json CREDENTIALS = (AWS_KEY_ID = 'test' AWS_SECRET_KEY = 'test');",
            query_id=None,
            success=False,
            op_type=DbndTargetOperationType.write,
            error="Exception Mock",
            source_name="s3://test/test.json",
            target_name="FAIL",
        ),
    ]
    with pytest.raises(Exception) as e:
        run_tracker_custom_query(COPY_INTO_TABLE_FAIL_QUERY, expected)
    assert str(e.value) == ERROR_MESSAGE
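The run_tracker_custom_query helper is not included in these snippets. Based on how the Redshift examples call it and on the tracker pattern shown in Example #5 below, a minimal hypothetical sketch might look like the following; the body is an assumption, and only RedshiftTracker, _redshift_connect, and compare_eq_operations are names taken from the examples (the Snowflake variant in Example #3 additionally receives the mock fixture as its first argument).

# Hypothetical sketch -- the real helper is not included in these snippets.
def run_tracker_custom_query(query, expected):
    tracker = RedshiftTracker()
    with tracker:
        with _redshift_connect() as con:
            cursor = con.cursor()
            try:
                # On failure the exception propagates to the caller; the
                # failure tests above wrap this call in pytest.raises.
                cursor.execute(query)
            finally:
                # Compare the operations recorded for this cursor against
                # the expected RedshiftOperation list.
                assert compare_eq_operations(
                    tracker.connections.get_operations(cursor), expected
                )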
Example #2
def test_copy_into_s3_graceful_failure(mock_redshift):
    expected = [
        RedshiftOperation(
            extracted_schema={
                "s3://my/bucket/file.csv.*": [
                    Column(
                        dataset_name="s3://my/bucket/file.csv",
                        name="*",
                        alias="s3://my/bucket/file.csv.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "COPY \"schema_fail\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.read,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="schema_fail",
        ),
        RedshiftOperation(
            extracted_schema={
                '"schema_fail".*': [
                    Column(
                        dataset_name='"schema_fail"',
                        name="*",
                        alias='"schema_fail".*',
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "COPY \"schema_fail\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.write,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="schema_fail",
        ),
    ]
    return run_tracker_custom_query(
        COPY_INTO_TABLE_FROM_S3_FILE_GRACEFUL_FAIL_QUERY, expected)
Example #3
def test_copy_into_failure(mock_snowflake):
    expected = [
        SqlOperation(
            extracted_schema={
                "s3://test/test.json.*": [
                    Column(
                        dataset_name="s3://test/test.json",
                        alias="s3://test/test.json.*",
                        name="*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=0,
            query=COPY_INTO_TABLE_FAIL_QUERY,
            query_id=SFQID,
            success=False,
            op_type=DbndTargetOperationType.read,
            error=ERROR_MESSAGE,
        ),
        SqlOperation(
            extracted_schema={
                "FAIL.*": [
                    Column(
                        dataset_name="FAIL",
                        alias="FAIL.*",
                        name="*",
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=0,
            query=COPY_INTO_TABLE_FAIL_QUERY,
            query_id=SFQID,
            success=False,
            op_type=DbndTargetOperationType.write,
            error=ERROR_MESSAGE,
        ),
    ]
    with pytest.raises(Exception) as e:
        run_tracker_custom_query(mock_snowflake, COPY_INTO_TABLE_FAIL_QUERY,
                                 expected)
    assert str(e.value) == ERROR_MESSAGE
Example #4
def generate_sql_operation_mock(op_type, table, is_file, is_stage):
    return SqlOperation(
        extracted_schema={
            table: [
                Column(
                    dataset_name=table,
                    alias=table,
                    name="*",
                    is_file=is_file,
                    is_stage=is_stage,
                )
            ]
        },
        dtypes=None,
        records_count=1,
        query="",
        query_id=1,
        success=True,
        op_type=op_type,
        error=None,
    )
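For illustration, generate_sql_operation_mock can be called with either DbndTargetOperationType value to build a one-column SqlOperation stub; the dataset and table names below are invented for this sketch.

# Illustrative calls -- the dataset/table names are invented for this sketch.
read_op = generate_sql_operation_mock(
    op_type=DbndTargetOperationType.read,
    table="s3://bucket/data.csv",
    is_file=True,
    is_stage=False,
)
write_op = generate_sql_operation_mock(
    op_type=DbndTargetOperationType.write,
    table="TARGET_TABLE",
    is_file=False,
    is_stage=False,
)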
Example #5
def test_multiple_psycopg2_connections(mock_redshift):
    expected1 = [
        RedshiftOperation(
            extracted_schema={
                "s3://my/bucket/file.csv.*": [
                    Column(
                        dataset_name="s3://my/bucket/file.csv",
                        name="*",
                        alias="s3://my/bucket/file.csv.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "COPY \"MY_TABLE\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.read,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="MY_TABLE",
        ),
        RedshiftOperation(
            extracted_schema={
                '"MY_TABLE".*': [
                    Column(
                        dataset_name='"MY_TABLE"',
                        name="*",
                        alias='"MY_TABLE".*',
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "COPY \"MY_TABLE\" from 's3://my/bucket/file.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.write,
            error=None,
            source_name="s3://my/bucket/file.csv",
            target_name="MY_TABLE",
        ),
    ]
    expected2 = [
        RedshiftOperation(
            extracted_schema={
                "s3://my/bucket/file2.csv.*": [
                    Column(
                        dataset_name="s3://my/bucket/file2.csv",
                        name="*",
                        alias="s3://my/bucket/file2.csv.*",
                        is_file=True,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "COPY \"MY_TABLE\" from 's3://my/bucket/file2.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.read,
            error=None,
            source_name="s3://my/bucket/file2.csv",
            target_name="MY_TABLE",
        ),
        RedshiftOperation(
            extracted_schema={
                '"MY_TABLE".*': [
                    Column(
                        dataset_name='"MY_TABLE"',
                        name="*",
                        alias='"MY_TABLE".*',
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
            dtypes=None,
            records_count=NUMBER_OF_ROWS_INSERTED,
            query=
            "COPY \"MY_TABLE\" from 's3://my/bucket/file2.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv",
            query_id=None,
            success=True,
            op_type=DbndTargetOperationType.write,
            error=None,
            source_name="s3://my/bucket/file2.csv",
            target_name="MY_TABLE",
        ),
    ]

    COPY_INTO_TABLE_FROM_S3_FILE_QUERY_2 = """COPY "MY_TABLE" from 's3://my/bucket/file2.csv' iam_role 'arn:aws:iam::12345:role/myRole' csv"""

    redshift_tracker = RedshiftTracker()

    with redshift_tracker as tracker:
        with _redshift_connect() as con1, _redshift_connect() as con2:
            c1 = con1.cursor()
            c2 = con2.cursor()

            try:
                c1.execute(COPY_INTO_TABLE_FROM_S3_FILE_QUERY)
                c2.execute(COPY_INTO_TABLE_FROM_S3_FILE_QUERY_2)
            finally:
                assert compare_eq_operations(
                    redshift_tracker.connections.get_operations(c1), expected1)
                assert compare_eq_operations(
                    redshift_tracker.connections.get_operations(c2), expected2)
        # flush operations
    assert redshift_tracker.connections.get_operations(c1) == []
    assert redshift_tracker.connections.get_operations(c2) == []
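Stripped of the assertions, the tracking pattern in this test is: enter the RedshiftTracker context, run queries through ordinary psycopg2 cursors, read the recorded operations per cursor while the context is active, and expect them to be flushed once it exits. A minimal single-connection sketch, reusing only names that appear in the example above:

# Minimal sketch of the pattern above; _redshift_connect and the query
# constant are assumed to be the same helpers used by the test module.
redshift_tracker = RedshiftTracker()

with redshift_tracker:
    with _redshift_connect() as con:
        cursor = con.cursor()
        cursor.execute(COPY_INTO_TABLE_FROM_S3_FILE_QUERY)
        # Operations are available per cursor while the tracker is active...
        operations = redshift_tracker.connections.get_operations(cursor)

# ...and are flushed once the tracker context exits.
assert redshift_tracker.connections.get_operations(cursor) == []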
Example #6
def parse_first_query(sqlquery):
    return sqlparse.parse(sqlquery)[0]


@pytest.mark.parametrize(
    "sqlquery, expected",
    [(
        """
copy into JSON_TABLE from s3://bucket/file.json CREDENTIALS = (AWS_KEY_ID = '12345' AWS_SECRET_KEY = '12345')
        """,
        {
            DbndTargetOperationType.read: {
                "s3://bucket/file.json.*": [
                    Column(
                        dataset_name="s3://bucket/file.json",
                        name="*",
                        alias="s3://bucket/file.json.*",
                        is_file=True,
                    )
                ]
            },
            DbndTargetOperationType.write: {
                "JSON_TABLE.*": [
                    Column(
                        dataset_name="JSON_TABLE",
                        name="*",
                        alias="JSON_TABLE.*",
                        is_file=False,
                        is_stage=False,
                    )
                ]
            },
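parse_first_query above is a thin wrapper around sqlparse: sqlparse.parse splits the input into Statement objects and the helper returns the first one. A short standalone illustration of that behavior (the SQL text is arbitrary):

import sqlparse

# sqlparse.parse returns a tuple of Statement objects, one per SQL statement.
statements = sqlparse.parse("SELECT 1; SELECT 2;")
first_statement = statements[0]
print(str(first_statement).strip())  # raw text of the first statement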
Example #7
    )


@pytest.mark.parametrize(
    "query, expected",
    [
        (
            COPY_INTO_TABLE_FROM_S3_FILE_QUERY,
            [
                SqlOperation(
                    extracted_schema={
                        "s3://test/test.json.*": [
                            Column(
                                dataset_name="s3://test/test.json",
                                alias="s3://test/test.json.*",
                                name="*",
                                is_file=True,
                                is_stage=False,
                            )
                        ]
                    },
                    dtypes=None,
                    records_count=NUMBER_OF_ROWS_INSERTED,
                    query=COPY_INTO_TABLE_FROM_S3_FILE_QUERY,
                    query_id=SFQID,
                    success=True,
                    op_type=DbndTargetOperationType.read,
                    error=None,
                ),
                SqlOperation(
                    extracted_schema={