Ejemplo n.º 1
0
    def test_load_df_with_data_types(self, mock_run_cli):
        """Check the CREATE TABLE statement issued by ``load_df`` for mixed column types.

        A one-row DataFrame is built with one column per kind of value
        (bool, negative int, positive int, float, one-character str, datetime,
        plain ``object()``, bytes, str and ``None``) and loaded into table
        ``t`` through ``MockHiveCliHook``. The first positional argument of
        the first call recorded on ``mock_run_cli`` is then compared with the
        expected DDL via ``assert_equal_ignore_multiple_spaces``.

        :param mock_run_cli: mock whose recorded calls are inspected
            (presumably injected by a patch decorator not visible here).
        """
        # Column name -> single-row value list; insertion order fixes column order.
        columns = OrderedDict([
            ('b', [True]),
            ('i', [-1]),
            ('t', [1]),
            ('f', [0.0]),
            ('c', ['c']),
            ('M', [datetime.datetime(2018, 1, 1)]),
            ('O', [object()]),
            ('S', [b'STRING']),
            ('U', ['STRING']),
            ('V', [None]),
        ])
        frame = pd.DataFrame(columns)

        MockHiveCliHook().load_df(frame, 't')

        expected_ddl = """
            CREATE TABLE IF NOT EXISTS t (
                `b` BOOLEAN,
                `i` BIGINT,
                `t` BIGINT,
                `f` DOUBLE,
                `c` STRING,
                `M` TIMESTAMP,
                `O` STRING,
                `S` STRING,
                `U` STRING,
                `V` STRING)
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY ','
            STORED AS textfile
            ;
        """
        # call_args_list[0] is the first recorded call; [0] selects its positional args.
        first_call_args = mock_run_cli.call_args_list[0][0]
        assert_equal_ignore_multiple_spaces(self, first_call_args[0], expected_ddl)
Ejemplo n.º 2
0
    def test_execute(self, mock_run):
        """Execute ``S3ToSnowflakeTransferOperator`` and check the SQL passed to ``mock_run``.

        The operator is built with two S3 keys and no explicit column list,
        then executed with a ``None`` context. The test expects exactly one
        call on ``mock_run`` whose first positional argument matches the
        ``COPY INTO`` statement assembled below, as compared by
        ``assert_equal_ignore_multiple_spaces``.

        :param mock_run: mock whose single recorded call is inspected
            (presumably injected by a patch decorator not visible here).
        """
        s3_keys = ['1.csv', '2.csv']
        table, stage = 'table', 'stage'
        file_format, schema = 'file_format', 'schema'

        operator = S3ToSnowflakeTransferOperator(
            s3_keys=s3_keys,
            table=table,
            stage=stage,
            file_format=file_format,
            schema=schema,
            columns_array=None,
            task_id="task_id",
            dag=None,
        )
        operator.execute(None)

        # Render the key list as Python prints it, with parentheses instead of brackets.
        files = str(s3_keys).replace('[', '(').replace(']', ')')
        base_sql = """
                FROM @{stage}/
                files={files}
                file_format={file_format}
            """.format(stage=stage, files=files, file_format=file_format)

        expected_copy = """
                COPY INTO {schema}.{table} {base_sql}
            """.format(schema=schema, table=table, base_sql=base_sql)

        assert mock_run.call_count == 1
        executed_sql = mock_run.call_args[0][0]
        assert_equal_ignore_multiple_spaces(self, executed_sql, expected_copy)
    def test_execute(self, mock_run, mock_session):
        """Execute ``S3ToRedshiftOperator`` and check the COPY statement passed to ``mock_run``.

        ``mock_session`` is set to return a ``Session`` built from fixed
        access/secret keys. After executing the operator with a ``None``
        context, the test expects exactly one call on ``mock_run`` whose first
        positional argument matches the COPY statement written out below, as
        compared by ``assert_equal_ignore_multiple_spaces``.

        :param mock_run: mock whose single recorded call is inspected.
        :param mock_session: mock whose ``return_value`` supplies the session
            (both presumably injected by patch decorators not visible here).
        """
        access_key, secret_key = "aws_access_key_id", "aws_secret_access_key"
        mock_session.return_value = Session(access_key, secret_key)

        schema, table = "schema", "table"
        s3_bucket, s3_key = "bucket", "key"
        copy_options = ""

        S3ToRedshiftOperator(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            copy_options=copy_options,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
        ).execute(None)

        copy_query = f"""
            COPY {schema}.{table}
            FROM 's3://{s3_bucket}/{s3_key}'
            with credentials
            'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
            {copy_options};
        """

        assert mock_run.call_count == 1
        executed_sql = mock_run.call_args[0][0]
        assert_equal_ignore_multiple_spaces(self, executed_sql, copy_query)
Ejemplo n.º 4
0
    def test_execute(self, mock_run, mock_session):
        """Execute ``S3ToRedshiftOperator`` and compare against its own query builder.

        The session returned by ``mock_session`` carries fixed access/secret
        keys and a ``None`` token. The expected statement is produced by
        ``build_credentials_block`` plus the operator's ``_build_copy_query``.
        The test checks that both keys appear in it and that the single call
        recorded on ``mock_run`` received a matching first positional
        argument, as compared by ``assert_equal_ignore_multiple_spaces``.

        :param mock_run: mock whose single recorded call is inspected.
        :param mock_session: mock whose ``return_value`` supplies the session
            (both presumably injected by patch decorators not visible here).
        """
        access_key, secret_key = "aws_access_key_id", "aws_secret_access_key"

        # Configure the session fully before installing it as the return value.
        session = Session(access_key, secret_key)
        session.access_key = access_key
        session.secret_key = secret_key
        session.token = None
        mock_session.return_value = session

        schema, table = "schema", "table"
        s3_bucket, s3_key = "bucket", "key"
        copy_options = ""

        operator = S3ToRedshiftOperator(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            copy_options=copy_options,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
        )
        operator.execute(None)

        credentials = build_credentials_block(mock_session.return_value)
        expected_copy = operator._build_copy_query(credentials, copy_options)

        assert mock_run.call_count == 1
        assert access_key in expected_copy
        assert secret_key in expected_copy
        executed_sql = mock_run.call_args[0][0]
        assert_equal_ignore_multiple_spaces(self, executed_sql, expected_copy)
Ejemplo n.º 5
0
    def test_execute(
        self,
        table_as_file_name,
        expected_s3_key,
        mock_run,
        mock_session,
    ):
        """Execute ``RedshiftToS3Operator`` and check the UNLOAD statement passed to ``mock_run``.

        The operator is created with ``s3_key="key"`` and the given
        ``table_as_file_name`` flag. The expected statement is written out
        with ``expected_s3_key`` as the destination key. Exactly one call on
        ``mock_run`` is expected, and its first positional argument is
        compared via ``assert_equal_ignore_multiple_spaces``.

        :param table_as_file_name: forwarded unchanged to the operator.
        :param expected_s3_key: key expected in the generated ``TO 's3://...'``
            clause (this and ``table_as_file_name`` presumably come from a
            parameterization decorator not visible here).
        :param mock_run: mock whose single recorded call is inspected.
        :param mock_session: mock whose ``return_value`` supplies the session.
        """
        access_key, secret_key = "aws_access_key_id", "aws_secret_access_key"
        mock_session.return_value = Session(access_key, secret_key)

        schema, table = "schema", "table"
        s3_bucket, s3_key = "bucket", "key"
        unload_options = [
            'HEADER',
        ]

        operator = RedshiftToS3Operator(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            unload_options=unload_options,
            include_header=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            table_as_file_name=table_as_file_name,
            dag=None,
        )
        operator.execute(None)

        # Flatten the option list into the text form used in the expected statement.
        joined_options = '\n\t\t\t'.join(unload_options)
        select_query = "SELECT * FROM {schema}.{table}".format(schema=schema,
                                                               table=table)
        expected_unload = """
                    UNLOAD ('{select_query}')
                    TO 's3://{s3_bucket}/{s3_key}'
                    with credentials
                    'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                    {unload_options};
                    """.format(
            select_query=select_query,
            s3_bucket=s3_bucket,
            s3_key=expected_s3_key,
            access_key=access_key,
            secret_key=secret_key,
            unload_options=joined_options,
        )

        assert mock_run.call_count == 1
        executed_sql = mock_run.call_args[0][0]
        assert_equal_ignore_multiple_spaces(self, executed_sql, expected_unload)
Ejemplo n.º 6
0
    def test_execute_sts_token(
        self,
        table_as_file_name,
        expected_s3_key,
        mock_run,
        mock_session,
    ):
        """Execute ``RedshiftToS3Operator`` with a session that carries a token.

        The session returned by ``mock_session`` has an access key, a secret
        key and a token. The expected statement is produced by
        ``build_credentials_block`` plus the operator's
        ``_build_unload_query``. The test checks that all three credential
        values appear in it and that the single call recorded on ``mock_run``
        received a matching first positional argument.

        :param table_as_file_name: forwarded unchanged to the operator.
        :param expected_s3_key: key passed to ``_build_unload_query`` (this and
            ``table_as_file_name`` presumably come from a parameterization
            decorator not visible here).
        :param mock_run: mock whose single recorded call is inspected.
        :param mock_session: mock whose ``return_value`` supplies the session.
        """
        access_key = "ASIA_aws_access_key_id"
        secret_key = "aws_secret_access_key"
        token = "token"

        # Configure the session fully before installing it as the return value.
        session = Session(access_key, secret_key, token)
        session.access_key = access_key
        session.secret_key = secret_key
        session.token = token
        mock_session.return_value = session

        schema, table = "schema", "table"
        s3_bucket, s3_key = "bucket", "key"
        unload_options = [
            'HEADER',
        ]

        operator = RedshiftToS3Operator(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            unload_options=unload_options,
            include_header=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            table_as_file_name=table_as_file_name,
            dag=None,
        )
        operator.execute(None)

        joined_options = '\n\t\t\t'.join(unload_options)
        select_query = f"SELECT * FROM {schema}.{table}"
        credentials = build_credentials_block(mock_session.return_value)

        expected_unload = operator._build_unload_query(
            credentials, select_query, expected_s3_key, joined_options
        )

        assert mock_run.call_count == 1
        # Every credential part must appear in the generated statement.
        assert access_key in expected_unload
        assert secret_key in expected_unload
        assert token in expected_unload
        executed_sql = mock_run.call_args[0][0]
        assert_equal_ignore_multiple_spaces(self, executed_sql, expected_unload)
Ejemplo n.º 7
0
    def test_custom_select_query_unloading(
        self,
        table,
        table_as_file_name,
        expected_s3_key,
        mock_run,
        mock_session,
    ):
        """Execute ``RedshiftToS3Operator`` with an explicit ``select_query``.

        The operator receives a hand-written SELECT statement instead of a
        schema. The expected statement is produced by
        ``build_credentials_block`` plus the operator's
        ``_build_unload_query`` using that same SELECT. The test checks that
        both keys appear in it and that the single call recorded on
        ``mock_run`` received a matching first positional argument.

        :param table: forwarded unchanged to the operator.
        :param table_as_file_name: forwarded unchanged to the operator.
        :param expected_s3_key: key passed to ``_build_unload_query`` (these
            three presumably come from a parameterization decorator not
            visible here).
        :param mock_run: mock whose single recorded call is inspected.
        :param mock_session: mock whose ``return_value`` supplies the session.
        """
        access_key, secret_key = "aws_access_key_id", "aws_secret_access_key"

        # Configure the session fully before installing it as the return value.
        session = Session(access_key, secret_key)
        session.access_key = access_key
        session.secret_key = secret_key
        session.token = None
        mock_session.return_value = session

        s3_bucket, s3_key = "bucket", "key"
        unload_options = [
            'HEADER',
        ]
        select_query = "select column from table"

        operator = RedshiftToS3Operator(
            select_query=select_query,
            table=table,
            table_as_file_name=table_as_file_name,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            unload_options=unload_options,
            include_header=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
        )
        operator.execute(None)

        joined_options = '\n\t\t\t'.join(unload_options)
        credentials = build_credentials_block(mock_session.return_value)
        expected_unload = operator._build_unload_query(
            credentials, select_query, expected_s3_key, joined_options
        )

        assert mock_run.call_count == 1
        assert access_key in expected_unload
        assert secret_key in expected_unload
        executed_sql = mock_run.call_args[0][0]
        assert_equal_ignore_multiple_spaces(self, executed_sql, expected_unload)
Ejemplo n.º 8
0
    def test_mysql_hook_test_bulk_dump_mock(self, mock_get_conn):
        """Check the statement ``MySqlHook.bulk_dump`` sends to the cursor's ``execute``.

        A fresh ``MagicMock`` replaces ``execute`` on the cursor reached via
        ``mock_get_conn.return_value.cursor.return_value``. After
        ``bulk_dump`` runs, the test expects exactly one ``execute`` call
        whose first positional argument matches a
        ``SELECT * INTO OUTFILE ... FROM ...`` statement, as compared by
        ``assert_equal_ignore_multiple_spaces``.

        :param mock_get_conn: mock standing in for the hook's connection getter
            (presumably injected by a patch decorator not visible here).
        """
        execute_spy = mock.MagicMock()
        cursor = mock_get_conn.return_value.cursor.return_value
        cursor.execute = execute_spy

        table = "INFORMATION_SCHEMA.TABLES"
        tmp_file = "/path/to/output/file"
        MySqlHook('airflow_db').bulk_dump(table, tmp_file)

        from tests.test_utils.asserts import assert_equal_ignore_multiple_spaces
        assert execute_spy.call_count == 1
        expected_sql = """
            SELECT * INTO OUTFILE '{tmp_file}'
            FROM {table}
        """.format(tmp_file=tmp_file, table=table)
        executed_sql = execute_spy.call_args[0][0]
        assert_equal_ignore_multiple_spaces(self, executed_sql, expected_sql)
Ejemplo n.º 9
0
    def test_truncate(self, mock_run, mock_session):
        """Execute ``S3ToRedshiftOperator`` with ``truncate_table=True`` and check the SQL.

        ``mock_session`` is set to return a ``Session`` built from fixed
        access/secret keys. After executing the operator with a ``None``
        context, the test expects exactly one call on ``mock_run`` whose first
        positional argument matches a ``BEGIN; TRUNCATE ...; COPY ...; COMMIT``
        transaction, as compared by ``assert_equal_ignore_multiple_spaces``.

        :param mock_run: mock whose single recorded call is inspected.
        :param mock_session: mock whose ``return_value`` supplies the session
            (both presumably injected by patch decorators not visible here).
        """
        access_key = "aws_access_key_id"
        secret_key = "aws_secret_access_key"
        mock_session.return_value = Session(access_key, secret_key)

        schema = "schema"
        table = "table"
        s3_bucket = "bucket"
        s3_key = "key"
        copy_options = ""

        op = S3ToRedshiftOperator(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            copy_options=copy_options,
            truncate_table=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
        )
        op.execute(None)

        # Check the call count before touching call_args: on a mock that was
        # never called, call_args is None and indexing it would raise a
        # TypeError that hides the real failure.
        assert mock_run.call_count == 1

        copy_statement = f"""
                    COPY {schema}.{table}
                    FROM 's3://{s3_bucket}/{s3_key}'
                    with credentials
                    'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                    {copy_options};
                """

        truncate_statement = f'TRUNCATE TABLE {schema}.{table};'
        transaction = f"""
                    BEGIN;
                    {truncate_statement}
                    {copy_statement}
                    COMMIT
                    """
        assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0],
                                            transaction)
Ejemplo n.º 10
0
    def test_truncate(self, mock_run, mock_session):
        """Execute ``S3ToRedshiftOperator`` with ``truncate_table=True`` and check the SQL.

        The session returned by ``mock_session`` carries fixed access/secret
        keys and a ``None`` token. The COPY part of the expected transaction
        is produced by ``build_credentials_block`` plus the operator's
        ``_build_copy_query``. The test expects exactly one call on
        ``mock_run`` whose first positional argument matches the
        ``BEGIN; TRUNCATE ...; COPY ...; COMMIT`` transaction, as compared by
        ``assert_equal_ignore_multiple_spaces``.

        :param mock_run: mock whose single recorded call is inspected.
        :param mock_session: mock whose ``return_value`` supplies the session
            (both presumably injected by patch decorators not visible here).
        """
        access_key = "aws_access_key_id"
        secret_key = "aws_secret_access_key"
        mock_session.return_value = Session(access_key, secret_key)
        mock_session.return_value.access_key = access_key
        mock_session.return_value.secret_key = secret_key
        mock_session.return_value.token = None

        schema = "schema"
        table = "table"
        s3_bucket = "bucket"
        s3_key = "key"
        copy_options = ""

        op = S3ToRedshiftOperator(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            copy_options=copy_options,
            truncate_table=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
        )
        op.execute(None)

        # Check the call count before touching call_args: on a mock that was
        # never called, call_args is None and indexing it would raise a
        # TypeError that hides the real failure.
        assert mock_run.call_count == 1

        credentials_block = build_credentials_block(mock_session.return_value)
        copy_statement = op._build_copy_query(credentials_block, copy_options)

        truncate_statement = f'TRUNCATE TABLE {schema}.{table};'
        transaction = f"""
                    BEGIN;
                    {truncate_statement}
                    {copy_statement}
                    COMMIT
                    """
        assert_equal_ignore_multiple_spaces(self, mock_run.call_args[0][0],
                                            transaction)