コード例 #1
0
    def test_load_df_with_data_types(self, mock_run_cli):
        """Check the DDL produced by ``HiveCliHook.load_df`` for mixed column types.

        A one-row DataFrame is built with one column per sample value
        (bool, negative int, positive int, float, str, datetime, plain
        object, bytes, text and ``None``) and loaded into table ``t``.
        The first positional argument of the first call recorded on
        ``mock_run_cli`` is then compared with the expected CREATE TABLE
        statement through ``assertEqualIgnoreMultipleSpaces``.
        """
        # Column order matters: it must line up with the DDL below.
        columns = OrderedDict([
            ('b', [True]),
            ('i', [-1]),
            ('t', [1]),
            ('f', [0.0]),
            ('c', ['c']),
            ('M', [datetime.datetime(2018, 1, 1)]),
            ('O', [object()]),
            ('S', [b'STRING']),
            ('U', ['STRING']),
            ('V', [None]),
        ])
        frame = pd.DataFrame(columns)

        HiveCliHook().load_df(frame, 't')

        expected_ddl = """
            CREATE TABLE IF NOT EXISTS t (
                b BOOLEAN,
                i BIGINT,
                t BIGINT,
                f DOUBLE,
                c STRING,
                M TIMESTAMP,
                O STRING,
                S STRING,
                U STRING,
                V STRING)
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY ','
            STORED AS textfile
            ;
        """
        # call_args_list[0] is the first call; [0][0] is its first positional arg.
        executed_ddl = mock_run_cli.call_args_list[0][0][0]
        assertEqualIgnoreMultipleSpaces(self, executed_ddl, expected_ddl)
コード例 #2
0
    def test_load_df_with_data_types(self, mock_run_cli):
        """Verify the CREATE TABLE statement ``load_df`` issues for varied dtypes.

        Each sample value below becomes a single-row column of the
        DataFrame handed to ``HiveCliHook.load_df``. The statement captured
        as the first positional argument of the first ``mock_run_cli`` call
        is compared with the expected DDL via
        ``assertEqualIgnoreMultipleSpaces``.
        """
        # (column name, single cell value), in the order the DDL lists them.
        sample_cells = (
            ('b', True),
            ('i', -1),
            ('t', 1),
            ('f', 0.0),
            ('c', 'c'),
            ('M', datetime.datetime(2018, 1, 1)),
            ('O', object()),
            ('S', 'STRING'.encode('utf-8')),
            ('U', 'STRING'),
            ('V', None),
        )
        data = OrderedDict()
        for column, cell in sample_cells:
            data[column] = [cell]

        hive_hook = HiveCliHook()
        hive_hook.load_df(pd.DataFrame(data), 't')

        expected = """
            CREATE TABLE IF NOT EXISTS t (
                b BOOLEAN,
                i BIGINT,
                t BIGINT,
                f DOUBLE,
                c STRING,
                M TIMESTAMP,
                O STRING,
                S STRING,
                U STRING,
                V STRING)
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY ','
            STORED AS textfile
            ;
        """
        first_call = mock_run_cli.call_args_list[0]
        # first_call[0] holds the positional arguments of that call.
        assertEqualIgnoreMultipleSpaces(self, first_call[0][0], expected)
コード例 #3
0
    def test_execute(self, mock_run):
        """Check the COPY INTO statement issued by ``S3ToSnowflakeTransfer.execute``.

        The operator is executed with two S3 keys and no column list.
        ``mock_run`` must have been called exactly once, and its first
        positional argument must match the expected statement when compared
        with ``assertEqualIgnoreMultipleSpaces``.
        """
        keys = ['1.csv', '2.csv']
        target_table = 'table'
        stage_name = 'stage'
        fmt_name = 'file_format'
        target_schema = 'schema'

        operator = S3ToSnowflakeTransfer(
            s3_keys=keys,
            table=target_table,
            stage=stage_name,
            file_format=fmt_name,
            schema=target_schema,
            columns_array=None,
            task_id="task_id",
            dag=None,
        )
        operator.execute(None)

        # Turn the list repr "['1.csv', '2.csv']" into "('1.csv', '2.csv')".
        files_clause = str(keys).replace('[', '(').replace(']', ')')
        base_sql = """
                FROM @{stage}/
                files={files}
                file_format={file_format}
            """.format(stage=stage_name, files=files_clause, file_format=fmt_name)

        expected_copy = """
                COPY INTO {schema}.{table} {base_sql}
            """.format(schema=target_schema, table=target_table, base_sql=base_sql)

        assert mock_run.call_count == 1
        issued_sql = mock_run.call_args[0][0]
        assertEqualIgnoreMultipleSpaces(self, issued_sql, expected_copy)
コード例 #4
0
ファイル: test_tests.py プロジェクト: Fokko/incubator-airflow
    def test_assertEqualIgnoreMultipleSpaces_passes(self):
        """The helper must accept 'w oo f' against a spread-out, multi-line copy."""
        compact = 'w oo f'
        # Same words, but padded with newlines, indentation and repeated spaces.
        spread_out = """
            w
            oo    f
        """

        assertEqualIgnoreMultipleSpaces(self, compact, spread_out)
コード例 #5
0
    def test_assertEqualIgnoreMultipleSpaces_passes(self):
        """Passing case: a single-line string versus its multi-line, padded form."""
        single_line, padded = 'w oo f', """
            w
            oo    f
        """

        # No exception is expected here; the call itself is the assertion.
        assertEqualIgnoreMultipleSpaces(self, single_line, padded)
コード例 #6
0
    def test_execute(self, table_as_file_name, expected_s3_key, mock_run, mock_session):
        """Check the UNLOAD statement issued by ``RedshiftToS3Transfer.execute``.

        ``table_as_file_name`` is forwarded to the operator and
        ``expected_s3_key`` is the key expected in the generated ``s3://``
        target. ``mock_session`` is made to return a ``Session`` built from
        fixed credentials. ``mock_run`` must have been called exactly once
        with a statement matching the expected one under
        ``assertEqualIgnoreMultipleSpaces``.
        """
        access_key, secret_key = "aws_access_key_id", "aws_secret_access_key"
        mock_session.return_value = Session(access_key, secret_key)

        schema, table = "schema", "table"
        s3_bucket, s3_key = "bucket", "key"
        options = ['HEADER']

        operator = RedshiftToS3Transfer(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            unload_options=options,
            include_header=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            table_as_file_name=table_as_file_name,
            dag=None,
        )
        operator.execute(None)

        select_query = "SELECT * FROM {schema}.{table}".format(schema=schema, table=table)
        # The expected key comes from the test parameters, not from s3_key above.
        expected_unload = """
                    UNLOAD ('{select_query}')
                    TO 's3://{s3_bucket}/{s3_key}'
                    with credentials
                    'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                    {unload_options};
                    """.format(
            select_query=select_query,
            s3_bucket=s3_bucket,
            s3_key=expected_s3_key,
            access_key=access_key,
            secret_key=secret_key,
            unload_options='\n\t\t\t'.join(options),
        )

        assert mock_run.call_count == 1
        issued_sql = mock_run.call_args[0][0]
        assertEqualIgnoreMultipleSpaces(self, issued_sql, expected_unload)
コード例 #7
0
    def test_assertEqualIgnoreMultipleSpaces_raises(self):
        """The helper must raise ``AssertionError`` for 'w oo f' versus 'meow'."""
        first, second = 'w oo f', 'meow'

        with self.assertRaises(AssertionError):
            assertEqualIgnoreMultipleSpaces(self, first, second)
コード例 #8
0
    def test_mysql_hook_test_bulk_dump_mock(self, mock_get_conn):
        """Check the statement ``MySqlHook.bulk_dump`` sends to the cursor.

        The ``execute`` attribute of the cursor returned by the mocked
        connection is replaced with a ``MagicMock``. After ``bulk_dump``,
        that mock must have been called exactly once, and its first
        positional argument is compared with the expected
        ``SELECT ... INTO OUTFILE`` statement via
        ``assertEqualIgnoreMultipleSpaces``.
        """
        execute_spy = mock.MagicMock()
        cursor = mock_get_conn.return_value.cursor.return_value
        cursor.execute = execute_spy

        from airflow.hooks.mysql_hook import MySqlHook
        source_table = "INFORMATION_SCHEMA.TABLES"
        output_path = "/path/to/output/file"
        MySqlHook('airflow_ci').bulk_dump(source_table, output_path)

        from airflow.utils.tests import assertEqualIgnoreMultipleSpaces
        execute_spy.assert_called_once()
        expected_sql = """
            SELECT * INTO OUTFILE '{tmp_file}'
            FROM {table}
        """.format(tmp_file=output_path, table=source_table)
        executed_sql = execute_spy.call_args[0][0]
        assertEqualIgnoreMultipleSpaces(self, executed_sql, expected_sql)
コード例 #9
0
    def test_execute(self, mock_run, mock_Session):
        """Check the COPY statement issued by ``S3ToRedshiftTransfer.execute``.

        ``mock_Session`` is made to return a ``Session`` built from fixed
        credentials and the operator is executed with empty copy options.
        ``mock_run`` must have been called exactly once with a statement
        matching the expected one under ``assertEqualIgnoreMultipleSpaces``.
        """
        access_key, secret_key = "aws_access_key_id", "aws_secret_access_key"
        mock_Session.return_value = Session(access_key, secret_key)

        schema, table = "schema", "table"
        s3_bucket, s3_key = "bucket", "key"
        copy_options = ""

        S3ToRedshiftTransfer(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            copy_options=copy_options,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
        ).execute(None)

        expected_copy = """
            COPY {schema}.{table}
            FROM 's3://{s3_bucket}/{s3_key}/{table}'
            with credentials
            'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
            {copy_options};
        """.format(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            access_key=access_key,
            secret_key=secret_key,
            copy_options=copy_options,
        )

        assert mock_run.call_count == 1
        issued_sql = mock_run.call_args[0][0]
        assertEqualIgnoreMultipleSpaces(self, issued_sql, expected_copy)
    def test_execute(self, mock_get_conn, mock_run, mock_Session):
        """Check both statements issued by ``RedshiftToS3Transfer.execute``.

        The mocked connection hands out a cursor whose ``fetchall`` returns
        a single column name. After execution with ``include_header=True``:

        * the cursor's ``execute`` must have been called once with the
          expected ``information_schema.columns`` lookup, and
        * ``mock_run`` must have been called once with the expected UNLOAD
          statement.

        Both comparisons go through ``assertEqualIgnoreMultipleSpaces``.
        """
        column_name = "col"
        cursor = mock.MagicMock()
        cursor.fetchall.return_value = [(column_name,)]
        mock_get_conn.return_value.cursor.return_value = cursor

        access_key, secret_key = "aws_access_key_id", "aws_secret_access_key"
        mock_Session.return_value = Session(access_key, secret_key)

        schema, table = "schema", "table"
        s3_bucket, s3_key = "bucket", "key"
        options = ('PARALLEL OFF',)

        RedshiftToS3Transfer(
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            unload_options=options,
            include_header=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
        ).execute(None)

        expected_columns_sql = """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = '{schema}'
            AND   table_name = '{table}'
            ORDER BY ordinal_position
            """.format(schema=schema, table=table)

        expected_unload_sql = """
                UNLOAD ('SELECT {column_name} FROM
                            (SELECT 2 sort_order,
                             CAST({column_name} AS text) AS {column_name}
                            FROM {schema}.{table}
                            UNION ALL
                            SELECT 1 sort_order, \\'{column_name}\\')
                         ORDER BY sort_order')
                TO 's3://{s3_bucket}/{s3_key}/{table}_'
                with credentials
                'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                {unload_options};
                """.format(
            column_name=column_name,
            schema=schema,
            table=table,
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            access_key=access_key,
            secret_key=secret_key,
            unload_options='\n\t\t\t'.join(options),
        )

        # Column lookup goes through the cursor obtained from the mocked connection.
        assert cursor.execute.call_count == 1
        columns_sql = cursor.execute.call_args[0][0]
        assertEqualIgnoreMultipleSpaces(self, columns_sql, expected_columns_sql)

        # The UNLOAD itself is captured by mock_run.
        assert mock_run.call_count == 1
        unload_sql = mock_run.call_args[0][0]
        assertEqualIgnoreMultipleSpaces(self, unload_sql, expected_unload_sql)
コード例 #11
0
    def test_execute(self, mock_get_conn, mock_run, mock_Session):
        """Verify the column lookup and UNLOAD issued by ``RedshiftToS3Transfer``.

        A ``MagicMock`` cursor whose ``fetchall`` yields one column name is
        installed on the mocked connection, and ``mock_Session`` returns a
        ``Session`` with fixed credentials. After ``execute(None)`` the test
        asserts exactly one ``cursor.execute`` call and exactly one
        ``mock_run`` call, comparing each statement with its expected text
        via ``assertEqualIgnoreMultipleSpaces``.
        """
        column_name = "col"
        fake_cursor = mock.MagicMock()
        fake_cursor.fetchall.return_value = [(column_name,)]
        mock_get_conn.return_value.cursor.return_value = fake_cursor

        access_key = "aws_access_key_id"
        secret_key = "aws_secret_access_key"
        mock_Session.return_value = Session(access_key, secret_key)

        s3_bucket = "bucket"
        s3_key = "key"
        unload_opts = ('PARALLEL OFF',)
        # Shared by the operator arguments and both expected statements.
        location = dict(schema="schema", table="table")

        transfer = RedshiftToS3Transfer(
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            unload_options=unload_opts,
            include_header=True,
            redshift_conn_id="redshift_conn_id",
            aws_conn_id="aws_conn_id",
            task_id="task_id",
            dag=None,
            **location)
        transfer.execute(None)

        columns_query = """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = '{schema}'
            AND   table_name = '{table}'
            ORDER BY ordinal_position
            """.format(**location)

        unload_query = """
                UNLOAD ('SELECT {column_name} FROM
                            (SELECT 2 sort_order,
                             CAST({column_name} AS text) AS {column_name}
                            FROM {schema}.{table}
                            UNION ALL
                            SELECT 1 sort_order, \\'{column_name}\\')
                         ORDER BY sort_order')
                TO 's3://{s3_bucket}/{s3_key}/{table}_'
                with credentials
                'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                {unload_options};
                """.format(column_name=column_name,
                           s3_bucket=s3_bucket,
                           s3_key=s3_key,
                           access_key=access_key,
                           secret_key=secret_key,
                           unload_options='\n\t\t\t'.join(unload_opts),
                           **location)

        assert fake_cursor.execute.call_count == 1
        assertEqualIgnoreMultipleSpaces(
            self, fake_cursor.execute.call_args[0][0], columns_query)

        assert mock_run.call_count == 1
        assertEqualIgnoreMultipleSpaces(
            self, mock_run.call_args[0][0], unload_query)
コード例 #12
0
ファイル: test_tests.py プロジェクト: Fokko/incubator-airflow
    def test_assertEqualIgnoreMultipleSpaces_raises(self):
        """Failing case: 'w oo f' and 'meow' must trigger ``AssertionError``."""
        left = 'w oo f'
        right = 'meow'

        # assertRaises invokes the helper itself with the trailing arguments.
        self.assertRaises(
            AssertionError, assertEqualIgnoreMultipleSpaces, self, left, right)