예제 #1
0
    def test_mysql_to_hive_tblproperties(self, mock_popen, mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_mysql_to_hive"

        with mock.patch.dict('os.environ', self.env_vars):
            sql = "SELECT * FROM baby_names LIMIT 1000;"
            op = MySqlToHiveOperator(
                task_id='test_m2h',
                hive_cli_conn_id='hive_cli_default',
                sql=sql,
                hive_table='test_mysql_to_hive',
                recreate=True,
                delimiter=",",
                tblproperties={'test_property': 'test_value'},
                dag=self.dag)
            op.run(start_date=DEFAULT_DATE,
                   end_date=DEFAULT_DATE, ignore_ti_state=True)

        hive_cmd = ['beeline', '-u', '"jdbc:hive2://localhost:10000/default"', '-hiveconf',
                    'airflow.ctx.dag_id=unit_test_dag', '-hiveconf', 'airflow.ctx.task_id=test_m2h',
                    '-hiveconf', 'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00', '-hiveconf',
                    'airflow.ctx.dag_run_id=55', '-hiveconf', 'airflow.ctx.dag_owner=airflow',
                    '-hiveconf', '[email protected]', '-hiveconf',
                    'mapreduce.job.queuename=airflow', '-hiveconf', 'mapred.job.queue.name=airflow',
                    '-hiveconf', 'tez.queue.name=airflow',
                    '-f', '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive']

        mock_popen.assert_called_with(
            hive_cmd,
            stdout=mock_subprocess.PIPE,
            stderr=mock_subprocess.STDOUT,
            cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
            close_fds=True
        )
예제 #2
0
    def test_mysql_to_hive_type_conversion(self, mock_load_file):
        mysql_table = 'test_mysql_to_hive'

        hook = MySqlHook()

        try:
            with hook.get_conn() as conn:
                conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
                conn.execute(
                    """
                    CREATE TABLE {} (
                        c0 TINYINT,
                        c1 SMALLINT,
                        c2 MEDIUMINT,
                        c3 INT,
                        c4 BIGINT,
                        c5 TIMESTAMP
                    )
                """.format(
                        mysql_table
                    )
                )

            op = MySqlToHiveOperator(
                task_id='test_m2h',
                hive_cli_conn_id='hive_cli_default',
                sql="SELECT * FROM {}".format(mysql_table),
                hive_table='test_mysql_to_hive',
                dag=self.dag,
            )
            op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

            assert mock_load_file.call_count == 1
            ordered_dict = OrderedDict()
            ordered_dict["c0"] = "SMALLINT"
            ordered_dict["c1"] = "INT"
            ordered_dict["c2"] = "INT"
            ordered_dict["c3"] = "BIGINT"
            ordered_dict["c4"] = "DECIMAL(38,0)"
            ordered_dict["c5"] = "TIMESTAMP"
            self.assertEqual(mock_load_file.call_args[1]["field_dict"], ordered_dict)
        finally:
            with hook.get_conn() as conn:
                conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
예제 #3
0
    def test_mysql_to_hive_verify_loaded_values(self, mock_popen,
                                                mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_mysql_to_hive"

        mysql_table = 'test_mysql_to_hive'
        hive_table = 'test_mysql_to_hive'

        hook = MySqlHook()

        try:
            minmax = (
                255,
                65535,
                16777215,
                4294967295,
                18446744073709551615,
                -128,
                -32768,
                -8388608,
                -2147483648,
                -9223372036854775808,
            )

            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
                conn.execute("""
                    CREATE TABLE {} (
                        c0 TINYINT   UNSIGNED,
                        c1 SMALLINT  UNSIGNED,
                        c2 MEDIUMINT UNSIGNED,
                        c3 INT       UNSIGNED,
                        c4 BIGINT    UNSIGNED,
                        c5 TINYINT,
                        c6 SMALLINT,
                        c7 MEDIUMINT,
                        c8 INT,
                        c9 BIGINT
                    )
                """.format(mysql_table))
                conn.execute("""
                    INSERT INTO {} VALUES (
                        {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
                    )
                """.format(mysql_table, *minmax))

            with mock.patch.dict('os.environ', self.env_vars):
                op = MySqlToHiveOperator(
                    task_id='test_m2h',
                    hive_cli_conn_id='hive_cli_default',
                    sql=f"SELECT * FROM {mysql_table}",
                    hive_table=hive_table,
                    recreate=True,
                    delimiter=",",
                    dag=self.dag,
                )
                op.run(start_date=DEFAULT_DATE,
                       end_date=DEFAULT_DATE,
                       ignore_ti_state=True)

                mock_cursor = MockConnectionCursor()
                mock_cursor.iterable = [minmax]
                hive_hook = MockHiveServer2Hook(connection_cursor=mock_cursor)

                result = hive_hook.get_records(f"SELECT * FROM {hive_table}")
                assert result[0] == minmax

                hive_cmd = [
                    'beeline',
                    '-u',
                    '"jdbc:hive2://localhost:10000/default"',
                    '-hiveconf',
                    'airflow.ctx.dag_id=unit_test_dag',
                    '-hiveconf',
                    'airflow.ctx.task_id=test_m2h',
                    '-hiveconf',
                    'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
                    '-hiveconf',
                    'airflow.ctx.dag_run_id=55',
                    '-hiveconf',
                    'airflow.ctx.dag_owner=airflow',
                    '-hiveconf',
                    '[email protected]',
                    '-hiveconf',
                    'mapreduce.job.queuename=airflow',
                    '-hiveconf',
                    'mapred.job.queue.name=airflow',
                    '-hiveconf',
                    'tez.queue.name=airflow',
                    '-f',
                    '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive',
                ]

                mock_popen.assert_called_with(
                    hive_cmd,
                    stdout=mock_subprocess.PIPE,
                    stderr=mock_subprocess.STDOUT,
                    cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
                    close_fds=True,
                )

        finally:
            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
예제 #4
0
    def test_mysql_to_hive_verify_csv_special_char(self, mock_popen,
                                                   mock_temp_dir):
        mock_subprocess = MockSubProcess()
        mock_popen.return_value = mock_subprocess
        mock_temp_dir.return_value = "test_mysql_to_hive"

        mysql_table = 'test_mysql_to_hive'
        hive_table = 'test_mysql_to_hive'

        hook = MySqlHook()

        try:
            db_record = ('c0', '["true"]')
            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
                conn.execute("""
                    CREATE TABLE {} (
                        c0 VARCHAR(25),
                        c1 VARCHAR(25)
                    )
                """.format(mysql_table))
                conn.execute("""
                    INSERT INTO {} VALUES (
                        '{}', '{}'
                    )
                """.format(mysql_table, *db_record))

            with mock.patch.dict('os.environ', self.env_vars):
                import unicodecsv as csv

                op = MySqlToHiveOperator(
                    task_id='test_m2h',
                    hive_cli_conn_id='hive_cli_default',
                    sql=f"SELECT * FROM {mysql_table}",
                    hive_table=hive_table,
                    recreate=True,
                    delimiter=",",
                    quoting=csv.QUOTE_NONE,
                    quotechar='',
                    escapechar='@',
                    dag=self.dag,
                )
                op.run(start_date=DEFAULT_DATE,
                       end_date=DEFAULT_DATE,
                       ignore_ti_state=True)

                mock_cursor = MockConnectionCursor()
                mock_cursor.iterable = [('c0', '["true"]'), (2, 2)]
                hive_hook = MockHiveServer2Hook(connection_cursor=mock_cursor)

                result = hive_hook.get_records(f"SELECT * FROM {hive_table}")
            assert result[0] == db_record

            hive_cmd = [
                'beeline',
                '-u',
                '"jdbc:hive2://localhost:10000/default"',
                '-hiveconf',
                'airflow.ctx.dag_id=unit_test_dag',
                '-hiveconf',
                'airflow.ctx.task_id=test_m2h',
                '-hiveconf',
                'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
                '-hiveconf',
                'airflow.ctx.dag_run_id=55',
                '-hiveconf',
                'airflow.ctx.dag_owner=airflow',
                '-hiveconf',
                '[email protected]',
                '-hiveconf',
                'mapreduce.job.queuename=airflow',
                '-hiveconf',
                'mapred.job.queue.name=airflow',
                '-hiveconf',
                'tez.queue.name=airflow',
                '-f',
                '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive',
            ]

            mock_popen.assert_called_with(
                hive_cmd,
                stdout=mock_subprocess.PIPE,
                stderr=mock_subprocess.STDOUT,
                cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
                close_fds=True,
            )
        finally:
            with hook.get_conn() as conn:
                conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")