def test_hive_dryrun(self, mock_popen, mock_temp_dir):
    """A dry-run of HiveOperator should invoke beeline with empty
    airflow.ctx.* hiveconf values (no task instance context is set)."""
    subprocess_mock = MockSubProcess()
    mock_popen.return_value = subprocess_mock
    mock_temp_dir.return_value = "tst"

    op = HiveOperator(task_id='dry_run_basic_hql', hql=self.hql, dag=self.dag)
    op.dry_run()

    # Expected beeline invocation: context vars are blank, queue confs fixed.
    expected_cmd = [
        'beeline',
        '-u', '"jdbc:hive2://localhost:10000/default"',
        '-hiveconf', 'airflow.ctx.dag_id=',
        '-hiveconf', 'airflow.ctx.task_id=',
        '-hiveconf', 'airflow.ctx.execution_date=',
        '-hiveconf', 'airflow.ctx.dag_run_id=',
        '-hiveconf', 'airflow.ctx.dag_owner=',
        '-hiveconf', 'airflow.ctx.dag_email=',
        '-hiveconf', 'mapreduce.job.queuename=airflow',
        '-hiveconf', 'mapred.job.queue.name=airflow',
        '-hiveconf', 'tez.queue.name=airflow',
        '-f', '/tmp/airflow_hiveop_tst/tmptst',
    ]
    mock_popen.assert_called_with(
        expected_cmd,
        stdout=subprocess_mock.PIPE,
        stderr=subprocess_mock.STDOUT,
        cwd="/tmp/airflow_hiveop_tst",
        close_fds=True,
    )
def test_beeline(self, mock_popen, mock_temp_dir):
    """Running a HiveOperator should launch beeline with the task's
    airflow.ctx.* values and the configured mapred job name."""
    subprocess_mock = MockSubProcess()
    mock_popen.return_value = subprocess_mock
    mock_temp_dir.return_value = "tst"

    # Expected beeline invocation for a real (non-dry) run.
    expected_cmd = [
        'beeline',
        '-u', '"jdbc:hive2://localhost:10000/default"',
        '-hiveconf', 'airflow.ctx.dag_id=test_dag_id',
        '-hiveconf', 'airflow.ctx.task_id=beeline_hql',
        '-hiveconf', 'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
        '-hiveconf', 'airflow.ctx.dag_run_id=',
        '-hiveconf', 'airflow.ctx.dag_owner=airflow',
        '-hiveconf', 'airflow.ctx.dag_email=',
        '-hiveconf', 'mapreduce.job.queuename=airflow',
        '-hiveconf', 'mapred.job.queue.name=airflow',
        '-hiveconf', 'tez.queue.name=airflow',
        '-hiveconf', 'mapred.job.name=test_job_name',
        '-f', '/tmp/airflow_hiveop_tst/tmptst',
    ]

    op = HiveOperator(
        task_id='beeline_hql',
        hive_cli_conn_id='hive_cli_default',
        hql=self.hql,
        dag=self.dag,
        mapred_job_name="test_job_name",
    )
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    mock_popen.assert_called_with(
        expected_cmd,
        stdout=subprocess_mock.PIPE,
        stderr=subprocess_mock.STDOUT,
        cwd="/tmp/airflow_hiveop_tst",
        close_fds=True,
    )
def test_run_cli(self, mock_popen, mock_temp_dir):
    """run_cli should forward each AIRFLOW_CTX_* environment variable as a
    matching airflow.ctx.* hiveconf on the beeline command line."""
    subprocess_mock = MockSubProcess()
    mock_popen.return_value = subprocess_mock
    mock_temp_dir.return_value = "test_run_cli"

    # Airflow task-context env vars the hook is expected to pick up.
    context_env = {
        'AIRFLOW_CTX_DAG_ID': 'test_dag_id',
        'AIRFLOW_CTX_TASK_ID': 'test_task_id',
        'AIRFLOW_CTX_EXECUTION_DATE': '2015-01-01T00:00:00+00:00',
        'AIRFLOW_CTX_DAG_RUN_ID': '55',
        'AIRFLOW_CTX_DAG_OWNER': 'airflow',
        'AIRFLOW_CTX_DAG_EMAIL': '*****@*****.**',
    }
    with mock.patch.dict('os.environ', context_env):
        hook = MockHiveCliHook()
        hook.run_cli("SHOW DATABASES")

    expected_cmd = [
        'beeline',
        '-u', '"jdbc:hive2://localhost:10000/default"',
        '-hiveconf', 'airflow.ctx.dag_id=test_dag_id',
        '-hiveconf', 'airflow.ctx.task_id=test_task_id',
        '-hiveconf', 'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
        '-hiveconf', 'airflow.ctx.dag_run_id=55',
        '-hiveconf', 'airflow.ctx.dag_owner=airflow',
        '-hiveconf', '[email protected]',
        '-hiveconf', 'mapreduce.job.queuename=airflow',
        '-hiveconf', 'mapred.job.queue.name=airflow',
        '-hiveconf', 'tez.queue.name=airflow',
        '-f', '/tmp/airflow_hiveop_test_run_cli/tmptest_run_cli',
    ]
    mock_popen.assert_called_with(
        expected_cmd,
        stdout=subprocess_mock.PIPE,
        stderr=subprocess_mock.STDOUT,
        cwd="/tmp/airflow_hiveop_test_run_cli",
        close_fds=True,
    )
def test_mysql_to_hive_tblproperties(self, mock_popen, mock_temp_dir):
    """MySqlToHiveOperator with tblproperties set should still produce the
    standard beeline command line (the properties do not appear as CLI args)."""
    subprocess_mock = MockSubProcess()
    mock_popen.return_value = subprocess_mock
    mock_temp_dir.return_value = "test_mysql_to_hive"

    with mock.patch.dict('os.environ', self.env_vars):
        query = "SELECT * FROM baby_names LIMIT 1000;"
        op = MySqlToHiveOperator(
            task_id='test_m2h',
            hive_cli_conn_id='hive_cli_default',
            sql=query,
            hive_table='test_mysql_to_hive',
            recreate=True,
            delimiter=",",
            tblproperties={'test_property': 'test_value'},
            dag=self.dag,
        )
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    expected_cmd = [
        'beeline',
        '-u', '"jdbc:hive2://localhost:10000/default"',
        '-hiveconf', 'airflow.ctx.dag_id=unit_test_dag',
        '-hiveconf', 'airflow.ctx.task_id=test_m2h',
        '-hiveconf', 'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
        '-hiveconf', 'airflow.ctx.dag_run_id=55',
        '-hiveconf', 'airflow.ctx.dag_owner=airflow',
        '-hiveconf', '[email protected]',
        '-hiveconf', 'mapreduce.job.queuename=airflow',
        '-hiveconf', 'mapred.job.queue.name=airflow',
        '-hiveconf', 'tez.queue.name=airflow',
        '-f', '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive',
    ]
    mock_popen.assert_called_with(
        expected_cmd,
        stdout=subprocess_mock.PIPE,
        stderr=subprocess_mock.STDOUT,
        cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
        close_fds=True,
    )
def test_execute_bteq_runcmd_return_last_line(self, mock_tmpfile, mock_tmpdir, mock_popen):
    """With xcom_push_flag=True, execute_bteq returns the last line of the
    BTEQ subprocess output (the return-code line)."""
    # Given: a mocked subprocess emitting the canned BTEQ output
    subprocess_mock = MockSubProcess(output=self._bteq_subprocess_output)
    subprocess_mock.returncode = 0
    mock_popen.return_value = subprocess_mock
    mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
    mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

    # When
    hook = TtuHook(ttu_conn_id='ttu_default')
    result = hook.execute_bteq(bteq="", xcom_push_flag=True)

    # Then: the final output line is returned for XCom
    self.assertEqual("*** RC (return code) = 0", result)
def test_execute_bteq_runcmd(self, mock_tmpfile, mock_tmpdir, mock_popen):
    """execute_bteq should spawn the `bteq` binary with piped stdio inside
    the temporary working directory."""
    # Given
    subprocess_mock = MockSubProcess()
    subprocess_mock.returncode = 0
    mock_popen.return_value = subprocess_mock
    mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
    mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

    # When
    hook = TtuHook(ttu_conn_id='ttu_default')
    hook.execute_bteq(bteq="")

    # Then: the subprocess is launched with the expected wiring
    mock_popen.assert_called_with(
        ['bteq'],
        stdin=mock.ANY,
        stdout=subprocess_mock.PIPE,
        stderr=subprocess_mock.STDOUT,
        cwd='/tmp/airflowtmp_ttu_bteq',
        preexec_fn=mock.ANY,
    )
def test_execute_bteq_runcmd_error_noraise(self, mock_tmpfile, mock_tmpdir, mock_popen):
    """BTEQ output containing errors but exiting with code 0 must not raise;
    the hook just logs the clean exit."""
    # Given: error lines in the output, but a zero return code
    subprocess_mock = MockSubProcess(output=self._bteq_error_no_failure_subprocess_output)
    subprocess_mock.returncode = 0
    mock_popen.return_value = subprocess_mock
    mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
    mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

    # When
    hook = TtuHook(ttu_conn_id='ttu_default')

    # Then: the last log record reports the zero exit code
    with self.assertLogs(level="INFO") as cm:
        hook.execute_bteq(bteq="")
    self.assertEqual(
        "INFO:airflow.providers.teradata.hooks.ttu.TtuHook:BTEQ command exited with return code 0",
        cm.output[-1],
    )
def test_execute_bteq_runcmd_error_raise(self, mock_tmpfile, mock_tmpdir, mock_popen):
    """A non-zero BTEQ return code must raise AirflowException carrying the
    failure line from the output."""
    # Given: failing output and a non-zero return code
    subprocess_mock = MockSubProcess(output=self._bteq_failure_subprocess_output)
    subprocess_mock.returncode = 311
    mock_popen.return_value = subprocess_mock
    mock_tmpdir.return_value.__enter__.return_value = '/tmp/airflowtmp_ttu_bteq'
    mock_tmpfile.return_value.__enter__.return_value.name = 'test.bteq'

    # When
    hook = TtuHook(ttu_conn_id='ttu_default')

    # Then: the exception message includes code and failure reason
    with self.assertRaises(AirflowException) as cm:
        hook.execute_bteq(bteq="")
    expected_msg = (
        "BTEQ command exited with return code 311 because of "
        "*** Failure 3706 Syntax error: expected something between '(' and the string 'test'"
    )
    self.assertEqual(str(cm.exception), expected_msg)
def test_mysql_to_hive_partition(self, mock_popen, mock_temp_dir):
    """Loading into a partitioned Hive table should still issue the standard
    beeline command (the partition spec does not appear as CLI args)."""
    subprocess_mock = MockSubProcess()
    mock_popen.return_value = subprocess_mock
    mock_temp_dir.return_value = "test_mysql_to_hive_part"

    with mock.patch.dict('os.environ', self.env_vars):
        query = "SELECT * FROM baby_names LIMIT 1000;"
        op = MySqlToHiveTransfer(
            task_id='test_m2h',
            hive_cli_conn_id='beeline_default',
            sql=query,
            hive_table='test_mysql_to_hive_part',
            partition={'ds': DEFAULT_DATE_DS},
            recreate=False,
            create=True,
            delimiter=",",
            dag=self.dag,
        )
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    # NOTE: this expected list is ordered differently from the other tests'
    # (hiveconf keys roughly alphabetical); assert_called_with compares the
    # list as-is, so the order is preserved here.
    expected_cmd = [
        'beeline',
        '-u', '"jdbc:hive2://localhost:10000/default"',
        '-hiveconf', '[email protected]',
        '-hiveconf', 'airflow.ctx.dag_id=test_dag_id',
        '-hiveconf', 'airflow.ctx.dag_owner=airflow',
        '-hiveconf', 'airflow.ctx.dag_run_id=55',
        '-hiveconf', 'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
        '-hiveconf', 'airflow.ctx.task_id=test_task_id',
        '-hiveconf', 'mapreduce.job.queuename=airflow',
        '-hiveconf', 'mapred.job.queue.name=airflow',
        '-hiveconf', 'tez.queue.name=airflow',
        '-f', '/tmp/airflow_hiveop_test_mysql_to_hive_part/tmptest_mysql_to_hive_part',
    ]
    mock_popen.assert_called_with(
        expected_cmd,
        stdout=subprocess_mock.PIPE,
        stderr=subprocess_mock.STDOUT,
        cwd="/tmp/airflow_hiveop_test_mysql_to_hive_part",
        close_fds=True,
    )
def test_run_cli_with_hive_conf(self, mock_popen):
    # Verify that run_cli injects both user-supplied hive_conf entries and the
    # Airflow context variables, and that their values flow through to both
    # the beeline command line and the captured output.

    # HQL that echoes back the injected conf values via `set`.
    hql = (
        "set key;\n"
        "set airflow.ctx.dag_id;\nset airflow.ctx.dag_run_id;\n"
        "set airflow.ctx.task_id;\nset airflow.ctx.execution_date;\n"
    )

    # Resolve the env-var names Airflow uses for its context variables.
    dag_id_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_DAG_ID']['env_var_format']
    task_id_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_TASK_ID']['env_var_format']
    execution_date_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_EXECUTION_DATE'][
        'env_var_format'
    ]
    dag_run_id_ctx_var_name = AIRFLOW_VAR_NAME_FORMAT_MAPPING['AIRFLOW_CONTEXT_DAG_RUN_ID'][
        'env_var_format'
    ]

    # Canned beeline session output the mocked subprocess will emit.
    mock_output = [
        'Connecting to jdbc:hive2://localhost:10000/default',
        'log4j:WARN No appenders could be found for logger (org.apache.hive.jdbc.Utils).',
        'log4j:WARN Please initialize the log4j system properly.',
        'log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info.',
        'Connected to: Apache Hive (version 1.2.1.2.3.2.0-2950)',
        'Driver: Hive JDBC (version 1.2.1.spark2)',
        'Transaction isolation: TRANSACTION_REPEATABLE_READ',
        '0: jdbc:hive2://localhost:10000/default> USE default;',
        'No rows affected (0.37 seconds)',
        '0: jdbc:hive2://localhost:10000/default> set key;',
        '+------------+--+',
        '| set |',
        '+------------+--+',
        '| key=value |',
        '+------------+--+',
        '1 row selected (0.133 seconds)',
        '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.dag_id;',
        '+---------------------------------+--+',
        '| set |',
        '+---------------------------------+--+',
        '| airflow.ctx.dag_id=test_dag_id |',
        '+---------------------------------+--+',
        '1 row selected (0.008 seconds)',
        '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.dag_run_id;',
        '+-----------------------------------------+--+',
        '| set |',
        '+-----------------------------------------+--+',
        '| airflow.ctx.dag_run_id=test_dag_run_id |',
        '+-----------------------------------------+--+',
        '1 row selected (0.007 seconds)',
        '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.task_id;',
        '+-----------------------------------+--+',
        '| set |',
        '+-----------------------------------+--+',
        '| airflow.ctx.task_id=test_task_id |',
        '+-----------------------------------+--+',
        '1 row selected (0.009 seconds)',
        '0: jdbc:hive2://localhost:10000/default> set airflow.ctx.execution_date;',
        '+-------------------------------------------------+--+',
        '| set |',
        '+-------------------------------------------------+--+',
        '| airflow.ctx.execution_date=test_execution_date |',
        '+-------------------------------------------------+--+',
        '1 row selected (0.006 seconds)',
        '0: jdbc:hive2://localhost:10000/default> ',
        '0: jdbc:hive2://localhost:10000/default> ',
        'Closing: 0: jdbc:hive2://localhost:10000/default',
        '',
    ]

    with mock.patch.dict(
        'os.environ',
        {
            dag_id_ctx_var_name: 'test_dag_id',
            task_id_ctx_var_name: 'test_task_id',
            execution_date_ctx_var_name: 'test_execution_date',
            dag_run_id_ctx_var_name: 'test_dag_run_id',
        },
    ):
        hook = MockHiveCliHook()
        mock_popen.return_value = MockSubProcess(output=mock_output)

        output = hook.run_cli(hql=hql, hive_conf={'key': 'value'})

        # Flatten the argv passed to Popen so we can search it as one string.
        process_inputs = " ".join(mock_popen.call_args_list[0][0][0])

        # Command line must carry the user conf and all context values ...
        self.assertIn('value', process_inputs)
        self.assertIn('test_dag_id', process_inputs)
        self.assertIn('test_task_id', process_inputs)
        self.assertIn('test_execution_date', process_inputs)
        self.assertIn('test_dag_run_id', process_inputs)

        # ... and so must the captured beeline output.
        self.assertIn('value', output)
        self.assertIn('test_dag_id', output)
        self.assertIn('test_task_id', output)
        self.assertIn('test_execution_date', output)
        self.assertIn('test_dag_run_id', output)
def test_mysql_to_hive_verify_loaded_values(self, mock_popen, mock_temp_dir):
    # End-to-end check that integer boundary values survive the MySQL ->
    # Hive transfer, and that the expected beeline command is issued.
    mock_subprocess = MockSubProcess()
    mock_popen.return_value = mock_subprocess
    mock_temp_dir.return_value = "test_mysql_to_hive"

    mysql_table = 'test_mysql_to_hive'
    hive_table = 'test_mysql_to_hive'
    hook = MySqlHook()

    try:
        # Boundary values for each MySQL integer type:
        # unsigned maxima (c0-c4) and signed minima (c5-c9).
        minmax = (
            255,
            65535,
            16777215,
            4294967295,
            18446744073709551615,
            -128,
            -32768,
            -8388608,
            -2147483648,
            -9223372036854775808,
        )

        with hook.get_conn() as conn:
            conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
            conn.execute("""
                CREATE TABLE {} (
                    c0 TINYINT UNSIGNED,
                    c1 SMALLINT UNSIGNED,
                    c2 MEDIUMINT UNSIGNED,
                    c3 INT UNSIGNED,
                    c4 BIGINT UNSIGNED,
                    c5 TINYINT,
                    c6 SMALLINT,
                    c7 MEDIUMINT,
                    c8 INT,
                    c9 BIGINT
                )
            """.format(mysql_table))
            conn.execute("""
                INSERT INTO {} VALUES (
                    {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
                )
            """.format(mysql_table, *minmax))

        with mock.patch.dict('os.environ', self.env_vars):
            op = MySqlToHiveOperator(
                task_id='test_m2h',
                hive_cli_conn_id='hive_cli_default',
                sql=f"SELECT * FROM {mysql_table}",
                hive_table=hive_table,
                recreate=True,
                delimiter=",",
                dag=self.dag,
            )
            op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

            # Query back through a mocked HiveServer2 cursor seeded with the
            # same row, and verify the values round-trip unchanged.
            mock_cursor = MockConnectionCursor()
            mock_cursor.iterable = [minmax]
            hive_hook = MockHiveServer2Hook(connection_cursor=mock_cursor)

            result = hive_hook.get_records(f"SELECT * FROM {hive_table}")
            assert result[0] == minmax

            # Beeline command expected for the transfer task.
            hive_cmd = [
                'beeline',
                '-u',
                '"jdbc:hive2://localhost:10000/default"',
                '-hiveconf',
                'airflow.ctx.dag_id=unit_test_dag',
                '-hiveconf',
                'airflow.ctx.task_id=test_m2h',
                '-hiveconf',
                'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
                '-hiveconf',
                'airflow.ctx.dag_run_id=55',
                '-hiveconf',
                'airflow.ctx.dag_owner=airflow',
                '-hiveconf',
                '[email protected]',
                '-hiveconf',
                'mapreduce.job.queuename=airflow',
                '-hiveconf',
                'mapred.job.queue.name=airflow',
                '-hiveconf',
                'tez.queue.name=airflow',
                '-f',
                '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive',
            ]

            mock_popen.assert_called_with(
                hive_cmd,
                stdout=mock_subprocess.PIPE,
                stderr=mock_subprocess.STDOUT,
                cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
                close_fds=True,
            )
    finally:
        # Always clean up the MySQL-side fixture table.
        with hook.get_conn() as conn:
            conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
def test_mysql_to_hive_verify_csv_special_char(self, mock_popen, mock_temp_dir):
    # Verify that values containing CSV special characters (quotes, brackets)
    # survive the MySQL -> Hive transfer when quoting is disabled and an
    # escape character is used, and that the expected beeline command runs.
    mock_subprocess = MockSubProcess()
    mock_popen.return_value = mock_subprocess
    mock_temp_dir.return_value = "test_mysql_to_hive"

    mysql_table = 'test_mysql_to_hive'
    hive_table = 'test_mysql_to_hive'
    hook = MySqlHook()

    try:
        # Row whose second column embeds quotes and brackets.
        db_record = ('c0', '["true"]')
        with hook.get_conn() as conn:
            conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")
            conn.execute("""
                CREATE TABLE {} (
                    c0 VARCHAR(25),
                    c1 VARCHAR(25)
                )
            """.format(mysql_table))
            conn.execute("""
                INSERT INTO {} VALUES (
                    '{}', '{}'
                )
            """.format(mysql_table, *db_record))

        with mock.patch.dict('os.environ', self.env_vars):
            import unicodecsv as csv

            op = MySqlToHiveOperator(
                task_id='test_m2h',
                hive_cli_conn_id='hive_cli_default',
                sql=f"SELECT * FROM {mysql_table}",
                hive_table=hive_table,
                recreate=True,
                delimiter=",",
                quoting=csv.QUOTE_NONE,  # no quoting; rely on escapechar
                quotechar='',
                escapechar='@',
                dag=self.dag,
            )
            op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

            # Read back through a mocked HiveServer2 cursor seeded with the
            # same record and verify it round-trips unchanged.
            mock_cursor = MockConnectionCursor()
            mock_cursor.iterable = [('c0', '["true"]'), (2, 2)]
            hive_hook = MockHiveServer2Hook(connection_cursor=mock_cursor)

            result = hive_hook.get_records(f"SELECT * FROM {hive_table}")
            assert result[0] == db_record

            # Beeline command expected for the transfer task.
            hive_cmd = [
                'beeline',
                '-u',
                '"jdbc:hive2://localhost:10000/default"',
                '-hiveconf',
                'airflow.ctx.dag_id=unit_test_dag',
                '-hiveconf',
                'airflow.ctx.task_id=test_m2h',
                '-hiveconf',
                'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
                '-hiveconf',
                'airflow.ctx.dag_run_id=55',
                '-hiveconf',
                'airflow.ctx.dag_owner=airflow',
                '-hiveconf',
                '[email protected]',
                '-hiveconf',
                'mapreduce.job.queuename=airflow',
                '-hiveconf',
                'mapred.job.queue.name=airflow',
                '-hiveconf',
                'tez.queue.name=airflow',
                '-f',
                '/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive',
            ]

            mock_popen.assert_called_with(
                hive_cmd,
                stdout=mock_subprocess.PIPE,
                stderr=mock_subprocess.STDOUT,
                cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
                close_fds=True,
            )
    finally:
        # Always clean up the MySQL-side fixture table.
        with hook.get_conn() as conn:
            conn.execute(f"DROP TABLE IF EXISTS {mysql_table}")