def test_mysql_to_hive_verify_loaded_values(self):
    mysql_conn_id = 'airflow_ci'
    mysql_table = 'test_mysql_to_hive'
    hive_table = 'test_mysql_to_hive'
    from airflow.hooks.mysql_hook import MySqlHook
    m = MySqlHook(mysql_conn_id)
    try:
        # Boundary values: max of each unsigned type, min of each signed type.
        minmax = (
            255, 65535, 16777215, 4294967295, 18446744073709551615,
            -128, -32768, -8388608, -2147483648, -9223372036854775808
        )
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            c.execute("""
                CREATE TABLE {} (
                    c0 TINYINT UNSIGNED,
                    c1 SMALLINT UNSIGNED,
                    c2 MEDIUMINT UNSIGNED,
                    c3 INT UNSIGNED,
                    c4 BIGINT UNSIGNED,
                    c5 TINYINT,
                    c6 SMALLINT,
                    c7 MEDIUMINT,
                    c8 INT,
                    c9 BIGINT
                )
            """.format(mysql_table))
            c.execute("""
                INSERT INTO {} VALUES (
                    {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
                )
            """.format(mysql_table, *minmax))

        from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
        t = MySqlToHiveTransfer(
            task_id='test_m2h',
            mysql_conn_id=mysql_conn_id,
            hive_cli_conn_id='beeline_default',
            sql="SELECT * FROM {}".format(mysql_table),
            hive_table=hive_table,
            recreate=True,
            delimiter=",",
            dag=self.dag)
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
              ignore_ti_state=True)

        from airflow.hooks.hive_hooks import HiveServer2Hook
        h = HiveServer2Hook()
        r = h.get_records("SELECT * FROM {}".format(hive_table))
        self.assertEqual(r[0], minmax)
    finally:
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))

def test_mysql_to_hive(self):
    from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
    sql = "SELECT * FROM baby_names LIMIT 1000;"
    op = MySqlToHiveTransfer(
        task_id='test_m2h',
        hive_cli_conn_id='hive_cli_default',
        sql=sql,
        hive_table='test_mysql_to_hive',
        recreate=True,
        delimiter=",",
        dag=self.dag)
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
           ignore_ti_state=True)

def test_mysql_to_hive_tblproperties(self):
    from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
    sql = "SELECT * FROM baby_names LIMIT 1000;"
    t = MySqlToHiveTransfer(
        task_id='test_m2h',
        hive_cli_conn_id='beeline_default',
        sql=sql,
        hive_table='test_mysql_to_hive',
        recreate=True,
        delimiter=",",
        tblproperties={'test_property': 'test_value'},
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
          ignore_ti_state=True)

def test_mysql_to_hive(self):
    from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
    sql = "SELECT * FROM baby_names LIMIT 1000;"
    t = MySqlToHiveTransfer(
        task_id='test_m2h',
        mysql_conn_id='airflow_ci',
        hive_cli_conn_id='beeline_default',
        sql=sql,
        hive_table='test_mysql_to_hive',
        recreate=True,
        delimiter=",",
        dag=self.dag)
    # 'force' was the legacy name for this flag; current Airflow expects
    # ignore_ti_state.
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
          ignore_ti_state=True)

def test_mysql_to_hive_partition(self):
    from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
    sql = "SELECT * FROM baby_names LIMIT 1000;"
    t = MySqlToHiveTransfer(
        task_id='test_m2h',
        hive_cli_conn_id='beeline_default',
        sql=sql,
        hive_table='test_mysql_to_hive_part',
        partition={'ds': DEFAULT_DATE_DS},
        recreate=False,
        create=True,
        delimiter=",",
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
          ignore_ti_state=True)

def test_mysql_to_hive_tblproperties(self):
    from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
    sql = "SELECT * FROM baby_names LIMIT 1000;"
    t = MySqlToHiveTransfer(
        task_id='test_m2h',
        mysql_conn_id='airflow_ci',
        hive_cli_conn_id='beeline_default',
        sql=sql,
        hive_table='test_mysql_to_hive',
        recreate=True,
        delimiter=",",
        tblproperties={'test_property': 'test_value'},
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
          ignore_ti_state=True)

def test_mysql_to_hive_partition(self):
    from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
    sql = "SELECT * FROM baby_names LIMIT 1000;"
    t = MySqlToHiveTransfer(
        task_id='test_m2h',
        mysql_conn_id='airflow_ci',
        hive_cli_conn_id='beeline_default',
        sql=sql,
        hive_table='test_mysql_to_hive_part',
        partition={'ds': DEFAULT_DATE_DS},
        recreate=False,
        create=True,
        delimiter=",",
        dag=self.dag)
    t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
          ignore_ti_state=True)

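# Hedged note (a reading of the parameters above, not something the tests
# assert): with create=True and recreate=False the transfer emits
# CREATE TABLE IF NOT EXISTS instead of DROP + CREATE, and the batch is loaded
# into the ds=DEFAULT_DATE_DS partition, overwriting that partition on rerun.
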
def test_mysql_to_hive_verify_csv_special_char(self):
    mysql_table = 'test_mysql_to_hive'
    hive_table = 'test_mysql_to_hive'
    from airflow.hooks.mysql_hook import MySqlHook
    hook = MySqlHook()
    try:
        db_record = ('c0', '["true"]')
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            conn.execute("""
                CREATE TABLE {} (
                    c0 VARCHAR(25),
                    c1 VARCHAR(25)
                )
            """.format(mysql_table))
            conn.execute("""
                INSERT INTO {} VALUES (
                    '{}', '{}'
                )
            """.format(mysql_table, *db_record))

        from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
        import unicodecsv as csv

        op = MySqlToHiveTransfer(
            task_id='test_m2h',
            hive_cli_conn_id='hive_cli_default',
            sql="SELECT * FROM {}".format(mysql_table),
            hive_table=hive_table,
            recreate=True,
            delimiter=",",
            quoting=csv.QUOTE_NONE,
            quotechar='',
            escapechar='@',
            dag=self.dag)
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
               ignore_ti_state=True)

        from airflow.providers.apache.hive.hooks.hive import HiveServer2Hook
        hive_hook = HiveServer2Hook()
        result = hive_hook.get_records("SELECT * FROM {}".format(hive_table))
        self.assertEqual(result[0], db_record)
    finally:
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))

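# Hedged illustration (not part of the original suite): why the test above
# disables quoting. With the default QUOTE_MINIMAL dialect, a field containing
# the quote character is wrapped in quotes and its embedded quotes doubled, so
# Hive would load a mangled value; QUOTE_NONE with quotechar='' and an
# escapechar writes the field verbatim. The helper name is illustrative only.
def _demo_csv_quoting_of_special_chars():
    import io
    import unicodecsv as csv

    record = ('c0', '["true"]')

    # Default dialect: the second field gains surrounding quotes and its
    # embedded quotes are doubled.
    quoted = io.BytesIO()
    csv.writer(quoted, delimiter=',').writerow(record)
    assert quoted.getvalue() == b'c0,"[""true""]"\r\n'

    # The kwargs the test passes through MySqlToHiveTransfer: the field is
    # written byte-for-byte as it came out of MySQL.
    verbatim = io.BytesIO()
    csv.writer(verbatim, delimiter=',', quoting=csv.QUOTE_NONE,
               quotechar='', escapechar='@').writerow(record)
    assert verbatim.getvalue() == b'c0,["true"]\r\n'
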
def test_mysql_to_hive_type_conversion(self, mock_load_file):
    mysql_conn_id = 'airflow_ci'
    mysql_table = 'test_mysql_to_hive'
    from airflow.hooks.mysql_hook import MySqlHook
    m = MySqlHook(mysql_conn_id)
    try:
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            c.execute("""
                CREATE TABLE {} (
                    c0 TINYINT,
                    c1 SMALLINT,
                    c2 MEDIUMINT,
                    c3 INT,
                    c4 BIGINT,
                    c5 TIMESTAMP
                )
            """.format(mysql_table))

        from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
        t = MySqlToHiveTransfer(
            task_id='test_m2h',
            mysql_conn_id=mysql_conn_id,
            hive_cli_conn_id='beeline_default',
            sql="SELECT * FROM {}".format(mysql_table),
            hive_table='test_mysql_to_hive',
            dag=self.dag)
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
              ignore_ti_state=True)

        mock_load_file.assert_called_once()
        # Each MySQL integer type must map to the next wider Hive type so
        # unsigned maxima still fit.
        d = OrderedDict()
        d["c0"] = "SMALLINT"
        d["c1"] = "INT"
        d["c2"] = "INT"
        d["c3"] = "BIGINT"
        d["c4"] = "DECIMAL(38,0)"
        d["c5"] = "TIMESTAMP"
        self.assertEqual(mock_load_file.call_args[1]["field_dict"], d)
    finally:
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))

def test_mysql_to_hive_type_conversion(self, mock_load_file):
    mysql_table = 'test_mysql_to_hive'
    from airflow.hooks.mysql_hook import MySqlHook
    hook = MySqlHook()
    try:
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            conn.execute("""
                CREATE TABLE {} (
                    c0 TINYINT,
                    c1 SMALLINT,
                    c2 MEDIUMINT,
                    c3 INT,
                    c4 BIGINT,
                    c5 TIMESTAMP
                )
            """.format(mysql_table))

        from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
        op = MySqlToHiveTransfer(
            task_id='test_m2h',
            hive_cli_conn_id='hive_cli_default',
            sql="SELECT * FROM {}".format(mysql_table),
            hive_table='test_mysql_to_hive',
            dag=self.dag)
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
               ignore_ti_state=True)

        assert mock_load_file.call_count == 1
        ordered_dict = OrderedDict()
        ordered_dict["c0"] = "SMALLINT"
        ordered_dict["c1"] = "INT"
        ordered_dict["c2"] = "INT"
        ordered_dict["c3"] = "BIGINT"
        ordered_dict["c4"] = "DECIMAL(38,0)"
        ordered_dict["c5"] = "TIMESTAMP"
        self.assertEqual(mock_load_file.call_args[1]["field_dict"],
                         ordered_dict)
    finally:
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))

def test_mysql_to_hive_type_conversion(self, mock_load_file):
    mysql_conn_id = 'airflow_ci'
    mysql_table = 'test_mysql_to_hive'
    from airflow.hooks.mysql_hook import MySqlHook
    m = MySqlHook(mysql_conn_id)
    try:
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            c.execute("""
                CREATE TABLE {} (
                    c0 TINYINT,
                    c1 SMALLINT,
                    c2 MEDIUMINT,
                    c3 INT,
                    c4 BIGINT
                )
            """.format(mysql_table))

        from airflow.operators.mysql_to_hive import MySqlToHiveTransfer
        t = MySqlToHiveTransfer(
            task_id='test_m2h',
            mysql_conn_id=mysql_conn_id,
            hive_cli_conn_id='beeline_default',
            sql="SELECT * FROM {}".format(mysql_table),
            hive_table='test_mysql_to_hive',
            dag=self.dag)
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
              ignore_ti_state=True)

        mock_load_file.assert_called_once()
        d = OrderedDict()
        d["c0"] = "SMALLINT"
        d["c1"] = "INT"
        d["c2"] = "INT"
        d["c3"] = "BIGINT"
        d["c4"] = "DECIMAL(38,0)"
        self.assertEqual(mock_load_file.call_args[1]["field_dict"], d)
    finally:
        with m.get_conn() as c:
            c.execute("DROP TABLE IF EXISTS {}".format(mysql_table))

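# Hedged note (assumption, not shown in these excerpts): the
# test_mysql_to_hive_type_conversion variants above each receive a
# mock_load_file argument, which implies they are wrapped in a mock.patch
# decorator that replaces HiveCliHook.load_file so no data is actually shipped
# to Hive. The exact patch target depends on the Airflow version; roughly:
#
#     from unittest import mock
#
#     @mock.patch('airflow.hooks.hive_hooks.HiveCliHook.load_file')
#     def test_mysql_to_hive_type_conversion(self, mock_load_file):
#         ...
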
def test_mysql_to_hive_tblproperties(self, mock_popen, mock_temp_dir):
    mock_subprocess = MockSubProcess()
    mock_popen.return_value = mock_subprocess
    mock_temp_dir.return_value = "test_mysql_to_hive_tblproperties"

    with mock.patch.dict('os.environ', self.env_vars):
        sql = "SELECT * FROM baby_names LIMIT 1000;"
        t = MySqlToHiveTransfer(
            task_id='test_m2h',
            hive_cli_conn_id='beeline_default',
            sql=sql,
            hive_table='test_mysql_to_hive_tblproperties',
            recreate=True,
            delimiter=",",
            tblproperties={'test_property': 'test_value'},
            dag=self.dag)
        t.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
              ignore_ti_state=True)

    hive_cmd = [
        u'beeline',
        u'-u', u'"jdbc:hive2://localhost:10000/default"',
        u'-hiveconf', u'[email protected]',
        u'-hiveconf', u'airflow.ctx.dag_id=test_dag_id',
        u'-hiveconf', u'airflow.ctx.dag_owner=airflow',
        u'-hiveconf', u'airflow.ctx.dag_run_id=55',
        u'-hiveconf', u'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
        u'-hiveconf', u'airflow.ctx.task_id=test_task_id',
        u'-hiveconf', u'mapreduce.job.queuename=airflow',
        u'-hiveconf', u'mapred.job.queue.name=airflow',
        u'-hiveconf', u'tez.queue.name=airflow',
        u'-f', u'/tmp/airflow_hiveop_test_mysql_to_hive_tblproperties/'
               u'tmptest_mysql_to_hive_tblproperties'
    ]

    mock_popen.assert_called_with(
        hive_cmd,
        stdout=mock_subprocess.PIPE,
        stderr=mock_subprocess.STDOUT,
        cwd=u"/tmp/airflow_hiveop_test_mysql_to_hive_tblproperties",
        close_fds=True)

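# Hedged note (assumption): the two mocked-CLI tests in this file take
# mock_popen and mock_temp_dir parameters, which implies stacked mock.patch
# decorators so the assembled beeline/hive command line can be asserted
# without a live cluster. 'subprocess.Popen' is the call HiveCliHook.run_cli
# ultimately makes; the temporary-directory patch target varies by Airflow
# version and is shown only as a placeholder. Decorators apply bottom-up, so
# the bottom-most patch arrives as the first mock argument:
#
#     from unittest import mock
#
#     @mock.patch('tempfile.mkdtemp')  # placeholder target
#     @mock.patch('subprocess.Popen')
#     def test_mysql_to_hive_tblproperties(self, mock_popen, mock_temp_dir):
#         ...
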
def test_mysql_to_hive_verify_loaded_values(self, mock_popen, mock_temp_dir):
    mock_subprocess = MockSubProcess()
    mock_popen.return_value = mock_subprocess
    mock_temp_dir.return_value = "test_mysql_to_hive"

    mysql_table = 'test_mysql_to_hive'
    hive_table = 'test_mysql_to_hive'
    hook = MySqlHook()
    try:
        # Boundary values: max of each unsigned type, min of each signed type.
        minmax = (
            255, 65535, 16777215, 4294967295, 18446744073709551615,
            -128, -32768, -8388608, -2147483648, -9223372036854775808
        )
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))
            conn.execute("""
                CREATE TABLE {} (
                    c0 TINYINT UNSIGNED,
                    c1 SMALLINT UNSIGNED,
                    c2 MEDIUMINT UNSIGNED,
                    c3 INT UNSIGNED,
                    c4 BIGINT UNSIGNED,
                    c5 TINYINT,
                    c6 SMALLINT,
                    c7 MEDIUMINT,
                    c8 INT,
                    c9 BIGINT
                )
            """.format(mysql_table))
            conn.execute("""
                INSERT INTO {} VALUES (
                    {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
                )
            """.format(mysql_table, *minmax))

        with mock.patch.dict('os.environ', self.env_vars):
            op = MySqlToHiveTransfer(
                task_id='test_m2h',
                hive_cli_conn_id='hive_cli_default',
                sql="SELECT * FROM {}".format(mysql_table),
                hive_table=hive_table,
                recreate=True,
                delimiter=",",
                dag=self.dag)
            op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE,
                   ignore_ti_state=True)

        mock_cursor = MockConnectionCursor()
        mock_cursor.iterable = [minmax]
        hive_hook = MockHiveServer2Hook(connection_cursor=mock_cursor)
        result = hive_hook.get_records("SELECT * FROM {}".format(hive_table))
        self.assertEqual(result[0], minmax)

        hive_cmd = [
            u'hive',
            u'-hiveconf', u'[email protected]',
            u'-hiveconf', u'airflow.ctx.dag_id=test_dag_id',
            u'-hiveconf', u'airflow.ctx.dag_owner=airflow',
            u'-hiveconf', u'airflow.ctx.dag_run_id=55',
            u'-hiveconf', u'airflow.ctx.execution_date=2015-01-01T00:00:00+00:00',
            u'-hiveconf', u'airflow.ctx.task_id=test_task_id',
            u'-hiveconf', u'mapreduce.job.queuename=airflow',
            u'-hiveconf', u'mapred.job.queue.name=airflow',
            u'-hiveconf', u'tez.queue.name=airflow',
            u'-f', u'/tmp/airflow_hiveop_test_mysql_to_hive/tmptest_mysql_to_hive'
        ]

        mock_popen.assert_called_with(
            hive_cmd,
            stdout=mock_subprocess.PIPE,
            stderr=mock_subprocess.STDOUT,
            cwd="/tmp/airflow_hiveop_test_mysql_to_hive",
            close_fds=True)
    finally:
        with hook.get_conn() as conn:
            conn.execute("DROP TABLE IF EXISTS {}".format(mysql_table))

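# Hedged sketch (assumption): MockSubProcess, MockConnectionCursor and
# MockHiveServer2Hook are helpers from the suite's own test utilities and are
# not shown in these excerpts. A minimal stand-in for the Popen handle that
# HiveCliHook.run_cli consumes only needs the attributes the mocked tests
# above touch; the name and defaults below are illustrative, not the real
# helper.
import io


class MockSubProcessSketch(object):  # hypothetical name
    PIPE = -1    # mirrors subprocess.PIPE, referenced by the assertions above
    STDOUT = -2  # mirrors subprocess.STDOUT

    def __init__(self, output=b''):
        self.stdout = io.BytesIO(output)  # run_cli reads this line by line
        self.returncode = 0               # pretend the CLI exited cleanly

    def wait(self):
        return self.returncode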