def test_mysql_hook_test_bulk_load(self, client):
    """Check that ``MySqlHook.bulk_load`` loads a newline-delimited file.

    Three values are written to a temporary file, one per line, the file is
    handed to ``bulk_load`` for the ``test_airflow`` table, and the rows read
    back from the table must match the values written (order-insensitive).

    :param client: forwarded to ``MySqlContext``; presumably selects the
        MySQL client library under test -- confirm against ``MySqlContext``.
    """
    with MySqlContext(client):
        expected = ("foo", "bar", "baz")

        import tempfile

        with tempfile.NamedTemporaryFile() as data_file:
            # One record per line; flush so the bytes are on disk before the
            # file is read through its name by bulk_load.
            payload = "\n".join(expected).encode('utf8')
            data_file.write(payload)
            data_file.flush()

            hook = MySqlHook('airflow_db')
            with closing(hook.get_conn()) as conn, closing(conn.cursor()) as cursor:
                # Start from an existing, empty table.
                cursor.execute(
                    """ CREATE TABLE IF NOT EXISTS test_airflow ( dummy VARCHAR(50) ) """
                )
                cursor.execute("TRUNCATE TABLE test_airflow")

                hook.bulk_load("test_airflow", data_file.name)

                cursor.execute("SELECT dummy FROM test_airflow")
                loaded = tuple(row[0] for row in cursor.fetchall())
                assert sorted(loaded) == sorted(expected)
def test_mysql_hook_test_bulk_load(self):
    """Check that ``MySqlHook.bulk_load`` loads a newline-delimited file.

    Three values are written to a temporary file, one per line, the file is
    handed to ``bulk_load`` for the ``test_airflow`` table, and the rows read
    back from the table must match the values written (order-insensitive).
    """
    # Local imports, matching how this test already pulls in ``tempfile``.
    import tempfile
    from contextlib import closing

    records = ("foo", "bar", "baz")

    with tempfile.NamedTemporaryFile() as f:
        # Flush so the bytes are on disk before the file is read through its
        # name by bulk_load.
        f.write("\n".join(records).encode('utf8'))
        f.flush()

        hook = MySqlHook('airflow_db')
        # Ask for a cursor explicitly instead of relying on the connection's
        # own ``with`` protocol handing one back: execute()/fetchall() are
        # cursor methods, and what a connection's __enter__ returns is
        # driver- and version-specific. ``closing`` also guarantees that
        # both the cursor and the connection are released when the test
        # finishes or fails.
        with closing(hook.get_conn()) as conn:
            with closing(conn.cursor()) as cursor:
                # Start from an existing, empty table.
                cursor.execute(
                    """ CREATE TABLE IF NOT EXISTS test_airflow ( dummy VARCHAR(50) ) """
                )
                cursor.execute("TRUNCATE TABLE test_airflow")

                hook.bulk_load("test_airflow", f.name)

                cursor.execute("SELECT dummy FROM test_airflow")
                results = tuple(result[0] for result in cursor.fetchall())
                self.assertEqual(sorted(results), sorted(records))
def execute(self, context):
    """Run ``self.sql`` on Hive and load the result into ``self.mysql_table``.

    With ``self.bulk_load`` set, the Hive result is dumped to a temporary
    tab-delimited, header-less file that is passed to ``MySqlHook.bulk_load``;
    otherwise the records are fetched into memory and written with
    ``MySqlHook.insert_rows``. ``self.mysql_preoperator`` and
    ``self.mysql_postoperator``, when set, are run on MySQL before and after
    the load respectively.

    :param context: task context; converted with ``context_to_airflow_vars``
        into the Hive configuration, on top of which ``self.hive_conf``
        entries are applied.
    """
    hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)

    self.log.info("Extracting data from Hive: %s", self.sql)
    hive_conf = context_to_airflow_vars(context)
    if self.hive_conf:
        # Operator-level settings take precedence over context-derived ones.
        hive_conf.update(self.hive_conf)

    # The temporary file only exists in bulk-load mode. It is closed in the
    # ``finally`` below so that a failure in the Hive export, the
    # preoperator or the load itself cannot leak the handle or leave the
    # file behind.
    tmp_file = NamedTemporaryFile() if self.bulk_load else None
    try:
        if self.bulk_load:
            hive.to_csv(
                self.sql,
                tmp_file.name,
                delimiter='\t',
                lineterminator='\n',
                output_header=False,
                hive_conf=hive_conf,
            )
        else:
            hive_results = hive.get_records(self.sql, hive_conf=hive_conf)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)

        if self.mysql_preoperator:
            self.log.info("Running MySQL preoperator")
            mysql.run(self.mysql_preoperator)

        self.log.info("Inserting rows into MySQL")
        if self.bulk_load:
            mysql.bulk_load(table=self.mysql_table, tmp_file=tmp_file.name)
        else:
            mysql.insert_rows(table=self.mysql_table, rows=hive_results)
    finally:
        # On success this runs right after the load and before the
        # postoperator, i.e. at the same point the file was closed before.
        if tmp_file is not None:
            tmp_file.close()

    if self.mysql_postoperator:
        self.log.info("Running MySQL postoperator")
        mysql.run(self.mysql_postoperator)

    self.log.info("Done.")