Beispiel #1
0
    def test_mysql_hook_test_bulk_load(self, client):
        """Check that ``MySqlHook.bulk_load`` loads a newline-separated file.

        Three values are written to a temporary file, loaded into the
        ``test_airflow`` table through the hook, and then read back with a
        plain ``SELECT``. The comparison ignores row order.

        :param client: value forwarded to ``MySqlContext``.
        """
        import tempfile

        with MySqlContext(client):
            expected = ("foo", "bar", "baz")
            payload = "\n".join(expected).encode('utf8')

            with tempfile.NamedTemporaryFile() as data_file:
                data_file.write(payload)
                # Flush so the bytes are on disk before the file is handed
                # to bulk_load by name.
                data_file.flush()

                hook = MySqlHook('airflow_db')
                with closing(hook.get_conn()) as conn, closing(conn.cursor()) as cursor:
                    # Start from an existing, empty table.
                    cursor.execute(
                        """
                            CREATE TABLE IF NOT EXISTS test_airflow (
                                dummy VARCHAR(50)
                            )
                        """
                    )
                    cursor.execute("TRUNCATE TABLE test_airflow")

                    hook.bulk_load("test_airflow", data_file.name)

                    cursor.execute("SELECT dummy FROM test_airflow")
                    fetched = [row[0] for row in cursor.fetchall()]
                    assert sorted(fetched) == sorted(expected)
Beispiel #2
0
    def test_mysql_hook_test_bulk_load(self):
        """Check that ``MySqlHook.bulk_load`` loads a newline-separated file.

        Three values are written to a temporary file, loaded into the
        ``test_airflow`` table through the hook, and read back with a plain
        ``SELECT``. The comparison ignores row order.
        """
        import tempfile
        from contextlib import closing

        records = ("foo", "bar", "baz")

        with tempfile.NamedTemporaryFile() as f:
            f.write("\n".join(records).encode('utf8'))
            # Flush so the bytes are on disk before the file is handed to
            # bulk_load by name.
            f.flush()

            hook = MySqlHook('airflow_db')
            # Do not use the connection itself as a context manager: what
            # ``with connection`` yields is driver/version dependent (legacy
            # MySQLdb yielded a cursor; newer mysqlclient releases do not),
            # so ``execute``/``fetchall`` may not exist on the yielded object.
            # An explicit cursor wrapped in ``closing`` works everywhere and
            # releases both cursor and connection deterministically.
            with closing(hook.get_conn()) as conn, closing(conn.cursor()) as cursor:
                cursor.execute("""
                    CREATE TABLE IF NOT EXISTS test_airflow (
                        dummy VARCHAR(50)
                    )
                """)
                cursor.execute("TRUNCATE TABLE test_airflow")
                hook.bulk_load("test_airflow", f.name)
                cursor.execute("SELECT dummy FROM test_airflow")
                results = tuple(result[0] for result in cursor.fetchall())
                self.assertEqual(sorted(results), sorted(records))
    def execute(self, context):
        """Run ``self.sql`` against Hive and write the result to MySQL.

        Steps, in order:

        1. Build the Hive configuration from ``context`` (via
           ``context_to_airflow_vars``) and overlay ``self.hive_conf`` on top
           when it is set.
        2. Extract the data: when ``self.bulk_load`` is truthy the result is
           dumped to a temporary tab-separated file without a header row;
           otherwise the records are fetched into memory.
        3. Run ``self.mysql_preoperator`` if set.
        4. Load the data into ``self.mysql_table`` with ``bulk_load`` (file
           path) or ``insert_rows`` (in-memory rows).
        5. Run ``self.mysql_postoperator`` if set.

        The temporary file is closed (and therefore removed) even when the
        Hive export, the preoperator, or the MySQL load raises.

        :param context: task context passed to ``context_to_airflow_vars``.
        """
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)

        self.log.info("Extracting data from Hive: %s", self.sql)
        hive_conf = context_to_airflow_vars(context)
        if self.hive_conf:
            # Operator-level settings take precedence over context-derived ones.
            hive_conf.update(self.hive_conf)

        # Stays None on the non-bulk path; doubles as the "bulk mode" marker
        # so the load branch always matches the extraction branch.
        tmp_file = None
        try:
            if self.bulk_load:
                tmp_file = NamedTemporaryFile()
                hive.to_csv(
                    self.sql,
                    tmp_file.name,
                    delimiter='\t',
                    lineterminator='\n',
                    output_header=False,
                    hive_conf=hive_conf,
                )
            else:
                hive_results = hive.get_records(self.sql, hive_conf=hive_conf)

            mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)

            if self.mysql_preoperator:
                self.log.info("Running MySQL preoperator")
                mysql.run(self.mysql_preoperator)

            self.log.info("Inserting rows into MySQL")
            if tmp_file is not None:
                mysql.bulk_load(table=self.mysql_table, tmp_file=tmp_file.name)
            else:
                mysql.insert_rows(table=self.mysql_table, rows=hive_results)
        finally:
            # Release the temp file on every exit path, not only on success;
            # on success this still happens before the postoperator runs.
            if tmp_file is not None:
                tmp_file.close()

        if self.mysql_postoperator:
            self.log.info("Running MySQL postoperator")
            mysql.run(self.mysql_postoperator)

        self.log.info("Done.")