def execute(self, context):
    """Copy the result of ``self.sql`` from Hive into ``self.mysql_table``.

    Steps, in order:

    1. Run ``self.sql`` against HiveServer2. When ``self.bulk_load`` is
       truthy the result is written to a temporary file as tab-delimited,
       newline-terminated text without a header row; otherwise the records
       are fetched into memory.
    2. Run ``self.mysql_preoperator`` on MySQL, if one is set.
    3. Load the data into ``self.mysql_table`` via ``mysql.bulk_load`` (temp
       file path) or ``mysql.insert_rows`` (in-memory records).
    4. Run ``self.mysql_postoperator`` on MySQL, if one is set.

    The temporary file is closed as soon as the load step finishes *or*
    fails, so an error in the Hive export, the preoperator, or the MySQL
    load no longer leaks the file handle.

    :param context: execution context supplied by the caller; not used by
        this method.
    """
    hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)
    logging.info("Extracting data from Hive")
    logging.info(self.sql)

    tmpfile = None
    try:
        if self.bulk_load:
            tmpfile = NamedTemporaryFile()
            hive.to_csv(
                self.sql,
                tmpfile.name,
                delimiter='\t',
                lineterminator='\n',
                output_header=False,
            )
        else:
            results = hive.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.mysql_preoperator:
            logging.info("Running MySQL preoperator")
            mysql.run(self.mysql_preoperator)

        logging.info("Inserting rows into MySQL")
        if self.bulk_load:
            mysql.bulk_load(table=self.mysql_table, tmp_file=tmpfile.name)
        else:
            mysql.insert_rows(table=self.mysql_table, rows=results)
    finally:
        # Release the temp file on every exit path, not only on success.
        if tmpfile is not None:
            tmpfile.close()

    if self.mysql_postoperator:
        logging.info("Running MySQL postoperator")
        mysql.run(self.mysql_postoperator)

    logging.info("Done.")
def execute(self, context):
    """Export the result of ``self.hql`` to a CSV file and push it to Samba.

    The query is run through ``HiveServer2Hook.to_csv`` into a temporary
    file, whose path is then handed to ``SambaHook.push_from_local``
    together with ``self.destination_filepath``.

    The temporary file is managed by a ``with`` block so it is closed when
    the upload finishes or when either step raises; previously it was never
    closed explicitly.

    :param context: execution context supplied by the caller; not used by
        this method.
    """
    samba = SambaHook(samba_conn_id=self.samba_conn_id)
    hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)

    # Closing a NamedTemporaryFile also removes it from disk by default
    # (delete=True), so nothing is left behind on failure.
    with tempfile.NamedTemporaryFile() as tmpfile:
        logging.info("Fetching file from Hive")
        hive.to_csv(hql=self.hql, csv_filepath=tmpfile.name)
        logging.info("Pushing to samba")
        samba.push_from_local(self.destination_filepath, tmpfile.name)
def execute(self, context):
    """Fetch the records for ``self.sql`` from Hive and insert them into MySQL.

    All records are pulled into memory with ``get_records`` and then passed
    to ``insert_rows`` for ``self.mysql_table``.

    :param context: execution context supplied by the caller; not used by
        this method.
    """
    # NOTE(review): the connection id is read from ``self.hive_cli_conn_id``
    # but passed as ``hiveserver2_conn_id`` -- confirm the attribute name
    # against the class's __init__.
    hive_hook = HiveServer2Hook(hiveserver2_conn_id=self.hive_cli_conn_id)

    logging.info("Extracting data from Hive")
    rows = hive_hook.get_records(self.sql)

    mysql_hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)

    logging.info("Inserting rows into MySQL")
    mysql_hook.insert_rows(table=self.mysql_table, rows=rows)
def execute(self, context):
    """Fetch the records for ``self.sql`` from Hive and insert them into MySQL.

    The query text is logged before it is run. If ``self.mysql_preoperator``
    is set, it is logged and executed on MySQL after the Hive extraction and
    before the rows are inserted into ``self.mysql_table``.

    :param context: execution context supplied by the caller; not used by
        this method.
    """
    hive_hook = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)

    logging.info("Extracting data from Hive")
    logging.info(self.sql)
    rows = hive_hook.get_records(self.sql)

    mysql_hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)

    # Optional statement to run on MySQL before loading the rows.
    if self.mysql_preoperator:
        logging.info("Running MySQL preoperator")
        logging.info(self.mysql_preoperator)
        mysql_hook.run(self.mysql_preoperator)

    logging.info("Inserting rows into MySQL")
    mysql_hook.insert_rows(table=self.mysql_table, rows=rows)