def execute(self, context):
    hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)
    logging.info("Extracting data from Hive")
    logging.info(self.sql)

    if self.bulk_load:
        tmpfile = NamedTemporaryFile()
        hive.to_csv(self.sql, tmpfile.name, delimiter='\t',
                    lineterminator='\n', output_header=False)
    else:
        results = hive.get_records(self.sql)

    mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
    if self.mysql_preoperator:
        logging.info("Running MySQL preoperator")
        mysql.run(self.mysql_preoperator)

    logging.info("Inserting rows into MySQL")
    if self.bulk_load:
        mysql.bulk_load(table=self.mysql_table, tmp_file=tmpfile.name)
        tmpfile.close()
    else:
        mysql.insert_rows(table=self.mysql_table, rows=results)

    if self.mysql_postoperator:
        logging.info("Running MySQL postoperator")
        mysql.run(self.mysql_postoperator)

    logging.info("Done.")
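# A hedged sketch of the constructor the execute() above appears to assume. The attribute
# names are taken from that method; the class name, argument defaults, and connection id
# defaults are assumptions for illustration, not the original operator's actual signature.
class HiveToMySqlTransfer(BaseOperator):

    @apply_defaults
    def __init__(self, sql, mysql_table,
                 hiveserver2_conn_id='hiveserver2_default',
                 mysql_conn_id='mysql_default',
                 mysql_preoperator=None,
                 mysql_postoperator=None,
                 bulk_load=False,
                 *args, **kwargs):
        super(HiveToMySqlTransfer, self).__init__(*args, **kwargs)
        self.sql = sql
        self.mysql_table = mysql_table
        self.hiveserver2_conn_id = hiveserver2_conn_id
        self.mysql_conn_id = mysql_conn_id
        self.mysql_preoperator = mysql_preoperator
        self.mysql_postoperator = mysql_postoperator
        self.bulk_load = bulk_load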
def execute(self, context):
    logging.info('Executing: ' + str(self.sql))
    hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
    hook.run(
        self.sql,
        autocommit=self.autocommit,
        parameters=self.parameters)
def insert_or_delete_task_state(*args, **kargs): ti = kargs["ti"] ##current task instance insert_sql = kargs["insert_sql"] # insert upstream state sql delete_sql = kargs["delete_sql"] # delete upstream state sql upstream_task_id = kargs["up_id"] # upstream task id xcom_pull_key = kargs["xcom_key"] # key for xcom_pull mysql_conn_id = kargs["mysql_conn_id"] # conn id for database which stores state table actual_sql = "" ##method 1 of get the upstream task's state # up_state = ti.xcom_pull(key=xcom_pull_key, task_ids = upstream_task_id) upstream_task_list = kargs["task"].upstream_list logging.info("%s upstream task list is %s" % (kargs["task"], upstream_task_list)) upstream_task_state = ( upstream_task_list[0] .get_task_instances( session=settings.Session(), start_date=kargs["execution_date"], end_date=kargs["execution_date"] )[0] .state ) logging.info("%s upstream task(%s) state is %s" % (kargs["task"], upstream_task_list[0], upstream_task_state)) if upstream_task_state == "success": actual_sql = insert_sql elif upstream_task_state == "failed": actual_sql = delete_sql else: actual_sql = "show databases;" ## if not success or failed, do something effectiveless logging.info("upstream state is %s, actual update status sql is %s" % (str(upstream_task_state), str(actual_sql))) ##start to execute sql logging.info("Executing: %s" % (str(actual_sql))) hook = MySqlHook(mysql_conn_id=mysql_conn_id) hook.run(actual_sql)
def execute(self, context):
    hive = HiveServer2Hook(hiveserver2_conn_id=self.hive_cli_conn_id)
    logging.info("Extracting data from Hive")
    results = hive.get_records(self.sql)

    mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
    if self.mysql_preoperator:
        logging.info("Running MySQL preoperator")
        mysql.run(self.mysql_preoperator)

    logging.info("Inserting rows into MySQL")
    mysql.insert_rows(table=self.mysql_table, rows=results)
def execute(self, context): hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id) logging.info("Extracting data from Hive") results = hive.get_records(self.sql) mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id) if self.mysql_preoperator: logging.info("Running MySQL preoperator") mysql.run(self.mysql_preoperator) logging.info("Inserting rows into MySQL") mysql.insert_rows(table=self.mysql_table, rows=results)
def execute(self, context): presto = PrestoHook(presto_conn_id=self.presto_conn_id) logging.info("Extracting data from Presto") logging.info(self.sql) results = hive.get_records(self.sql) mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id) if self.mysql_preoperator: logging.info("Running MySQL preoperator") logging.info(self.mysql_preoperator) mysql.run(self.mysql_preoperator) logging.info("Inserting rows into MySQL") mysql.insert_rows(table=self.mysql_table, rows=results)
def execute(self, context): presto = PrestoHook(presto_conn_id=self.presto_conn_id) logging.info("Extracting data from Presto") logging.info(self.sql) results = presto.get_records(self.sql) mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id) if self.mysql_preoperator: logging.info("Running MySQL preoperator") logging.info(self.mysql_preoperator) mysql.run(self.mysql_preoperator) logging.info("Inserting rows into MySQL") mysql.insert_rows(table=self.mysql_table, rows=results)
def post_execute(self, context): try: log_filepath = context["task_instance"].log_filepath.strip('.log') # parse the log file into a dict parsed = HiveLog.parselog(log_filepath) # captures the name of a parent dag in a subdag routine dag_ids = context['dag'].dag_id.split('.') if len(dag_ids) > 1: parsed['parent_dag_id'] = dag_ids[0] parsed['dag_id'] = dag_ids[1:].join('.') else: parsed['parent_dag_id'] = context['dag'].dag_id parsed['dag_id'] = parsed['parent_dag_id'] # Capture extra information from context parsed['task_id'] = context['task'].task_id parsed['input_date'] = context['ds'] parsed['owner'] = context['task'].owner # Create the table if it doesnt exist sql_hook = MySqlHook("airflow_db") sql_hook.run(table_creation_string()) # Create sql insertion strings and insert strings = table_insertion_strings(parsed) logging.info('Inserting honypot log metadata into database') for add_data_to_table_string in strings: sql_hook.run(add_data_to_table_string) logging.info('Completely log data insertion') sql_hook.run('COMMIT;') except Exception as e: logging.error("Honeypot post executor failed") logging.exception(e)
class MySqlOperator(BaseOperator):
    """
    Executes sql code in a specific MySQL database.

    :param mysql_conn_id: reference to a specific mysql database
    :param sql: the sql code to be executed
    """

    __mapper_args__ = {'polymorphic_identity': 'MySqlOperator'}
    template_fields = ('sql',)
    template_ext = ('.sql',)

    @apply_defaults
    def __init__(self, sql, mysql_conn_id, *args, **kwargs):
        super(MySqlOperator, self).__init__(*args, **kwargs)
        self.hook = MySqlHook(mysql_conn_id=mysql_conn_id)
        self.sql = sql

    def execute(self, context):
        logging.info('Executing: ' + self.sql)
        self.hook.run(self.sql)
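# A minimal usage sketch for the MySqlOperator above, assuming a `dag` object and a MySQL
# connection named 'mysql_default' exist; the task id and SQL statement are placeholders.
create_table = MySqlOperator(
    task_id='create_example_table',
    sql='CREATE TABLE IF NOT EXISTS example (id INT PRIMARY KEY);',
    mysql_conn_id='mysql_default',
    dag=dag,
)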
def execute(self, context=None):
    metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id)
    table = metastore.get_table(table_name=self.table)
    field_types = {col.name: col.type for col in table.sd.cols}

    exprs = {
        ('', 'count'): 'COUNT(*)'
    }
    for col, col_type in field_types.items():
        d = {}
        if self.assignment_func:
            d = self.assignment_func(col, col_type)
            if d is None:
                d = self.get_default_exprs(col, col_type)
        else:
            d = self.get_default_exprs(col, col_type)
        exprs.update(d)
    exprs.update(self.extra_exprs)
    exprs = OrderedDict(exprs)
    exprs_str = ",\n        ".join([
        v + " AS " + k[0] + '__' + k[1]
        for k, v in exprs.items()])

    where_clause = [
        "{0} = '{1}'".format(k, v) for k, v in self.partition.items()]
    where_clause = " AND\n        ".join(where_clause)
    sql = """
    SELECT
        {exprs_str}
    FROM {self.table}
    WHERE
        {where_clause};
    """.format(**locals())

    hook = PrestoHook(presto_conn_id=self.presto_conn_id)
    logging.info('Executing SQL check: ' + sql)
    row = hook.get_first(hql=sql)
    logging.info("Record: " + str(row))
    if not row:
        raise Exception("The query returned None")

    part_json = json.dumps(self.partition, sort_keys=True)

    logging.info("Deleting rows from previous runs if they exist")
    mysql = MySqlHook(self.mysql_conn_id)
    sql = """
    SELECT 1 FROM hive_stats
    WHERE
        table_name='{self.table}' AND
        partition_repr='{part_json}' AND
        dttm='{self.dttm}'
    LIMIT 1;
    """.format(**locals())
    if mysql.get_records(sql):
        sql = """
        DELETE FROM hive_stats
        WHERE
            table_name='{self.table}' AND
            partition_repr='{part_json}' AND
            dttm='{self.dttm}';
        """.format(**locals())
        mysql.run(sql)

    logging.info("Pivoting and loading cells into the Airflow db")
    rows = [
        (self.ds, self.dttm, self.table, part_json) +
        (r[0][0], r[0][1], r[1])
        for r in zip(exprs, row)]
    mysql.insert_rows(
        table='hive_stats',
        rows=rows,
        target_fields=[
            'ds',
            'dttm',
            'table_name',
            'partition_repr',
            'col',
            'metric',
            'value',
        ]
    )
def execute(self, context=None):
    metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id)
    table = metastore.get_table(table_name=self.table)
    field_types = {col.name: col.type for col in table.sd.cols}

    exprs = {('', 'count'): 'COUNT(*)'}
    for col, col_type in field_types.items():
        d = {}
        if self.assignment_func:
            d = self.assignment_func(col, col_type)
            if d is None:
                d = self.get_default_exprs(col, col_type)
        else:
            d = self.get_default_exprs(col, col_type)
        exprs.update(d)
    exprs.update(self.extra_exprs)
    exprs = OrderedDict(exprs)
    exprs_str = ",\n        ".join(
        [v + " AS " + k[0] + '__' + k[1] for k, v in exprs.items()])

    where_clause = [
        "{0} = '{1}'".format(k, v) for k, v in self.partition.items()
    ]
    where_clause = " AND\n        ".join(where_clause)
    sql = """
    SELECT
        {exprs_str}
    FROM {self.table}
    WHERE
        {where_clause};
    """.format(**locals())

    hook = PrestoHook(presto_conn_id=self.presto_conn_id)
    logging.info('Executing SQL check: ' + sql)
    row = hook.get_first(hql=sql)
    logging.info("Record: " + str(row))
    if not row:
        raise AirflowException("The query returned None")

    part_json = json.dumps(self.partition, sort_keys=True)

    logging.info("Deleting rows from previous runs if they exist")
    mysql = MySqlHook(self.mysql_conn_id)
    sql = """
    SELECT 1 FROM hive_stats
    WHERE
        table_name='{self.table}' AND
        partition_repr='{part_json}' AND
        dttm='{self.dttm}'
    LIMIT 1;
    """.format(**locals())
    if mysql.get_records(sql):
        sql = """
        DELETE FROM hive_stats
        WHERE
            table_name='{self.table}' AND
            partition_repr='{part_json}' AND
            dttm='{self.dttm}';
        """.format(**locals())
        mysql.run(sql)

    logging.info("Pivoting and loading cells into the Airflow db")
    rows = [(self.ds, self.dttm, self.table, part_json) +
            (r[0][0], r[0][1], r[1])
            for r in zip(exprs, row)]
    mysql.insert_rows(table='hive_stats', rows=rows, target_fields=[
        'ds',
        'dttm',
        'table_name',
        'partition_repr',
        'col',
        'metric',
        'value',
    ])
def execute(self, context): logging.info("Executing: " + self.sql) hook = MySqlHook(mysql_conn_id=self.mysql_conn_id) hook.run(self.sql)
def execute(self, context):
    logging.info('Executing: ' + str(self.sql))
    hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
    hook.run(self.sql, autocommit=self.autocommit, parameters=self.parameters)
def execute(self, context):
    logging.info('Executing: ' + self.sql)
    hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
    hook.run(self.sql)