Example #1
    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)

        logging.info("Dumping MySQL query results to local file")
        conn = mysql.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        with NamedTemporaryFile("w") as f:
            csv_writer = csv.writer(f, delimiter=self.delimiter)
            field_dict = OrderedDict()
            for field in cursor.description:
                field_dict[field[0]] = self.type_map(field[1])
            csv_writer.writerows(cursor)
            f.flush()
            cursor.close()
            conn.close()
            logging.info("Loading file into Hive")
            hive.load_file(
                f.name,
                self.hive_table,
                field_dict=field_dict,
                create=self.create,
                partition=self.partition,
                delimiter=self.delimiter,
                recreate=self.recreate)
Example #2
 def objects(self):
     where_clause = ''
     if DB_WHITELIST:
         dbs = ",".join(["'" + db + "'" for db in DB_WHITELIST])
         where_clause = "AND b.name IN ({})".format(dbs)
     if DB_BLACKLIST:
         dbs = ",".join(["'" + db + "'" for db in DB_BLACKLIST])
         where_clause = "AND b.name NOT IN ({})".format(dbs)
     sql = """
     SELECT CONCAT(b.NAME, '.', a.TBL_NAME), TBL_TYPE
     FROM TBLS a
     JOIN DBS b ON a.DB_ID = b.DB_ID
     WHERE
         a.TBL_NAME NOT LIKE '%tmp%' AND
         a.TBL_NAME NOT LIKE '%temp%' AND
         b.NAME NOT LIKE '%tmp%' AND
         b.NAME NOT LIKE '%temp%'
     {where_clause}
     LIMIT {LIMIT};
     """.format(where_clause=where_clause, LIMIT=TABLE_SELECTOR_LIMIT)
     h = MySqlHook(METASTORE_MYSQL_CONN_ID)
     d = [{'id': row[0], 'text': row[0]} for row in h.get_records(sql)]
     return json.dumps(d)
Example #3
 def partitions(self):
     schema, table = request.args.get("table").split('.')
     sql = """
     SELECT
         a.PART_NAME,
         a.CREATE_TIME,
         c.LOCATION,
         c.IS_COMPRESSED,
         c.INPUT_FORMAT,
         c.OUTPUT_FORMAT
     FROM PARTITIONS a
     JOIN TBLS b ON a.TBL_ID = b.TBL_ID
     JOIN DBS d ON b.DB_ID = d.DB_ID
     JOIN SDS c ON a.SD_ID = c.SD_ID
     WHERE
         b.TBL_NAME like '{table}' AND
         d.NAME like '{schema}'
     ORDER BY PART_NAME DESC
     """.format(**locals())
     h = MySqlHook(METASTORE_MYSQL_CONN_ID)
     df = h.get_pandas_df(sql)
     return df.to_html(
         classes="table table-striped table-bordered table-hover",
         index=False,
         na_rep='',)
Example #4
 def partitions(self):
     schema, table = request.args.get("table").split('.')
     sql = """
     SELECT
         a.PART_NAME,
         a.CREATE_TIME,
         c.LOCATION,
         c.IS_COMPRESSED,
         c.INPUT_FORMAT,
         c.OUTPUT_FORMAT
     FROM PARTITIONS a
     JOIN TBLS b ON a.TBL_ID = b.TBL_ID
     JOIN DBS d ON b.DB_ID = d.DB_ID
     JOIN SDS c ON a.SD_ID = c.SD_ID
     WHERE
         b.TBL_NAME like '{table}' AND
         d.NAME like '{schema}'
     ORDER BY PART_NAME DESC
     """.format(**locals())
     h = MySqlHook(METASTORE_MYSQL_CONN_ID)
     df = h.get_pandas_df(sql)
     return df.to_html(
         classes="table table-striped table-bordered table-hover",
         index=False,
         na_rep='',
     )
Example #5
    def execute(self, context):
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)
        logging.info("Extracting data from Hive")
        logging.info(self.sql)

        if self.bulk_load:
            tmpfile = NamedTemporaryFile()
            hive.to_csv(self.sql,
                        tmpfile.name,
                        delimiter='\t',
                        lineterminator='\n',
                        output_header=False)
        else:
            results = hive.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.mysql_preoperator:
            logging.info("Running MySQL preoperator")
            mysql.run(self.mysql_preoperator)

        logging.info("Inserting rows into MySQL")

        if self.bulk_load:
            mysql.bulk_load(table=self.mysql_table, tmp_file=tmpfile.name)
            tmpfile.close()
        else:
            mysql.insert_rows(table=self.mysql_table, rows=results)

        if self.mysql_postoperator:
            logging.info("Running MySQL postoperator")
            mysql.run(self.mysql_postoperator)

        logging.info("Done.")
Example #6
def insert_or_delete_task_state(*args, **kargs):
    ti = kargs["ti"]  # current task instance
    insert_sql = kargs["insert_sql"]  # insert upstream state sql
    delete_sql = kargs["delete_sql"]  # delete upstream state sql
    upstream_task_id = kargs["up_id"]  # upstream task id
    xcom_pull_key = kargs["xcom_key"]  # key for xcom_pull
    mysql_conn_id = kargs["mysql_conn_id"]  # conn id for database which stores state table

    actual_sql = ""
    # method 1 of getting the upstream task's state
    # up_state = ti.xcom_pull(key=xcom_pull_key, task_ids = upstream_task_id)

    upstream_task_list = kargs["task"].upstream_list
    logging.info("%s upstream task list is %s" % (kargs["task"], upstream_task_list))
    upstream_task_state = (
        upstream_task_list[0]
        .get_task_instances(
            session=settings.Session(), start_date=kargs["execution_date"], end_date=kargs["execution_date"]
        )[0]
        .state
    )
    logging.info("%s upstream task(%s) state is %s" % (kargs["task"], upstream_task_list[0], upstream_task_state))
    if upstream_task_state == "success":
        actual_sql = insert_sql
    elif upstream_task_state == "failed":
        actual_sql = delete_sql
    else:
        actual_sql = "show databases;"  ## if not success or failed, do something effectiveless
    logging.info("upstream state is %s, actual update status sql is %s" % (str(upstream_task_state), str(actual_sql)))
    # start executing the sql
    logging.info("Executing: %s" % (str(actual_sql)))
    hook = MySqlHook(mysql_conn_id=mysql_conn_id)
    hook.run(actual_sql)
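
Wired up as a PythonOperator, the kwargs above would arrive through op_kwargs plus the task context. A minimal sketch under that assumption; the import path, task ids, SQL statements, and connection id below are illustrative, not taken from the example:

from airflow.operators.python_operator import PythonOperator  # import location varies across Airflow versions

# Hedged sketch: all values below are assumptions for illustration.
update_state = PythonOperator(
    task_id='update_state_table',
    python_callable=insert_or_delete_task_state,
    provide_context=True,  # supplies ti, task, execution_date in kargs on older Airflow
    op_kwargs={
        'insert_sql': "INSERT INTO task_state VALUES ('load_data', 'success')",
        'delete_sql': "DELETE FROM task_state WHERE task_id = 'load_data'",
        'up_id': 'load_data',
        'xcom_key': 'state',
        'mysql_conn_id': 'state_db',
    },
    dag=dag,  # 'dag' is assumed to be defined elsewhere in the same file
)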
Example #7
    def execute(self, context):
        hive = HiveCliHook(hive_cli_conn_id=self.hive_cli_conn_id)
        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)

        logging.info("Dumping MySQL query results to local file")
        conn = mysql.get_conn()
        cursor = conn.cursor()
        cursor.execute(self.sql)
        with NamedTemporaryFile("wb") as f:
            csv_writer = csv.writer(f,
                                    delimiter=self.delimiter,
                                    encoding="utf-8")
            field_dict = OrderedDict()
            for field in cursor.description:
                field_dict[field[0]] = self.type_map(field[1])
            csv_writer.writerows(cursor)
            f.flush()
            cursor.close()
            conn.close()
            logging.info("Loading file into Hive")
            hive.load_file(f.name,
                           self.hive_table,
                           field_dict=field_dict,
                           create=self.create,
                           partition=self.partition,
                           delimiter=self.delimiter,
                           recreate=self.recreate)
Example #8
 def execute(self, context):
     logging.info('Executing: ' + str(self.sql))
     hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
     hook.run(
         self.sql,
         autocommit=self.autocommit,
         parameters=self.parameters)
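
The run call above forwards parameters to the hook, so the SQL can use driver placeholders instead of string interpolation. A minimal sketch of exercising that path directly against the hook; the import path, the mysql_default connection id, and the users table are illustrative assumptions:

from airflow.hooks.mysql_hook import MySqlHook  # import location varies across Airflow versions

# Hedged sketch: run a parameterized statement through the hook directly.
# 'mysql_default' and the 'users' table are assumptions, not from the examples above.
hook = MySqlHook(mysql_conn_id='mysql_default')
hook.run(
    "UPDATE users SET active = 0 WHERE last_login < %s",
    parameters=('2015-01-01',),
)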
Example #9
    def execute(self, context):
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hive_cli_conn_id)
        logging.info("Extracting data from Hive")
        results = hive.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        logging.info("Inserting rows into MySQL")
        mysql.insert_rows(table=self.mysql_table, rows=results)
Example #10
    def execute(self, context):
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hive_cli_conn_id)
        logging.info("Extracting data from Hive")
        results = hive.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        logging.info("Inserting rows into MySQL")
        mysql.insert_rows(table=self.mysql_table, rows=results)
Example #11
 def _query_mysql(self):
     """
     Queries mysql and returns a cursor to the results.
     """
     mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
     conn = mysql.get_conn()
     cursor = conn.cursor()
     cursor.execute(self.sql)
     return cursor
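
A cursor returned this way is normally drained by the caller. A minimal sketch of an execute method that consumes it and writes a local CSV; the csv import, output path, and header handling are assumptions for illustration:

    def execute(self, context):
        # Hedged sketch: drain the cursor from _query_mysql() into a CSV file.
        # '/tmp/mysql_export.csv' is an illustrative path, not from the example above.
        cursor = self._query_mysql()
        with open('/tmp/mysql_export.csv', 'w') as f:
            writer = csv.writer(f)
            writer.writerow([col[0] for col in cursor.description])  # header row
            writer.writerows(cursor)                                 # data rows
        cursor.close()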
Example #12
 def _query_mysql(self):
     """
     Queries mysql and returns a cursor to the results.
     """
     mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
     conn = mysql.get_conn()
     cursor = conn.cursor()
     cursor.execute(self.sql)
     return cursor
Example #13
    def __init__(self, sql, mysql_conn_id, *args, **kwargs):
        """
        Parameters:
        mysql_conn_id: reference to a specific mysql database
        sql: the sql code to be executed
        """
        super(MySqlOperator, self).__init__(*args, **kwargs)

        self.hook = MySqlHook(mysql_conn_id=mysql_conn_id)
        self.sql = sql
Example #14
    def execute(self, context):
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)
        logging.info("Extracting data from Hive")
        logging.info(self.sql)

        if self.bulk_load:
            tmpfile = NamedTemporaryFile()
            hive.to_csv(self.sql, tmpfile.name, delimiter='\t',
                lineterminator='\n', output_header=False)
        else:
            results = hive.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.mysql_preoperator:
            logging.info("Running MySQL preoperator")
            mysql.run(self.mysql_preoperator)

        logging.info("Inserting rows into MySQL")

        if self.bulk_load:
            mysql.bulk_load(table=self.mysql_table, tmp_file=tmpfile.name)
            tmpfile.close()
        else:
            mysql.insert_rows(table=self.mysql_table, rows=results)

        if self.mysql_postoperator:
            logging.info("Running MySQL postoperator")
            mysql.run(self.mysql_postoperator)

        logging.info("Done.")
Example #15
    def execute(self, context):
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)
        logging.info("Extracting data from Hive")
        logging.info(self.sql)
        results = hive.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.mysql_preoperator:
            logging.info("Running MySQL preoperator")
            logging.info(self.mysql_preoperator)
            mysql.run(self.mysql_preoperator)

        logging.info("Inserting rows into MySQL")
        mysql.insert_rows(table=self.mysql_table, rows=results)
Example #16
    def execute(self, context):
        presto = PrestoHook(presto_conn_id=self.presto_conn_id)
        logging.info("Extracting data from Presto")
        logging.info(self.sql)
        results = presto.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.mysql_preoperator:
            logging.info("Running MySQL preoperator")
            logging.info(self.mysql_preoperator)
            mysql.run(self.mysql_preoperator)

        logging.info("Inserting rows into MySQL")
        mysql.insert_rows(table=self.mysql_table, rows=results)
Example #17
    def post_execute(self, context):
        try:
            # str.strip removes a set of characters, not a suffix; trim '.log' explicitly
            log_filepath = context["task_instance"].log_filepath
            if log_filepath.endswith('.log'):
                log_filepath = log_filepath[:-len('.log')]

            # parse the log file into a dict
            parsed = HiveLog.parselog(log_filepath)

            # captures the name of a parent dag in a subdag routine
            dag_ids = context['dag'].dag_id.split('.')
            if len(dag_ids) > 1:
                parsed['parent_dag_id'] = dag_ids[0]
                parsed['dag_id'] = '.'.join(dag_ids[1:])
            else:
                parsed['parent_dag_id'] = context['dag'].dag_id
                parsed['dag_id'] = parsed['parent_dag_id']

            # Capture extra information from context
            parsed['task_id'] = context['task'].task_id
            parsed['input_date'] = context['ds']
            parsed['owner'] = context['task'].owner
            # Create the table if it doesn't exist
            sql_hook = MySqlHook("airflow_db")
            sql_hook.run(table_creation_string())

            # Create sql insertion strings and insert
            strings = table_insertion_strings(parsed)
            logging.info('Inserting honeypot log metadata into database')
            for add_data_to_table_string in strings:
                sql_hook.run(add_data_to_table_string)
            logging.info('Completed log data insertion')
            sql_hook.run('COMMIT;')

        except Exception as e:
            logging.error("Honeypot post executor failed")
            logging.exception(e)
Example #18
def simple_select_mysql(ds, **kwargs):
    mysql_hook = MySqlHook(mysql_conn_id="soletrade_localhost")
    users_trend = """SELECT 
                DISTINCT(email),
                sum(spent),
                count(transactions()
            FROM
            orders AS o
            GROUP BY 1
            ;"""
    with open('temp/output.csv', 'w') as f:
        wr = csv.writer(f, delimiter=',')
        csvRow = ["user", "spent", "transactions"]
        wr.writerow(csvRow)
        for user, spent, transactions in mysql_hook.get_records(users_trend):
            wr.writerow([user, spent, transactions])
Example #19
 def get_sql_hook(self, sql_conn_id):
     if 'sqlite' in sql_conn_id:
         from airflow.hooks import SqliteHook
         return SqliteHook(sql_conn_id)
     else:
         from airflow.hooks import MySqlHook
         return MySqlHook(sql_conn_id)
Example #20
    def execute(self, context):
        hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.data == "" and self.redis_key != "" and self.redis_conn_id != "":
            # pull the rows to insert from Redis; eval restores the stored Python literal
            self.data = eval(self.redis_hook.get(self.redis_key))
            if len(self.data) <= 0:
                logging.error("Not inserting data as the provided key is empty")
                return 1
        try:
            conn = hook.get_conn()
            cursor = conn.cursor()
            cursor.executemany(self.sql, self.data)
        except Exception as e:
            logging.info("Exception")
            traceback.print_exc()
Example #21
 def index(self):
     sql = """
     SELECT
         a.name as db, db_location_uri as location,
         count(1) as object_count, a.desc as description
     FROM DBS a
     JOIN TBLS b ON a.DB_ID = b.DB_ID
     GROUP BY a.name, db_location_uri, a.desc
     """.format(
         **locals()
     )
     h = MySqlHook(METASTORE_MYSQL_CONN_ID)
     df = h.get_pandas_df(sql)
     df.db = '<a href="/admin/metastorebrowserview/db/?db=' + df.db + '">' + df.db + "</a>"
     table = df.to_html(
         classes="table table-striped table-bordered table-hover", index=False, escape=False, na_rep=""
     )
     return self.render("metastore_browser/dbs.html", table=table)
Example #22
 def index(self):
     sql = """
     SELECT
         a.name as db, db_location_uri as location,
         count(1) as object_count, a.desc as description
     FROM DBS a
     JOIN TBLS b ON a.DB_ID = b.DB_ID
     GROUP BY a.name, db_location_uri, a.desc
     """.format(**locals())
     h = MySqlHook(METASTORE_MYSQL_CONN_ID)
     df = h.get_pandas_df(sql)
     df.db = ('<a href="/admin/metastorebrowserview/db/?db=' + df.db +
              '">' + df.db + '</a>')
     table = df.to_html(
         classes="table table-striped table-bordered table-hover",
         index=False,
         escape=False,
         na_rep='',
     )
     return self.render("metastore_browser/dbs.html", table=table)
Example #23
 def objects(self):
     where_clause = ''
     if DB_WHITELIST:
         dbs = ",".join(["'" + db + "'" for db in DB_WHITELIST])
         where_clause = "AND b.name IN ({})".format(dbs)
     if DB_BLACKLIST:
         dbs = ",".join(["'" + db + "'" for db in DB_BLACKLIST])
         where_clause = "AND b.name NOT IN ({})".format(dbs)
     sql = """
     SELECT CONCAT(b.NAME, '.', a.TBL_NAME), TBL_TYPE
     FROM TBLS a
     JOIN DBS b ON a.DB_ID = b.DB_ID
     WHERE
         a.TBL_NAME NOT LIKE '%tmp%' AND
         a.TBL_NAME NOT LIKE '%temp%' AND
         b.NAME NOT LIKE '%tmp%' AND
         b.NAME NOT LIKE '%temp%'
     {where_clause}
     LIMIT {LIMIT};
     """.format(where_clause=where_clause, LIMIT=TABLE_SELECTOR_LIMIT)
     h = MySqlHook(METASTORE_MYSQL_CONN_ID)
     d = [{'id': row[0], 'text': row[0]} for row in h.get_records(sql)]
     return json.dumps(d)
Example #24
class MySqlOperator(BaseOperator):
    """
    Executes sql code in a specific mysql database.
    """

    __mapper_args__ = {'polymorphic_identity': 'MySqlOperator'}
    template_fields = ('sql', )
    template_ext = ('.sql', )

    @apply_defaults
    def __init__(self, sql, mysql_conn_id, *args, **kwargs):
        """
        Parameters:
        mysql_conn_id: reference to a specific mysql database
        sql: the sql code to be executed
        """
        super(MySqlOperator, self).__init__(*args, **kwargs)

        self.hook = MySqlHook(mysql_conn_id=mysql_conn_id)
        self.sql = sql

    def execute(self, execution_date):
        logging.info('Executing: ' + self.sql)
        self.hook.run(self.sql)
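
For context, a minimal sketch of how an operator like this might be declared inside a DAG; the dag arguments, connection id, and SQL statement are illustrative assumptions:

from datetime import datetime
from airflow import DAG

# Hedged sketch: instantiate the operator in a DAG definition.
# 'mysql_default' and the CREATE TABLE statement are assumptions for illustration.
dag = DAG('mysql_example', start_date=datetime(2016, 1, 1), schedule_interval='@daily')

create_table = MySqlOperator(
    task_id='create_table',
    sql='CREATE TABLE IF NOT EXISTS events (id INT, name VARCHAR(64))',
    mysql_conn_id='mysql_default',
    dag=dag,
)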
Example #25
    def execute(self, context):
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)
        logging.info("Extracting data from Hive")
        logging.info(self.sql)
        results = hive.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.mysql_preoperator:
            logging.info("Running MySQL preoperator")
            logging.info(self.mysql_preoperator)
            mysql.run(self.mysql_preoperator)

        logging.info("Inserting rows into MySQL")
        mysql.insert_rows(table=self.mysql_table, rows=results)

        if self.mysql_postoperator:
            logging.info("Running MySQL postoperator")
            logging.info(self.mysql_postoperator)
            mysql.run(self.mysql_postoperator)
Example #26
    def execute(self, context):
        presto = PrestoHook(presto_conn_id=self.presto_conn_id)
        logging.info("Extracting data from Presto")
        logging.info(self.sql)
        results = presto.get_records(self.sql)

        mysql = MySqlHook(mysql_conn_id=self.mysql_conn_id)
        if self.mysql_preoperator:
            logging.info("Running MySQL preoperator")
            logging.info(self.mysql_preoperator)
            mysql.run(self.mysql_preoperator)

        logging.info("Inserting rows into MySQL")
        mysql.insert_rows(table=self.mysql_table, rows=results)
Example #27
    def execute(self, context=None):
        metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id)
        table = metastore.get_table(table_name=self.table)
        field_types = {col.name: col.type for col in table.sd.cols}

        exprs = {
            ('', 'count'): 'COUNT(*)'
        }
        for col, col_type in field_types.items():
            d = {}
            if self.assignment_func:
                d = self.assignment_func(col, col_type)
                if d is None:
                    d = self.get_default_exprs(col, col_type)
            else:
                d = self.get_default_exprs(col, col_type)
            exprs.update(d)
        exprs.update(self.extra_exprs)
        exprs = OrderedDict(exprs)
        exprs_str = ",\n        ".join([
            v + " AS " + k[0] + '__' + k[1]
            for k, v in exprs.items()])

        where_clause = [
            "{0} = '{1}'".format(k, v) for k, v in self.partition.items()]
        where_clause = " AND\n        ".join(where_clause)
        sql = """
        SELECT
            {exprs_str}
        FROM {self.table}
        WHERE
            {where_clause};
        """.format(**locals())

        hook = PrestoHook(presto_conn_id=self.presto_conn_id)
        logging.info('Executing SQL check: ' + sql)
        row = hook.get_first(hql=sql)
        logging.info("Record: " + str(row))
        if not row:
            raise Exception("The query returned None")

        part_json = json.dumps(self.partition, sort_keys=True)

        logging.info("Deleting rows from previous runs if they exist")
        mysql = MySqlHook(self.mysql_conn_id)
        sql = """
        SELECT 1 FROM hive_stats
        WHERE
            table_name='{self.table}' AND
            partition_repr='{part_json}' AND
            dttm='{self.dttm}'
        LIMIT 1;
        """.format(**locals())
        if mysql.get_records(sql):
            sql = """
            DELETE FROM hive_stats
            WHERE
                table_name='{self.table}' AND
                partition_repr='{part_json}' AND
                dttm='{self.dttm}';
            """.format(**locals())
            mysql.run(sql)

        logging.info("Pivoting and loading cells into the Airflow db")
        rows = [
            (self.ds, self.dttm, self.table, part_json) +
            (r[0][0], r[0][1], r[1])
            for r in zip(exprs, row)]
        mysql.insert_rows(
            table='hive_stats',
            rows=rows,
            target_fields=[
                'ds',
                'dttm',
                'table_name',
                'partition_repr',
                'col',
                'metric',
                'value',
            ]
        )
Example #28
 def execute(self, context):
     logging.info('Executing: ' + self.sql)
     hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
     hook.run(self.sql)
Example #29
 def execute(self, context):
     logging.info("Executing: " + self.sql)
     hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
     hook.run(self.sql)
Example #30
 def execute(self, context):
     logging.info('Executing: ' + str(self.sql))
     hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
     hook.run(self.sql,
              autocommit=self.autocommit,
              parameters=self.parameters)
Example #31
    def execute(self, context=None):
        metastore = HiveMetastoreHook(metastore_conn_id=self.metastore_conn_id)
        table = metastore.get_table(table_name=self.table)
        field_types = {col.name: col.type for col in table.sd.cols}

        exprs = {('', 'count'): 'COUNT(*)'}
        for col, col_type in field_types.items():
            d = {}
            if self.assignment_func:
                d = self.assignment_func(col, col_type)
                if d is None:
                    d = self.get_default_exprs(col, col_type)
            else:
                d = self.get_default_exprs(col, col_type)
            exprs.update(d)
        exprs.update(self.extra_exprs)
        exprs = OrderedDict(exprs)
        exprs_str = ",\n        ".join(
            [v + " AS " + k[0] + '__' + k[1] for k, v in exprs.items()])

        where_clause = [
            "{0} = '{1}'".format(k, v) for k, v in self.partition.items()
        ]
        where_clause = " AND\n        ".join(where_clause)
        sql = """
        SELECT
            {exprs_str}
        FROM {self.table}
        WHERE
            {where_clause};
        """.format(**locals())

        hook = PrestoHook(presto_conn_id=self.presto_conn_id)
        logging.info('Executing SQL check: ' + sql)
        row = hook.get_first(hql=sql)
        logging.info("Record: " + str(row))
        if not row:
            raise AirflowException("The query returned None")

        part_json = json.dumps(self.partition, sort_keys=True)

        logging.info("Deleting rows from previous runs if they exist")
        mysql = MySqlHook(self.mysql_conn_id)
        sql = """
        SELECT 1 FROM hive_stats
        WHERE
            table_name='{self.table}' AND
            partition_repr='{part_json}' AND
            dttm='{self.dttm}'
        LIMIT 1;
        """.format(**locals())
        if mysql.get_records(sql):
            sql = """
            DELETE FROM hive_stats
            WHERE
                table_name='{self.table}' AND
                partition_repr='{part_json}' AND
                dttm='{self.dttm}';
            """.format(**locals())
            mysql.run(sql)

        logging.info("Pivoting and loading cells into the Airflow db")
        rows = [(self.ds, self.dttm, self.table, part_json) +
                (r[0][0], r[0][1], r[1]) for r in zip(exprs, row)]
        mysql.insert_rows(table='hive_stats',
                          rows=rows,
                          target_fields=[
                              'ds',
                              'dttm',
                              'table_name',
                              'partition_repr',
                              'col',
                              'metric',
                              'value',
                          ])