Ejemplo n.º 1
0
    def test_execute_delete_previous_runs_rows(self, mock_hive_metastore_hook,
                                               mock_presto_hook,
                                               mock_mysql_hook,
                                               mock_json_dumps):
        """Check the DELETE issued when ``get_records`` reports existing rows.

        The MySQL hook mock's ``get_records`` is made truthy, the operator is
        executed, and the test asserts that the hook's ``run`` was called
        exactly once with the expected ``DELETE FROM hive_stats`` statement.
        """
        hive_table = (
            mock_hive_metastore_hook.return_value.get_table.return_value)
        hive_table.sd.cols = [fake_col]
        mysql = mock_mysql_hook.return_value
        mysql.get_records.return_value = True

        operator = HiveStatsCollectionOperator(**self.kwargs)
        operator.execute(context={})

        # Whitespace inside this literal is significant: the assertion below
        # compares the full statement text for equality.
        delete_template = """
            DELETE FROM hive_stats
            WHERE
                table_name='{}' AND
                partition_repr='{}' AND
                dttm='{}';
            """
        expected_sql = delete_template.format(
            operator.table,
            mock_json_dumps.return_value,
            operator.dttm,
        )
        mysql.run.assert_called_once_with(expected_sql)
Ejemplo n.º 2
0
    def test_execute(self, mock_hive_metastore_hook, mock_presto_hook,
                     mock_mysql_hook, mock_json_dumps):
        """Verify hook usage and inserted rows for a plain ``execute`` run.

        The MySQL hook mock's ``get_records`` is made falsy. After running
        the operator, the test asserts how each hook mock and
        ``mock_json_dumps`` was called, and that ``insert_rows`` received the
        expected ``hive_stats`` rows and target fields.
        """
        get_table = mock_hive_metastore_hook.return_value.get_table
        get_table.return_value.sd.cols = [fake_col]
        mysql = mock_mysql_hook.return_value
        mysql.get_records.return_value = False

        operator = HiveStatsCollectionOperator(**self.kwargs)
        operator.execute(context={})

        # Every hook must have been built once from the operator's settings.
        mock_hive_metastore_hook.assert_called_once_with(
            metastore_conn_id=operator.metastore_conn_id)
        get_table.assert_called_once_with(table_name=operator.table)
        mock_presto_hook.assert_called_once_with(
            presto_conn_id=operator.presto_conn_id)
        mock_mysql_hook.assert_called_once_with(operator.mysql_conn_id)
        mock_json_dumps.assert_called_once_with(
            operator.partition, sort_keys=True)

        # Expected expressions: the row count plus the operator's default
        # expressions for every column name/type pair.
        column_types = {}
        for column in get_table.return_value.sd.cols:
            column_types[column.name] = column.type
        expressions = {('', 'count'): 'COUNT(*)'}
        for name, type_name in column_types.items():
            expressions.update(operator.get_default_exprs(name, type_name))
        expressions = OrderedDict(expressions)

        def expected_row(key, value):
            """Build one expected row for an expression key and its value."""
            return (
                operator.ds,
                operator.dttm,
                operator.table,
                mock_json_dumps.return_value,
                key[0],
                key[1],
                value,
            )

        presto_result = mock_presto_hook.return_value.get_first.return_value
        expected_rows = [
            expected_row(key, value)
            for key, value in zip(expressions, presto_result)
        ]
        expected_fields = [
            'ds', 'dttm', 'table_name', 'partition_repr',
            'col', 'metric', 'value',
        ]
        mysql.insert_rows.assert_called_once_with(
            table='hive_stats',
            rows=expected_rows,
            target_fields=expected_fields,
        )
Ejemplo n.º 3
0
    def test_execute_with_assignment_func(self, mock_hive_metastore_hook,
                                          mock_presto_hook, mock_mysql_hook,
                                          mock_json_dumps):
        """Verify inserted rows when a custom ``assignment_func`` is given.

        A local ``assignment_func`` is added to the operator kwargs and the
        MySQL hook mock's ``get_records`` is made falsy. After running the
        operator, the test asserts that ``insert_rows`` received rows built
        from the count expression plus that function's expressions.
        """
        def assignment_func(col, _):
            """Return a single ``TEST(<col>)`` expression for the column."""
            return {(col, 'test'): 'TEST({})'.format(col)}

        self.kwargs.update(assignment_func=assignment_func)
        hive_table = (
            mock_hive_metastore_hook.return_value.get_table.return_value)
        hive_table.sd.cols = [fake_col]
        mysql = mock_mysql_hook.return_value
        mysql.get_records.return_value = False

        operator = HiveStatsCollectionOperator(**self.kwargs)
        operator.execute(context={})

        # Expected expressions: the row count plus whatever the operator's
        # assignment_func yields for every column name/type pair.
        column_types = {}
        for column in hive_table.sd.cols:
            column_types[column.name] = column.type
        expressions = {('', 'count'): 'COUNT(*)'}
        for name, type_name in column_types.items():
            expressions.update(operator.assignment_func(name, type_name))
        expressions = OrderedDict(expressions)

        def expected_row(key, value):
            """Build one expected row for an expression key and its value."""
            return (
                operator.ds,
                operator.dttm,
                operator.table,
                mock_json_dumps.return_value,
                key[0],
                key[1],
                value,
            )

        presto_result = mock_presto_hook.return_value.get_first.return_value
        expected_rows = [
            expected_row(key, value)
            for key, value in zip(expressions, presto_result)
        ]
        expected_fields = [
            'ds', 'dttm', 'table_name', 'partition_repr',
            'col', 'metric', 'value',
        ]
        mysql.insert_rows.assert_called_once_with(
            table='hive_stats',
            rows=expected_rows,
            target_fields=expected_fields,
        )