class TestDataTransfer(TestCase):
    """End-to-end check of the products sales pipeline DAG."""

    def setUp(self):
        # One hook per connection id: 'oltp' is seeded below, 'olap' is queried.
        self.oltp_hook, self.olap_hook = (
            PostgresHook(conn_name) for conn_name in ('oltp', 'olap')
        )

    def test_validate_sales_pipeline(self):
        """Validate if Sales Pipeline DAG run correctly"""
        date = '2020-01-01'

        # Tables created and seeded with initial data through the OLTP hook.
        for seeded_table in ('purchases', 'products'):
            create_table(seeded_table, self.oltp_hook)
            insert_initial_data(seeded_table, self.oltp_hook)

        # Tables created empty through the OLAP hook before the DAG runs.
        for empty_table in ('stg_purchases', 'stg_products', 'products_sales'):
            create_table(empty_table, self.olap_hook)

        execute_dag('products_sales_pipeline', date)

        # (table to query, expected-output name, expected row count)
        expectations = (
            ('stg_purchases', f'stg_purchases_{date}', 3),
            ('stg_products', 'stg_products', 5),
            ('products_sales', 'products_sales', 3),
            ('agg_sales_category', 'agg_sales_category', 2),
        )
        for table, expected_name, expected_rows in expectations:
            actual = self.olap_hook.get_pandas_df(f'select * from {table}')
            expected = output_expected_as_df(expected_name)
            assert_frame_equal(actual, expected)
            assert len(actual) == expected_rows
def execute(self, context):
    """Run every configured data-quality query and fail on the first bad result.

    Each entry of ``self.sql_queries`` is paired positionally with the entry
    of ``self.test_results`` at the same index. The query is run through a
    ``PostgresHook`` built from ``self.redshift_conn_id`` and the value
    returned by ``get_pandas_df`` is handed to the paired check callable. A
    falsy return value from the callable is treated as a failed check.

    Args:
        context: Execution context supplied by the caller; not used here.

    Raises:
        ValueError: If ``self.sql_queries`` and ``self.test_results`` have
            different lengths. Raised before any query runs.
        AssertionError: If any check callable returns a falsy value.
    """
    self.log.info("Start Data Quality checks")

    # Materialise both sides so any iterable can be length-checked. A bare
    # zip() would silently drop the unmatched tail and skip those checks.
    queries = list(self.sql_queries)
    checks = list(self.test_results)
    if len(queries) != len(checks):
        raise ValueError(
            "sql_queries and test_results must have the same length "
            f"(got {len(queries)} queries and {len(checks)} checks)"
        )

    redshift_conn = PostgresHook(postgres_conn_id=self.redshift_conn_id)
    self.log.info("Redshift Connection has been successfully established")

    for query, check in zip(queries, checks):
        # Lazy %-style arguments avoid rendering the result when DEBUG is off.
        self.log.debug("Run data quality query : %s", query)
        result = redshift_conn.get_pandas_df(query)
        self.log.debug("Result from test query : %s", result)

        if not check(result):
            # Name the offending query so the failure can be traced.
            self.log.error("Data quality check failed for query: %s", query)
            raise AssertionError('Data quality check failed')

        self.log.info("Data quality check passed.")