def test_check_operators(self):
    conn_id = "sqlite_default"
    captain_hook = BaseHook.get_hook(conn_id=conn_id)  # quite funny :D
    captain_hook.run("CREATE TABLE operator_test_table (a, b)")
    captain_hook.run("insert into operator_test_table values (1,2)")

    op = CheckOperator(
        task_id='check',
        sql="select count(*) from operator_test_table",
        conn_id=conn_id,
        dag=self.dag)
    op.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True)

    op = ValueCheckOperator(
        task_id='value_check',
        pass_value=95,
        tolerance=0.1,
        conn_id=conn_id,
        sql="SELECT 100",
        dag=self.dag)
    op.run(
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE,
        ignore_ti_state=True)

    captain_hook.run("drop table operator_test_table")
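# Not part of the original test: the method above assumes module-level imports
# along the lines of the pre-2.0 Airflow layout sketched here, plus a
# DEFAULT_DATE constant defined elsewhere in the test module.
from airflow.hooks.base_hook import BaseHook
from airflow.operators.check_operator import CheckOperator, ValueCheckOperator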
def _construct_operator(self, sql, pass_value, tolerance=None):
    dag = DAG('test_dag', start_date=datetime(2017, 1, 1))

    return ValueCheckOperator(
        dag=dag,
        task_id=self.task_id,
        conn_id=self.conn_id,
        sql=sql,
        pass_value=pass_value,
        tolerance=tolerance)
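# A minimal sketch (not in the original file) of how _construct_operator might
# be exercised: the database hook is mocked out and execute() is expected to
# raise AirflowException when the query result falls outside the tolerance
# band around pass_value. The imports, the mocked get_db_hook/get_first calls,
# and the self.task_id/self.conn_id attributes set in setUp are assumptions.
from unittest import mock

from airflow.exceptions import AirflowException


def test_fail_outside_tolerance(self):
    # 50 is well below pass_value=95 minus the 10% tolerance, so the check fails.
    op = self._construct_operator(sql='SELECT 50', pass_value=95, tolerance=0.1)
    with mock.patch.object(op, 'get_db_hook') as mock_get_db_hook:
        mock_get_db_hook.return_value.get_first.return_value = [50]
        with self.assertRaises(AirflowException):
            op.execute(context={})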
    s3_bucket='{{ params.s3_bucket }}',
    s3_key='temperature',
    copy_options=['COMPUPDATE OFF', 'STATUPDATE OFF', 'TRUNCATECOLUMNS']
)

# Runs a basic data quality check to ensure data is inserted
check_tweets = CheckOperator(
    task_id='quality_check_staging_tweets_table',
    sql='SELECT COUNT(*) FROM public.staging_tweets',
    conn_id='{{ redshift_conn_id }}'
)

# Runs a basic data quality check to ensure data is inserted
check_happiness = ValueCheckOperator(
    task_id='quality_check_staging_happiness_table',
    sql='SELECT COUNT(*) FROM public.staging_happiness',
    pass_value=154,
    conn_id='{{ redshift_conn_id }}'
)

# Runs a basic data quality check to ensure data is inserted
check_temperature = ValueCheckOperator(
    task_id='quality_check_staging_temperature_table',
    sql='SELECT COUNT(*) FROM public.staging_temperature',
    pass_value=8599212,
    conn_id='{{ redshift_conn_id }}'
)

# Load user table from staging tables
load_users_table = PostgresOperator(
    task_id='load_users_table',
    sql='users_insert.sql'
    task_id='stage_temperature_to_redshift',
    schema='{{ params.redshift_schema }}',
    table='staging_temperature',
    s3_bucket='{{ params.s3_bucket }}',
    s3_key='temperature'
)

check_tweets = CheckOperator(
    task_id='check_staging_tweets_table',
    sql='SELECT count(*) FROM public.staging_tweets',
    conn_id='{{ redshift_conn_id }}'
)

check_happiness = ValueCheckOperator(
    task_id='check_staging_happiness_table',
    sql='SELECT count(*) FROM public.staging_happiness',
    pass_value=155,
    conn_id='{{ redshift_conn_id }}'
)

check_temperature = ValueCheckOperator(
    task_id='check_staging_temperature_table',
    sql='SELECT count(*) FROM public.staging_temperature',
    pass_value=8235082,
    conn_id='{{ redshift_conn_id }}'
)

load_users_table = PostgresOperator(
    task_id='load_users_table',
    sql='users_insert.sql'
)
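# A possible wiring of the tasks above (not shown in the excerpt): each staging
# task feeds its quality check, and load_users_table only runs once every check
# has passed. The stage_* variable names are assumptions; check_* and
# load_users_table come from the snippet.
stage_tweets_to_redshift >> check_tweets
stage_happiness_to_redshift >> check_happiness
stage_temperature_to_redshift >> check_temperature
[check_tweets, check_happiness, check_temperature] >> load_users_table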
    task_id='check_interaction_intervals',
    table='interaction',
    metrics_thresholds={
        'COUNT(*)': 1.5,
        'MAX(amount)': 1.3,
        'MIN(amount)': 1.4,
        'SUM(amount)': 1.3
    },
    date_filter_column='interaction_date',
    days_back=5,
    conn_id=CONN_ID)

check_interaction_amount_value = ValueCheckOperator(
    task_id='check_interaction_amount_value',
    sql="SELECT COUNT(1) FROM interaction WHERE interaction_date=CURRENT_DATE - 1",
    pass_value=200,
    tolerance=0.2,
    conn_id=CONN_ID)

check_unique_products_value = ValueCheckOperator(
    task_id='check_unique_products_value',
    sql="SELECT COUNT(DISTINCT(product_id)) FROM interaction WHERE interaction_date=CURRENT_DATE - 1",
    pass_value=150,
    tolerance=0.3,
    conn_id=CONN_ID)

check_replaced_amount_value = ValueCheckOperator(
    task_id='check_replaced_amount_value',
    sql="""