Ejemplo n.º 1
0
    def test_check_operators(self):

        conn_id = "sqlite_default"

        captain_hook = BaseHook.get_hook(conn_id=conn_id)  # quite funny :D
        captain_hook.run("CREATE TABLE operator_test_table (a, b)")
        captain_hook.run("insert into operator_test_table values (1,2)")

        op = CheckOperator(
            task_id='check',
            sql="select count(*) from operator_test_table",
            conn_id=conn_id,
            dag=self.dag)
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

        op = ValueCheckOperator(
            task_id='value_check',
            pass_value=95,
            tolerance=0.1,
            conn_id=conn_id,
            sql="SELECT 100",
            dag=self.dag)
        op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

        captain_hook.run("drop table operator_test_table")
    def _construct_operator(self, sql, pass_value, tolerance=None):
        dag = DAG('test_dag', start_date=datetime(2017, 1, 1))

        return ValueCheckOperator(dag=dag,
                                  task_id=self.task_id,
                                  conn_id=self.conn_id,
                                  sql=sql,
                                  pass_value=pass_value,
                                  tolerance=tolerance)
Ejemplo n.º 3
0
        s3_bucket='{{ params.s3_bucket }}',
        s3_key='temperature',
        copy_options=['COMPUPDATE OFF', 'STATUPDATE OFF', 'TRUNCATECOLUMNS']
    )

    # Runs a basic data quality check to ensure data is inserted
    check_tweets = CheckOperator(
        task_id='quality_check_staging_tweets_table',
        sql='SELECT COUNT(*) FROM public.staging_tweets',
        conn_id='{{ redshift_conn_id }}'
    )

    # Runs a basic data quality check to ensure data is inserted
    check_happiness = ValueCheckOperator(
        task_id='quality_check_staging_happiness_table',
        sql='SELECT COUNT(*) FROM public.staging_happiness',
        pass_value=154,
        conn_id='{{ redshift_conn_id }}'
    )

    # Runs a basic data quality check to ensure data is inserted
    check_temperature = ValueCheckOperator(
        task_id='quality_check_staging_temperature_table',
        sql='SELECT COUNT(*) FROM public.staging_temperature',
        pass_value=8599212,
        conn_id='{{ redshift_conn_id }}'
    )

    # Load user table from staging tables
    load_users_table = PostgresOperator(
        task_id='load_users_table',
        sql='users_insert.sql'
Ejemplo n.º 4
0
        task_id='stage_temperature_to_redshift',
        schema='{{ params.redshift_schema }}',
        table='staging_temperature',
        s3_bucket='{{ params.s3_bucket }}',
        s3_key='temperature'
    )

    check_tweets = CheckOperator(
        task_id='check_staging_tweets_table',
        sql='SELECT count(*) FROM public.staging_tweets',
        conn_id='{{ redshift_conn_id }}'
    )

    check_happiness = ValueCheckOperator(
        task_id='check_staging_happiness_table',
        sql='SELECT count(*) FROM public.staging_happiness',
        pass_value=155,
        conn_id='{{ redshift_conn_id }}'
    )

    check_temperature = ValueCheckOperator(
        task_id='check_staging_temperature_table',
        sql='SELECT count(*) FROM public.staging_temperature',
        pass_value=8235082,
        conn_id='{{ redshift_conn_id }}'
    )

    load_users_table = PostgresOperator(
        task_id='load_users_table',
        sql='users_insert.sql'
    )
Ejemplo n.º 5
0
        task_id='check_interaction_intervals',
        table='interaction',
        metrics_thresholds={
            'COUNT(*)': 1.5,
            'MAX(amount)': 1.3,
            'MIN(amount)': 1.4,
            'SUM(amount)': 1.3
        },
        date_filter_column='interaction_date',
        days_back=5,
        conn_id=CONN_ID)

    check_interaction_amount_value = ValueCheckOperator(
        task_id='check_interaction_amount_value',
        sql=
        "SELECT COUNT(1) FROM interaction WHERE interaction_date=CURRENT_DATE - 1",
        pass_value=200,
        tolerance=0.2,
        conn_id=CONN_ID)

    check_unique_products_value = ValueCheckOperator(
        task_id='check_unique_products_value',
        sql=
        "SELECT COUNT(DISTINCT(product_id)) FROM interaction WHERE interaction_date=CURRENT_DATE - 1",
        pass_value=150,
        tolerance=0.3,
        conn_id=CONN_ID)

    check_replaced_amount_value = ValueCheckOperator(
        task_id='check_replaced_amount_value',
        sql="""