def test_dbt_run(self, mock_run_cli):
    operator = DbtRunOperator(
        task_id='run',
        dag=self.dag
    )
    operator.execute(None)
    mock_run_cli.assert_called_once_with('run')
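For context, this assertion typically lives in a unittest.TestCase that patches out the dbt CLI hook so no real dbt process is spawned. A minimal sketch of that scaffolding, assuming the hook is importable as airflow_dbt.hooks.dbt_hook.DbtCliHook (the class name, start_date, and default_args below are illustrative, not from the original test):

import datetime
from unittest import TestCase, mock

from airflow import DAG
from airflow_dbt.hooks.dbt_hook import DbtCliHook
from airflow_dbt.operators.dbt_operator import DbtRunOperator


class TestDbtRunOperator(TestCase):
    def setUp(self):
        # A throwaway DAG for the operator to attach to.
        self.dag = DAG(
            'test_dag_id',
            default_args={'start_date': datetime.datetime(2020, 1, 1)}
        )

    # Patch run_cli on the hook so executing the operator only records the call.
    @mock.patch.object(DbtCliHook, 'run_cli')
    def test_dbt_run(self, mock_run_cli):
        operator = DbtRunOperator(task_id='run', dag=self.dag)
        operator.execute(None)
        mock_run_cli.assert_called_once_with('run')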
import datetime
from datetime import timedelta

from airflow import DAG
from airflow_dbt.operators.dbt_operator import DbtRunOperator, DbtTestOperator

execution_date = '{{ yesterday_ds }}'
now = datetime.datetime.now()
execution_hour = datetime.datetime.now().replace(microsecond=0, second=0, minute=0) - timedelta(hours=1)
dbt_tags = 'tag:daily' if now.hour == 1 else 'tag:hourly'

default_args = {
    'dir': '/checkout-dot-com'
}

with DAG(dag_id='dbt', default_args=default_args, schedule_interval='@hourly') as dag:
    dbt_run = DbtRunOperator(
        task_id='dbt_run',
        models=dbt_tags,
        vars={
            'execution_date': execution_date,
            'execution_hour': execution_hour
        },
        retries=1
    )

    dbt_test = DbtTestOperator(
        task_id='dbt_test',
        # Run will only fail if a test does, no need to retry.
        retries=0
    )

    dbt_run >> dbt_test
from airflow import DAG
from airflow_dbt.operators.dbt_operator import (
    DbtSeedOperator,
    DbtSnapshotOperator,
    DbtRunOperator,
    DbtTestOperator
)
from airflow.utils.dates import days_ago

default_args = {
    'dir': '/home/klox-dev/.venv/bin/dbt',
    'start_date': days_ago(0)
}

with DAG(dag_id='dbt_airflow', default_args=default_args, schedule_interval='@once') as dag:
    dbt_seed = DbtSeedOperator(task_id='dbt_seed')
    dbt_snapshot = DbtSnapshotOperator(task_id='dbt_snapshot')
    dbt_run = DbtRunOperator(task_id='dbt_run')
    dbt_test = DbtTestOperator(
        task_id='dbt_test',
        retries=0,  # Failing tests would fail the task, and we don't want Airflow to try again
    )

    dbt_seed >> dbt_snapshot >> dbt_run >> dbt_test
            'schema': 'example',
            'table': 'taxi_zone_lookup',
            'data_asset_name': 'taxi_zone_lookup'
        },
        'expectation_suite_name': 'custom_sql_query.warning'
    }],
    data_context_root_dir=GE_ROOT_DIR,
    dag=dag)

dbt_run = DbtRunOperator(
    task_id='dbt_run',
    dir=DBT_PROJECT_DIR,
    profiles_dir=DBT_ROOT_DIR,
    target=DBT_TARGET,
    dag=dag)

validate_transform = GreatExpectationsOperator(
    task_id='validate_transform',
    expectation_suite_name='taxi_zone_incremental.source',
    batch_kwargs={
        'datasource': 'spark-thrift-server',
        'schema': 'example',
        'table': 'taxi_zone_incremental',
        'data_asset_name': 'taxi_zone_incremental'
    },
    data_context_root_dir=GE_ROOT_DIR,
    dag=dag)
        'datasource': 'challenge_src'
    },
    dag=dag)

sync_task = BashOperator(
    task_id='sync_validations',
    depends_on_past=False,
    bash_command=templated_command,
    params={
        'task_name': 'Done with validations',
        'start_date': default_args['start_date']
    },
    dag=dag,
)

dbt_task = DbtRunOperator(task_id='dbt', dag=dag)
# dbt_task = DummyOperator(task_id='dbt', dag=dag)

done_task = BashOperator(
    task_id='done',
    depends_on_past=False,
    bash_command=templated_command,
    params={
        'task_name': 'All done',
        'start_date': default_args['start_date']
    },
    dag=dag,
)

start_task >> [valid_prod_task, valid_cust_task, valid_ordr_task] >> sync_task >> dbt_task >> done_task
        'ACL': 'public-read'
    }
}

dbt_vars = {
    'DBT_USER': Variable.get('DBT_USER'),
    'DBT_PASSWORD': Variable.get('DBT_PASSWORD')
}

with DAG(dag_id='covid_19_bokeh_app_etl',
         default_args=default_args,
         schedule_interval="0 */3 * * *") as dag:

    dbt_seed = DbtSeedOperator(task_id='dbt_seed', vars=dbt_vars)
    dbt_run = DbtRunOperator(task_id='dbt_run', vars=dbt_vars)
    dbt_test = DbtTestOperator(task_id='dbt_test', vars=dbt_vars, retries=0)

    dbt_seed >> dbt_run >> dbt_test

    extract_jobs = {
        'jhu_cases': jhu_cases_etl,
        'jhu_deaths': jhu_deaths_etl,
        'jhu_lookup': jhu_lookup_etl,
        'jhu_us_cases': jhu_us_cases_etl,
        'jhu_us_deaths': jhu_us_deaths_etl,
        'local_uk_data': local_uk_data_etl,
        'owid_global_vaccinations': owid_global_vaccinations_etl,
        'bloomberg_global_vaccinations': bloomberg_global_vaccinations_etl
    }
default_args = json.loads(Variable.get('covid19'))
default_args.update({"retry_delay": timedelta(minutes=default_args["retry_delay"])})

dbt_dir = os.environ["DBT_DIR"]
dbt_profiles_dir = os.environ["DBT_PROFILES_DIR"]

with DAG(
        'covid19_dbt',
        default_args=default_args,
        description='Managing dbt data pipeline',
        schedule_interval='@daily') as dag:

    ingest_covid19_day_task = Covid19ToIngestions(task_id='ingest_covid19_day_to_dbt', dag=dag)

    dbt_run = DbtRunOperator(
        task_id='dbt_run',
        dir=dbt_dir,
        profiles_dir=dbt_profiles_dir,
        models='covid19_stats_materialized'
    )

    dbt_test = DbtTestOperator(
        task_id='dbt_test',
        dir=dbt_dir,
        profiles_dir=dbt_profiles_dir,
        retries=0,  # Failing tests would fail the task, and we don't want Airflow to try again
        models='covid19_stats_materialized'
    )

    ingest_covid19_day_task >> dbt_run >> dbt_test
from airflow_dbt.operators.dbt_operator import DbtRunOperator, DbtTestOperator

default_args = {'retries': 0, 'start_date': datetime.datetime(2019, 8, 10)}

dag = DAG(
    'analytics_pipeline',
    default_args=default_args,
    schedule_interval=None,  # set to '0 8 * * *' for daily at 8am
    catchup=False,
    # tells airflow where sql is
    template_searchpath='/home/preidy/Code/analytics_pipeline/airflow/sql/')

extract_and_load = BigQueryOperator(
    task_id='extract_and_load',
    sql='extract_sql.sql',
    destination_dataset_table='pat-scratch.analytics_pipeline.raw__nyc_311',
    write_disposition='WRITE_TRUNCATE',  # overwrite entire table if it exists
    use_legacy_sql=False,
    location='US',
    project_id='pat-scratch',
    dag=dag)

dbt_run = DbtRunOperator(
    task_id='dbt_run',
    dir='/home/preidy/Code/analytics_pipeline/nyc311/',
    dag=dag)

dbt_test = DbtTestOperator(
    task_id='dbt_test',
    dir='/home/preidy/Code/analytics_pipeline/nyc311/',
    dag=dag)

extract_and_load >> dbt_run >> dbt_test