def histogram_aggregates_subdag(
    parent_dag_name, child_dag_name, default_args, schedule_interval, dataset_id
):
    """Build the GLAM clients_histogram_aggregates sub-DAG.

    Runs the incremental ``clients_histogram_aggregates_new`` BigQuery ETL
    query, then a nested sub-DAG that merges the new rows into the final
    aggregates table.

    :param parent_dag_name: name of the parent DAG; combined with
        ``child_dag_name`` to form this sub-DAG's dotted id.
    :param child_dag_name: name of this sub-DAG within the parent.
    :param default_args: Airflow default task arguments. Not mutated; a
        copy is made before ``depends_on_past`` is forced on.
    :param schedule_interval: schedule for the sub-DAG (matches parent).
    :param dataset_id: BigQuery dataset for source/destination tables.
    :return: the configured :class:`DAG` instance.
    """
    GLAM_HISTOGRAM_AGGREGATES_SUBDAG = "%s.%s" % (parent_dag_name, child_dag_name)

    # Copy before mutating: the original code set depends_on_past on the
    # caller's dict in place, silently changing behavior of every other DAG
    # sharing the same default_args.
    default_args = dict(default_args)
    # Each run folds new aggregates into the previous day's state, so runs
    # must execute strictly in order.
    default_args["depends_on_past"] = True

    dag = DAG(
        GLAM_HISTOGRAM_AGGREGATES_SUBDAG,
        default_args=default_args,
        schedule_interval=schedule_interval,
    )

    clients_histogram_aggregates_new = bigquery_etl_query(
        task_id="clients_histogram_aggregates_new",
        destination_table="clients_histogram_aggregates_new_v1",
        dataset_id=dataset_id,
        project_id="moz-fx-data-shared-prod",
        owner="*****@*****.**",
        email=["*****@*****.**", "*****@*****.**"],
        # Destination is not date-partitioned; pass the date explicitly.
        date_partition_parameter=None,
        parameters=("submission_date:DATE:{{ds}}",),
        arguments=("--replace",),
        dag=dag,
    )

    clients_histogram_aggregates_final = SubDagOperator(
        subdag=repeated_subdag(
            GLAM_HISTOGRAM_AGGREGATES_SUBDAG,
            GLAM_HISTOGRAM_AGGREGATES_FINAL_SUBDAG,
            default_args,
            dag.schedule_interval,
            dataset_id,
        ),
        task_id=GLAM_HISTOGRAM_AGGREGATES_FINAL_SUBDAG,
        # Override SubDagOperator's default SequentialExecutor so the nested
        # tasks are not forced to run one at a time.
        executor=get_default_executor(),
        dag=dag,
    )

    clients_histogram_aggregates_new >> clients_histogram_aggregates_final
    return dag
project_id, "--dataset", dataset_id, ], docker_image="mozilla/bigquery-etl:latest", dag=dag, ) # SubdagOperator uses a SequentialExecutor by default # so its tasks will run sequentially. clients_histogram_bucket_counts = SubDagOperator( subdag=repeated_subdag( GLAM_DAG, "clients_histogram_bucket_counts", default_args, dag.schedule_interval, dataset_id, ("submission_date:DATE:{{ds}}", ), 10, None, ), task_id="clients_histogram_bucket_counts", dag=dag, ) clients_histogram_probe_counts = bigquery_etl_query( task_id="clients_histogram_probe_counts", destination_table="clients_histogram_probe_counts_v1", dataset_id=dataset_id, project_id=project_id, owner="*****@*****.**", date_partition_parameter=None,