def add_task(task_id, bash_command):
    # Relies on module-level `environment`, `dag`, and `timedelta` defined
    # elsewhere in this file.
    return bash_operator.BashOperator(
        task_id=task_id,
        bash_command=bash_command,
        execution_timeout=timedelta(hours=15),
        env=environment,
        dag=dag)
def build_bash_operator(operator_ref, dag_ref):
    """Builds a DAG operator of type: BashOperator.

    Args:
        operator_ref (dict): the definition of the operator, with a 'task_id'
            string and a 'command' list of shell commands
        dag_ref (DAG): the reference to the dag to associate this operator
    """
    op = bash_operator.BashOperator(
        task_id=operator_ref['task_id'],
        bash_command=";".join(operator_ref['command']),
        dag=dag_ref)
    return op
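# --- Hedged usage sketch (not part of the original snippet) ---
# Shows one way build_bash_operator above might be called. The shape of the
# operator_ref dict (a 'task_id' string plus a 'command' list joined with ';')
# is inferred from the function body; the DAG name and commands are made up.
import datetime
from airflow import models

example_dag = models.DAG('example_build_bash_operator',
                         start_date=datetime.datetime(2021, 1, 1),
                         schedule_interval=None)
example_op = build_bash_operator(
    {'task_id': 'example_task', 'command': ['echo step one', 'echo step two']},
    example_dag)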
def add_export_task(toggle, task_id, bash_command, dependencies=None):
    if toggle:
        operator = bash_operator.BashOperator(
            task_id=task_id,
            bash_command=bash_command,
            execution_timeout=timedelta(hours=15),
            env=environment,
            dag=dag)
        if dependencies is not None and len(dependencies) > 0:
            for dependency in dependencies:
                if dependency is not None:
                    dependency >> operator
        return operator
    else:
        return None
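# --- Hedged usage sketch (not part of the original snippet) ---
# Shows how add_export_task above might be wired; it assumes the same
# module-level `dag` the function itself uses. Because the function returns
# None when the toggle is off, callers should guard before chaining further
# tasks. The toggle flag, task ids, and commands are made-up examples.
upstream = bash_operator.BashOperator(
    task_id='extract_example', bash_command='echo extracting', dag=dag)
maybe_export = add_export_task(
    True,  # set to False to skip creating the export task entirely
    task_id='export_example',
    bash_command='echo exporting',
    dependencies=[upstream])
# When the toggle is off, maybe_export is None and nothing is added to the DAG.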
def convert_to_airflow_op(self):
    return bash_operator.BashOperator(
        bash_command='exit 1',
        task_id=self.task_id,
        trigger_rule=self.trigger_rule,
    )
# See the License for the specific language governing permissions and
# limitations under the License.

# [START composer_quickstart]
import datetime

import airflow
from airflow.operators import bash_operator

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}

with airflow.DAG(
        'composer_sample_dag',
        catchup=False,
        default_args=default_args,
        schedule_interval=datetime.timedelta(days=1)) as dag:

    # Print the dag_run id from the Airflow logs
    print_dag_run_conf = bash_operator.BashOperator(
        task_id='print_dag_run_conf', bash_command='echo {{ dag_run.id }}')
# [END composer_quickstart]
from airflow import models
from airflow.operators import bash_operator
from airflow.operators.gcs_to_bq import GCSToBigQueryOperator
from airflow.utils.dates import days_ago

args = {
    'owner': 'airflow',
    'start_date': days_ago(2)
}

dag = models.DAG(
    dag_id='example_gcs_to_bq_operator',
    default_args=args,
    schedule_interval=None,
    tags=['example'])

create_test_dataset = bash_operator.BashOperator(
    task_id='create_airflow_test_dataset',
    bash_command='bq mk airflow_test',
    dag=dag)

# [START howto_operator_gcs_to_bq]
load_csv = GCSToBigQueryOperator(
    task_id='gcs_to_bq_example',
    bucket='cloud-samples-data',
    source_objects=['bigquery/us-states/us-states.csv'],
    destination_project_dataset_table='airflow_test.gcs_to_bq_table',
    schema_fields=[
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'post_abbr', 'type': 'STRING', 'mode': 'NULLABLE'},
    ],
    write_disposition='WRITE_TRUNCATE',
    dag=dag)
# [END howto_operator_gcs_to_bq]
    'start_date': yesterday,
    'email': email,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'project_id': gcp_project
}

with models.DAG('product_table',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    bq_make_raw_dataset = bash_operator.BashOperator(
        task_id='make_bq_raw_dataset',
        bash_command='bq --location=asia-southeast1 ls {} || '
                     'bq --location=asia-southeast1 mk {}'.format(
                         bq_raw_dataset_name, bq_raw_dataset_name))

    raw_sql_files = read_sql_from_gcs(bq_raw_dataset_name, gcs_bucket)

    bq_start_making_raw_tables = dummy_operator.DummyOperator(
        task_id='start_making_raw_tables')

    bq_end_making_raw_tables = dummy_operator.DummyOperator(
        task_id='end_making_raw_tables')

    for filename in raw_sql_files:
        sql_statement = raw_sql_files[filename].decode()
        table_name = filename.replace('.sql', '')
        table_name = table_name.replace('raw/', '')
import datetime

from airflow import models
from airflow.operators import bash_operator
from airflow.operators import python_operator

yesterday = datetime.datetime.combine(
    datetime.datetime.today() - datetime.timedelta(1),
    datetime.datetime.min.time())

default_dag_args = {'start_date': yesterday}

with models.DAG('running_python_and_bash_operator',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    def hello_world():
        print('Hello World!')
        return 1

    def greeting():
        print('Greetings from GCP! Happy shopping.')
        return 'Greeting successfully printed.'

    hello_world_greeting = python_operator.PythonOperator(
        task_id='python_1', python_callable=hello_world)

    sales_greeting = python_operator.PythonOperator(
        task_id='python_2', python_callable=greeting)

    bash_greeting = bash_operator.BashOperator(
        task_id='bye_bash',
        bash_command='echo Goodbye! Hope to see you soon.')

    hello_world_greeting >> sales_greeting >> bash_greeting
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(
        'composer_sample_simple_greeting',
        schedule_interval=datetime.timedelta(days=1),
        default_args=default_dag_args) as dag:
    # [END composer_simple_define_dag_airflow_1]
    # [START composer_simple_operators_airflow_1]
    def greeting():
        import logging
        logging.info('Hello World!')

    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    hello_python = python_operator.PythonOperator(
        task_id='hello', python_callable=greeting)

    # Likewise, the goodbye_bash task calls a Bash script.
    goodbye_bash = bash_operator.BashOperator(
        task_id='bye', bash_command='echo Goodbye.')
    # [END composer_simple_operators_airflow_1]

    # [START composer_simple_relationships_airflow_1]
    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, hello_python executes before goodbye_bash.
    hello_python >> goodbye_bash
    # [END composer_simple_relationships_airflow_1]
# [END composer_simple_airflow_1]
default_dag_args = {
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    # 'retry_delay': datetime.timedelta(minutes=5),
    'start_date': datetime.datetime.today() - datetime.timedelta(days=1)
}

with models.DAG('lastfm-1k-ingest',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    dataflow = dataflow_operator.DataFlowPythonOperator(
        task_id='ingest-users-dataflow',
        py_file='gs://{}/lastfm-dataset-1K/code/ingest-users.py'.format(PROJECT),
        job_name='ingest-users-dataflow',
        py_options=[],
        dataflow_default_options={
            'project': PROJECT,
            'region': 'europe-west1'
        },
        options={},
        poll_sleep=30)

    start = bash_operator.BashOperator(task_id='start',
                                       bash_command='echo "Start"')
    end = bash_operator.BashOperator(task_id='end',
                                     bash_command='echo "End"')

    start >> dataflow >> end
import datetime

import airflow
from airflow.operators import bash_operator

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': datetime.datetime(2017, 1, 1),
}

with airflow.DAG(
        'sample_dag',
        default_args=default_args,
        # Not scheduled, trigger only
        schedule_interval=None) as dag:

    # Print the dag_run's configuration, which includes information about the
    # Cloud Storage object change.
    print_gcs_info = bash_operator.BashOperator(
        task_id='print_gcs_info',
        bash_command='echo Running: {{ dag_run.conf }}')
import datetime

import airflow
from airflow.operators import bash_operator
from airflow.operators import python_operator
from airflow.contrib.operators import kubernetes_pod_operator

YESTERDAY = datetime.datetime.now(
    tz=datetime.timezone.utc) - datetime.timedelta(days=1)

default_args = {'start_date': YESTERDAY}

dag = airflow.DAG('simple_workflow_dag',
                  default_args=default_args,
                  schedule_interval=None)

bash_operator_task = bash_operator.BashOperator(
    task_id='bash_operator_example_task',
    bash_command='echo "Hello from Airflow Bash Operator"',
    dag=dag)

def python_operator_func():
    print("Hello from Airflow Python Operator")

python_operator_task = python_operator.PythonOperator(
    task_id='python_operator_example_task',
    python_callable=python_operator_func,
    dag=dag)

kubernetes_pod_operator_task = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='k8s_pod_operator_example_task',
    name='k8s_pod_example',
# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(
        'composer_sample_gcloud_ssh_2',
        schedule_interval=datetime.timedelta(days=1),
        default_args=default_dag_args) as dag:

    def greeting():
        import logging
        logging.info('Hello World!')

    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    hello_python = python_operator.PythonOperator(
        task_id='hello', python_callable=greeting)

    gcloud_ssh = bash_operator.BashOperator(
        task_id='gcloud_ssh',
        bash_command=gcloud_command
    )

    # Likewise, the goodbye_bash task calls a Bash script.
    goodbye_bash = bash_operator.BashOperator(
        task_id='bye', bash_command='echo Goodbye.')

    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, hello_python executes before goodbye_bash.
    hello_python >> gcloud_ssh >> goodbye_bash
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': datetime.datetime(2017, 1, 1),
}

with airflow.DAG('composer_trigger_gcs_to_bq_dag',
                 default_args=default_args,
                 schedule_interval=None) as dag:  # Not scheduled, trigger only

    # Print the dag_run's configuration, which includes information about the
    # Cloud Storage object change.
    print_gcs_info = bash_operator.BashOperator(
        task_id='print_gcs_info',
        bash_command='echo {{ dag_run.conf }}')

    # [Create dataset in BQ]
    create_test_dataset = bash_operator.BashOperator(
        task_id='create_test_dataset',
        bash_command='bq mk airflow_test1')

    # [Upload CSV from GCS to BQ using load]
    Upload_csv = bash_operator.BashOperator(
        task_id='Upload_csv',
        bash_command='bq load --autodetect --source_format=CSV '
                     'airflow_test1.simple gs://prp-source/simple.csv')
default_dag_args = {
    'start_date': yesterday,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=2)
}

with models.DAG('python_and_bash_with_all_success_trigger',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    def hello_world():
        # Fails on purpose to demonstrate the ALL_SUCCESS trigger rule below.
        raise ValueError('Oops! something went wrong.')
        print('Hello World!')
        return 1

    def greeting():
        print('Greetings from SpikeySales! Happy shopping.')
        return 'Greeting successfully printed.'

    hello_world_greeting = python_operator.PythonOperator(
        task_id='python_1', python_callable=hello_world)

    spikeysales_greeting = python_operator.PythonOperator(
        task_id='python_2', python_callable=greeting)

    bash_greeting = bash_operator.BashOperator(
        task_id='bye_bash',
        bash_command='echo Goodbye! Hope to see you soon.',
        trigger_rule=trigger_rule.TriggerRule.ALL_SUCCESS)

    hello_world_greeting >> spikeysales_greeting >> bash_greeting
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': TOMORROW,
}

dag = DAG(
    'Airflow_Bigquery',
    default_args=default_args,
    description='Load and transform data from Google Cloud Storage to '
                'Google BigQuery with Airflow',
)

start_operator = dummy_operator.DummyOperator(task_id='Begin_execution',
                                              dag=dag)

create_dataset = bash_operator.BashOperator(
    task_id='create_airflow_iot_dataset',
    bash_command='bq mk iot',
    dag=dag)

load_csv = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
    task_id='gcs_to_bq',
    bucket='bucket1_hazem',
    source_objects=['heartRate-final.csv'],
    destination_project_dataset_table='iot.heartRateTable',
    trigger_rule='all_done',
    skip_leading_rows=1,
    schema_fields=[
        {
            'name': 'sensorID',
            'type': 'STRING',
            'mode': 'NULLABLE'
        },
        {
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example DAG demonstrating use of variables and how to test it."""

import datetime

from airflow import models
from airflow.operators import bash_operator
from airflow.operators import dummy_operator

yesterday = datetime.datetime.now() - datetime.timedelta(days=1)

default_dag_args = {
    'start_date': yesterday,
}

with models.DAG('composer_sample_cycle',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:
    start = dummy_operator.DummyOperator(task_id='start')
    end = dummy_operator.DummyOperator(task_id='end')
    variable_example = bash_operator.BashOperator(
        task_id='variable_example',
        bash_command='echo project_id=' + models.Variable.get('gcp_project'))
remove_cluster = dataproc_operator.DataprocClusterDeleteOperator(
    project_id=PROJECT,
    task_id="delete_cluster",
    cluster_name='vf-polimi-demo',
    region='europe-west1')

def check_batch_kpi_scheduled_cluster_running(**kwargs):
    ti = kwargs['ti']
    xcom_value = ti.xcom_pull(task_ids='batch_kpi_scheduled_cluster')
    if xcom_value == "vf-polimi-demo":
        return 'delete_cluster'
    else:
        return 'end'

branch_batch_kpi_scheduled_active_cluster = BranchPythonOperator(
    task_id='check_batch_kpi_scheduled_cluster',
    provide_context=True,
    python_callable=check_batch_kpi_scheduled_cluster_running)

batch_kpi_scheduled_cluster_running = bash_operator.BashOperator(
    task_id='batch_kpi_scheduled_cluster',
    bash_command="gcloud dataproc clusters list --region europe-west1 | "
                 "grep 'vf-polimi-demo' | awk '{print $1; exit}'",
    xcom_push=True,
    trigger_rule="all_done")

end_pipeline = dummy_operator.DummyOperator(task_id='end')

create_dataproc_cluster >> run_batch_kpi_scheduled >> batch_kpi_scheduled_cluster_running >> \
    branch_batch_kpi_scheduled_active_cluster >> [remove_cluster, end_pipeline]
        'bindexis_end2end',
        schedule_interval=datetime.timedelta(days=1),  # or in cron format
        default_args=default_dag_args) as dag:

    # An instance of an operator is called a task. In this case, the
    # bindexis_python task calls the "bindexis_dataload" Python function.
    bindexis_python = python_operator.PythonOperator(
        task_id='bindexis-dataload-start',
        python_callable=def_bindexis_dataload.bindexis_dataload,
        op_kwargs={'user_bindexis': Variable.get("user_bindexis"),
                   'pw_bindexis': Variable.get("password_bindexis")},
        retries=2)

    # Likewise, the end_bash task calls a Bash script.
    end_bash = bash_operator.BashOperator(
        task_id='bindexis-end',
        bash_command='echo bindexis-dataload-end.')

    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, bindexis_python executes before end_bash.
    bindexis_python >> end_bash

    # Send email confirmation
    # email_summary = EmailOperator(
    #     task_id='email_summary',
    #     to=models.Variable.get('email'),
    #     subject='ERROR: Bindexis Dataload and Trigger',
    #     html_content="""
    #     Bindexis Dataload fails.
    #     Error: {ERROR_FROM_LOG}.
import datetime

from airflow import models
from airflow.operators import bash_operator

with models.DAG(
        'composer_hello_world',
        schedule_interval=datetime.timedelta(days=1),
        default_args={'start_date': datetime.datetime(2020, 9, 1)}) as dag:

    goodbye_bash = bash_operator.BashOperator(
        task_id='hello_world',
        bash_command='ls $DAGS_FOLDER')
import datetime

from airflow import models
from airflow.operators import bash_operator

default_dag_args = {
    'start_date': datetime.datetime(2018, 12, 17, 0, 0),
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=2),
    'project_id': models.Variable.get('gcp_project')
}

source_bucket = models.Variable.get('gcs_source_bucket')
dest_bucket = models.Variable.get('gcs_dest_bucket')

with models.DAG('transferring_data_from_gcs_to_gcs',
                schedule_interval=None,
                default_args=default_dag_args) as dag:

    transfer_data_gcs_to_gcs = bash_operator.BashOperator(
        task_id='data_transfer_gcs_to_gcs',
        bash_command='gsutil cp -r {source} {dest}'.format(
            source=source_bucket, dest=dest_bucket))

    transfer_data_gcs_to_gcs
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'project_id': models.Variable.get('gcp_project')
}

with models.DAG(
        'composer_sample_bq_notify',
        schedule_interval=datetime.timedelta(weeks=4),
        default_args=default_dag_args) as dag:
    # [END composer_notify_failure]

    # [START composer_bash_bq]
    # Create BigQuery output dataset.
    make_bq_dataset = bash_operator.BashOperator(
        task_id='make_bq_dataset',
        # Executing 'bq' command requires Google Cloud SDK which comes
        # preinstalled in Cloud Composer.
        bash_command='bq ls {} || bq mk {}'.format(
            bq_dataset_name, bq_dataset_name))
    # [END composer_bash_bq]

    # [START composer_bigquery]
    # Query recent StackOverflow questions.
    bq_recent_questions_query = bigquery_operator.BigQueryOperator(
        task_id='bq_recent_questions_query',
        sql="""
        SELECT owner_display_name, title, view_count
        FROM `bigquery-public-data.stackoverflow.posts_questions`
        WHERE creation_date < CAST('{max_date}' AS TIMESTAMP)
            AND creation_date >= CAST('{min_date}' AS TIMESTAMP)
        ORDER BY view_count DESC
        LIMIT 100
    from airflow.contrib.operators import gcs_to_bq
except ImportError:
    pass

if gcs_to_bq is not None:
    args = {
        'owner': 'Datametica',
        'start_date': airflow.utils.dates.days_ago(2)
    }

    dag = models.DAG(dag_id='gcs_to_bq_operator',
                     default_args=args,
                     schedule_interval=None)

    create_test_dataset = bash_operator.BashOperator(
        task_id='create_airflow_test_dataset_1',
        bash_command='bq mk airflow_test_1',
        dag=dag)

    # [START howto_operator_gcs_to_bq]
    load_csv = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
        task_id='gcs_to_bq_example',
        bucket='dataflow_poc11',
        source_objects=['task1.csv'],
        destination_project_dataset_table='gcs-bq.airflow_test.gcs_to_bq_table_1',
        schema_fields=[
            {
                'name': 'name',
                'type': 'STRING',
                'mode': 'NULLABLE'
            },
    if x <= 2:
        return 'hello_spikey'
    else:
        return 'dummy'

run_this_first = dummy_operator.DummyOperator(task_id='run_this_first')

# BranchPythonOperator takes in a callable which returns the task id of the
# next task.
branching = python_operator.BranchPythonOperator(
    task_id='branching', python_callable=makeBranchChoice)

run_this_first >> branching

spikeysales_greeting = python_operator.PythonOperator(
    task_id='hello_spikey', python_callable=greeting)

dummy_followed_python = dummy_operator.DummyOperator(task_id='follow_python')

dummy = dummy_operator.DummyOperator(task_id='dummy')

bash_greeting = bash_operator.BashOperator(
    task_id='bye_bash',
    bash_command='echo Goodbye! Hope to see you soon.',
    trigger_rule='one_success')

branching >> spikeysales_greeting >> dummy_followed_python >> bash_greeting
branching >> dummy >> bash_greeting
import datetime

import airflow
from airflow.operators import bash_operator

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}

with airflow.DAG(
        'composer_sample_dag',
        catchup=False,
        default_args=default_args,
        schedule_interval=None) as dag:
    # schedule_interval=datetime.timedelta(days=1)) as dag:

    # Print the dag_run id from the Airflow logs
    print_dag_run_conf = bash_operator.BashOperator(
        task_id='print_dag_run_conf',
        bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"')
def greeting(ds, **kwargs):
    import logging
    conf, ti = kwargs["dag_run"].conf or {}, kwargs["ti"]
    logging.info(
        f'Hello! conf: {conf.get("key")}, xcom: {ti.xcom_pull(key="xcomkey")}'
    )

set_xcoms = python_operator.PythonOperator(task_id="set_xcoms",
                                           provide_context=True,
                                           python_callable=set_xcom_fn)

greet_python = python_operator.PythonOperator(task_id='greeting',
                                              provide_context=True,
                                              python_callable=greeting)

# bash_command='echo dagrun: {{ dag_run.conf }} / airflow_val: {{ var.value.project_id }}'
yo_dagrun = bash_operator.BashOperator(
    task_id='yo',
    bash_command='echo dagrun: {{ dag_run.conf["key"] }} / '
                 'airflow_val: {{ var.value.project_id }}')

# Likewise, the goodbye_bash task calls a Bash script.
goodbye_bash = bash_operator.BashOperator(
    task_id='bye',
    bash_command='echo {{ ti.xcom_pull(key="xcomkey") }}')

# Define the order in which the tasks complete by using the >> and <<
# operators. In this example, set_xcoms executes first and goodbye_bash last.
set_xcoms >> greet_python >> yo_dagrun >> goodbye_bash
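# --- Hedged note (not part of the original snippet) ---
# The yo_dagrun task above renders dag_run.conf["key"], so this DAG is meant to
# be triggered with a conf payload. With the Airflow 1.10-era CLI these samples
# use, that would look roughly like (DAG id assumed, payload is an example):
#
#   airflow trigger_dag -c '{"key": "some value"}' <dag_id>
#
# Without a conf payload, rendering dag_run.conf["key"] would fail at runtime.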
    datetime.datetime.min.time())

# [START composer_notify_failure]
default_dag_args = {
    'start_date': yesterday,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'project_id': models.Variable.get('gcp_project')
}

with models.DAG(
        'bqml_demo_composer',
        schedule_interval=datetime.timedelta(weeks=4),
        default_args=default_dag_args) as dag:
    # [END composer_notify_failure]

    # Create BQML model.
    create_bqml_model = bash_operator.BashOperator(
        task_id='create_bqml_model',
        bash_command='bq query "$(gsutil cat gs://anand-bq-test-2-2/bqdemo/query2.txt)"',
        trigger_rule=trigger_rule.TriggerRule.ALL_DONE)

    # Define DAG dependencies.
    (
        create_bqml_model
    )
import airflow
import datetime

from airflow import DAG
from airflow.operators import bash_operator, dummy_operator

default_args = {
    'owner': 'Nitin Ware',
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': airflow.utils.dates.days_ago(1),
}

dag = DAG(
    'bash_dag',
    catchup=False,
    default_args=default_args,
    schedule_interval="@once",
)

start_dag = dummy_operator.DummyOperator(
    task_id='start',
    dag=dag,
)

bash_dag = bash_operator.BashOperator(task_id='bash_command',
                                      bash_command='echo Hello Bash.',
                                      dag=dag)

start_dag >> bash_dag
""" Simple DAG for using Airflow """ import datetime import logging from airflow import models from airflow.operators import bash_operator from airflow.operators import python_operator DEFAULT_DAG_ARGS = {'start_date': datetime.datetime(2018, 1, 1)} with models.DAG('composer_sample_greeting', schedule_interval=datetime.timedelta(days=1), default_args=DEFAULT_DAG_ARGS) as dag: def _hello_python(): """ A method here """ logging.info('Hello World!') HELLO_PYTHON = python_operator.PythonOperator( task_id='HELLO_PYTHON', python_callable=_hello_python) GOODBYE_BASH = bash_operator.BashOperator(task_id='GOODBYE_BASH', bash_command='echo Goodbye') HELLO_PYTHON >> GOODBYE_BASH
    '{{ macros.ds_format(macros.ds_add(ds, 0), "%Y-%m-%d", "%Y%m%d") }}Z-' + user_id + '-'
]

bd_dates = [
    '{{ macros.ds_format(macros.ds_add(ds, -1), "%Y-%m-%d", "%Y%m%d") }}',
    '{{ macros.ds_format(macros.ds_add(ds, 0), "%Y-%m-%d", "%Y%m%d") }}'
]

# Process two days of data keyed on UTC so it can be presented in Korea
# Standard Time (KST).
for i in range(2):
    output_directory = '{}/data/log/rescuetime'.format(datalake_gs)

    # Fetch two days of RescueTime data (KST, +09:00) in a single call so it
    # can be stored keyed on UTC.
    load_rescuetime = bash_operator.BashOperator(
        task_id=('load_rescuetime-%s' % i),
        bash_command=(
            'java -jar ${{AIRFLOW_HOME}}/dags/dd-importers-load-rescuetime.jar '
            '-user_id={} -api_key={} -input_begin_date={} -input_end_date={} '
            '-input_timezone=Asia/Seoul -output_date={} -output_timezone=UTC '
            '-output_directory={} -output_filenameprefix={} -shard_size=3'
        ).format(user_id, api_key, input_begin_dates[i], input_end_dates[i],
                 input_begin_dates[i], output_directory,
                 output_filename_prefixes[i]),
        dag=dag)

    create_rescuetime_bd = dataflow_operator.DataflowTemplateOperator(
        task_id=('create_rescuetime_bd-%s' % i),
        template='{}/templates/dd-etls-create-rescuetime'.format(dataflow_gs),
        parameters={
            'runner': 'DataflowRunner',
            'inputFilePattern':
                '{}/data/log/rescuetime/{}Z-*'.format(datalake_gs, bd_dates[i]),
            'outputTable':
                '{}:dw_datadriver.rescuetime_tbl_bd_data${}'.format(
                    project_id, bd_dates[i])