def test_should_generate_DAG_file_from_workflow_with_hourly_scheduling(
        self, get_timezone_offset_seconds_mock):
    # given
    workdir = os.path.dirname(__file__)
    # a 2-hour timezone offset shifts the 10:00 start time to an 08:00 start_date in the generated DAG
    get_timezone_offset_seconds_mock.return_value = 2 * 3600
    docker_repository = 'eu.gcr.io/my_docker_repository_project/my-project'

    job1 = Job(id='job1',
               component=mock.Mock(),
               retry_count=10,
               retry_pause_sec=20)
    job2 = Job(id='job2',
               component=mock.Mock(),
               retry_count=100,
               retry_pause_sec=200)
    job3 = Job(id='job3',
               component=mock.Mock(),
               retry_count=100,
               retry_pause_sec=200)
    w_job1 = WorkflowJob(job1, 1)
    w_job2 = WorkflowJob(job2, 2)
    w_job3 = WorkflowJob(job3, 3)
    # job1 precedes job2 and job3; job2 precedes job3
    graph = {
        w_job1: (w_job2, w_job3),
        w_job2: (w_job3,),
    }
    workflow = Workflow(
        workflow_id='my_workflow',
        definition=Definition(graph),
        start_time_factory=hourly_start_time,
        schedule_interval='@hourly')

    # when
    dag_file_path = generate_dag_file(
        workdir,
        docker_repository,
        workflow,
        '2020-07-02 10:00:00',
        '0.3.0',
        'ca')

    # then
    self.assertEqual(
        dag_file_path,
        workdir + '/.dags/my_workflow__v0_3_0__2020_07_02_10_00_00_dag.py')

    dag_file_content = Path(dag_file_path).read_text()
    expected_dag_content = '''
import datetime
from airflow import DAG
from airflow.contrib.operators import kubernetes_pod_operator

default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': datetime.datetime(2020, 7, 2, 8, 0),
    'email_on_failure': False,
    'email_on_retry': False,
    'execution_timeout': datetime.timedelta(seconds=10800),
}

dag = DAG(
    'my_workflow__v0_3_0__2020_07_02_10_00_00',
    default_args=default_args,
    max_active_runs=1,
    schedule_interval='@hourly'
)

tjob1 = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='job1',
    name='job1',
    cmds=['bf'],
    arguments=['run', '--job', 'my_workflow.job1', '--runtime', '{{ execution_date.strftime("%Y-%m-%d %H:%M:%S") }}', '--project-package', 'ca', '--config', '{{var.value.env}}'],
    namespace='default',
    image='eu.gcr.io/my_docker_repository_project/my-project:0.3.0',
    is_delete_operator_pod=True,
    retries=10,
    retry_delay=datetime.timedelta(seconds=20),
    dag=dag,
    execution_timeout=datetime.timedelta(seconds=10800))

tjob2 = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='job2',
    name='job2',
    cmds=['bf'],
    arguments=['run', '--job', 'my_workflow.job2', '--runtime', '{{ execution_date.strftime("%Y-%m-%d %H:%M:%S") }}', '--project-package', 'ca', '--config', '{{var.value.env}}'],
    namespace='default',
    image='eu.gcr.io/my_docker_repository_project/my-project:0.3.0',
    is_delete_operator_pod=True,
    retries=100,
    retry_delay=datetime.timedelta(seconds=200),
    dag=dag,
    execution_timeout=datetime.timedelta(seconds=10800))

tjob2.set_upstream(tjob1)

tjob3 = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='job3',
    name='job3',
    cmds=['bf'],
    arguments=['run', '--job', 'my_workflow.job3', '--runtime', '{{ execution_date.strftime("%Y-%m-%d %H:%M:%S") }}', '--project-package', 'ca', '--config', '{{var.value.env}}'],
    namespace='default',
    image='eu.gcr.io/my_docker_repository_project/my-project:0.3.0',
    is_delete_operator_pod=True,
    retries=100,
    retry_delay=datetime.timedelta(seconds=200),
    dag=dag,
    execution_timeout=datetime.timedelta(seconds=10800))

tjob3.set_upstream(tjob2)
tjob3.set_upstream(tjob1)
'''

    self.assert_files_are_equal(expected_dag_content, dag_file_content)
def test_should_generate_DAG_file_from_workflow_with_daily_scheduling(self):
    # given
    workdir = os.path.dirname(__file__)
    docker_repository = 'eu.gcr.io/my_docker_repository_project/my-project'

    job1 = Job(id='job1',
               component=mock.Mock(),
               retry_count=10,
               retry_pause_sec=20)
    w_job1 = WorkflowJob(job1, 1)
    graph = {w_job1: ()}
    workflow = Workflow(
        workflow_id='my_daily_workflow',
        definition=Definition(graph),
        schedule_interval='@daily')

    # when
    dag_file_path = generate_dag_file(
        workdir,
        docker_repository,
        workflow,
        '2020-07-01',
        '0.3.0',
        'ca')

    # then
    self.assertEqual(
        dag_file_path,
        workdir + '/.dags/my_daily_workflow__v0_3_0__2020_07_01_00_00_00_dag.py')

    dag_file_content = Path(dag_file_path).read_text()
    # the generated start_date is one @daily interval (24 hours) before the given start time
    expected_dag_content = '''
from airflow import DAG
from datetime import timedelta
from datetime import datetime
from airflow.contrib.operators import kubernetes_pod_operator

default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': datetime.strptime("2020-07-01", "%Y-%m-%d") - (timedelta(hours=24)),
    'email_on_failure': False,
    'email_on_retry': False,
    'execution_timeout': timedelta(minutes=90)
}

dag = DAG(
    'my_daily_workflow__v0_3_0__2020_07_01_00_00_00',
    default_args=default_args,
    max_active_runs=1,
    schedule_interval='@daily'
)

tjob1 = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='job1',
    name='job1',
    cmds=['bf'],
    arguments=['run', '--job', 'my_daily_workflow.job1', '--runtime', '{{ execution_date.strftime("%Y-%m-%d %H:%M:%S") }}', '--project-package', 'ca', '--config', '{{var.value.env}}'],
    namespace='default',
    image='eu.gcr.io/my_docker_repository_project/my-project:0.3.0',
    is_delete_operator_pod=True,
    retries=10,
    retry_delay=timedelta(seconds=20),
    dag=dag)
'''

    self.assert_files_are_equal(expected_dag_content, dag_file_content)