def test_render_template(self):
    # Given
    operator = SparkSubmitOperator(task_id='spark_submit_job', dag=self.dag, **self._config)
    ti = TaskInstance(operator, DEFAULT_DATE)

    # When
    ti.render_templates()

    # Then
    expected_application_args = [
        '-f', 'foo',
        '--bar', 'bar',
        '--start', (DEFAULT_DATE - timedelta(days=1)).strftime("%Y-%m-%d"),
        '--end', DEFAULT_DATE.strftime("%Y-%m-%d"),
        '--with-spaces', 'args should keep embdedded spaces',
    ]
    expected_name = 'spark_submit_job'
    self.assertListEqual(expected_application_args, getattr(operator, '_application_args'))
    self.assertEqual(expected_name, getattr(operator, '_name'))
def test_render_template(self):
    ti = TaskInstance(self.operator, DEFAULT_DATE)
    ti.render_templates()
    expected_args = {
        'Name': 'test_job_flow',
        'ReleaseLabel': '5.11.0',
        'Steps': [{
            'Name': 'test_step',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    '/usr/lib/spark/bin/run-example',
                    (DEFAULT_DATE - timedelta(days=1)).strftime("%Y-%m-%d"),
                    DEFAULT_DATE.strftime("%Y-%m-%d"),
                ]
            }
        }]
    }
    self.assertDictEqual(self.operator.job_flow_overrides, expected_args)
def test_load_templated_yaml(self):
    dag = DAG(dag_id='example_cloudbuild_operator', start_date=TEST_DEFAULT_DATE)
    with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w+t') as build:
        build.writelines("""
        steps:
        - name: 'ubuntu'
          args: ['echo', 'Hello {{ params.name }}!']
        """)
        build.seek(0)
        body_path = build.name
        operator = CloudBuildCreateOperator(
            body=body_path, task_id="task-id", dag=dag, params={'name': 'airflow'}
        )
        operator.prepare_template()
        ti = TaskInstance(operator, TEST_DEFAULT_DATE)
        ti.render_templates()
        expected_body = {
            'steps': [{'name': 'ubuntu', 'args': ['echo', 'Hello airflow!']}]
        }
        self.assertEqual(expected_body, operator.body)
def test_bigquery_operator_defaults(self, mock_hook):
    operator = BigQueryOperator(
        task_id=TASK_ID,
        sql='Select * from test_table',
        dag=self.dag,
        default_args=self.args,
    )
    operator.execute(None)
    mock_hook.return_value \
        .get_conn() \
        .cursor() \
        .run_query \
        .assert_called_once_with(
            sql='Select * from test_table',
            destination_dataset_table=None,
            write_disposition='WRITE_EMPTY',
            allow_large_results=False,
            flatten_results=None,
            udf_config=None,
            maximum_billing_tier=None,
            maximum_bytes_billed=None,
            create_disposition='CREATE_IF_NEEDED',
            schema_update_options=(),
            query_params=None,
            labels=None,
            priority='INTERACTIVE',
            time_partitioning=None,
            api_resource_configs=None,
            cluster_fields=None,
        )
    self.assertTrue(isinstance(operator.sql, six.string_types))
    ti = TaskInstance(task=operator, execution_date=DEFAULT_DATE)
    ti.render_templates()
    self.assertTrue(isinstance(ti.task.sql, six.string_types))
def test_render_template(self):
    operator = KylinCubeOperator(
        task_id="kylin_build_1",
        kylin_conn_id='kylin_default',
        project="{{ params.project }}",
        cube="{{ params.cube }}",
        command="{{ params.command }}",
        start_time="{{ params.start_time }}",
        end_time="{{ params.end_time }}",
        is_track_job=True,
        dag=self.dag,
        params={
            'project': 'learn_kylin',
            'cube': 'kylin_sales_cube',
            'command': 'build',
            'start_time': '1483200000000',
            'end_time': '1483286400000',
        },
    )
    ti = TaskInstance(operator, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual('learn_kylin', getattr(operator, 'project'))
    self.assertEqual('kylin_sales_cube', getattr(operator, 'cube'))
    self.assertEqual('build', getattr(operator, 'command'))
    self.assertEqual('1483200000000', getattr(operator, 'start_time'))
    self.assertEqual('1483286400000', getattr(operator, 'end_time'))
def test_dag_sample_w_template_mock(mocker: MockerFixture, mock_xcom_pull, mock_xcom_push):
    # pytest-mock undoes the patching automatically at the end of the test:
    # https://github.com/pytest-dev/pytest-mock#pytest-mock
    mocker.patch.object(TaskInstance, 'xcom_pull', mock_xcom_pull)
    mocker.patch.object(TaskInstance, 'xcom_push', mock_xcom_push)

    dagbag = models.DagBag(dag_folder=DAG_DIR, include_examples=False)
    dag = dagbag.get_dag(dag_id="dag_sample_w_template")  # type: models.DAG
    dagrun = dag.create_dagrun(run_id="run_id", state=state.State.RUNNING,
                               execution_date=DEFAULT_DATE, conf=configuration.conf)
    for t in dagrun.dag.tasks:
        ti = TaskInstance(task=t, execution_date=DEFAULT_DATE)
        # Print the templated fields before and after rendering.
        print(f'task_id: {ti.task_id}, '
              f'provide_context: {ti.task.provide_context if hasattr(ti.task, "provide_context") else "False"}, '
              f'callable: {ti.task.python_callable.__name__ if hasattr(ti.task, "python_callable") else "False"}, '
              f'bash_command: {ti.task.bash_command if hasattr(ti.task, "bash_command") else "False"}')
        ti.render_templates()
        print(f'task_id: {ti.task_id}, '
              f'provide_context: {ti.task.provide_context if hasattr(ti.task, "provide_context") else "False"}, '
              f'callable: {ti.task.python_callable.__name__ if hasattr(ti.task, "python_callable") else "False"}, '
              f'bash_command: {ti.task.bash_command if hasattr(ti.task, "bash_command") else "False"}')
        print(f'task: {ti.task_id} ok')
def test_render_template(self):
    ti = TaskInstance(self.mock_operator, DEFAULT_DATE)
    ti.render_templates()
    expected_rendered_template = {'$lt': '2017-01-01T00:00:00+00:00Z'}
    self.assertDictEqual(expected_rendered_template, getattr(self.mock_operator, 'mongo_query'))
def test_render_template_from_file(self):
    self.operator.job_flow_overrides = 'job.j2.json'
    self.operator.params = {'releaseLabel': '5.11.0'}
    ti = TaskInstance(self.operator, DEFAULT_DATE)
    ti.render_templates()

    self.emr_client_mock.run_job_flow.return_value = RUN_JOB_FLOW_SUCCESS_RETURN
    emr_session_mock = MagicMock()
    emr_session_mock.client.return_value = self.emr_client_mock
    boto3_session_mock = MagicMock(return_value=emr_session_mock)
    with patch('boto3.session.Session', boto3_session_mock):
        self.operator.execute(None)

    expected_args = {
        'Name': 'test_job_flow',
        'ReleaseLabel': '5.11.0',
        'Steps': [
            {
                'Name': 'test_step',
                'ActionOnFailure': 'CONTINUE',
                'HadoopJarStep': {
                    'Jar': 'command-runner.jar',
                    'Args': [
                        '/usr/lib/spark/bin/run-example',
                        '2016-12-31',
                        '2017-01-01',
                    ],
                },
            }
        ],
    }
    assert self.operator.job_flow_overrides == expected_args
def test_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)  # pylint: disable=attribute-defined-outside-init
    op = CloudDataTransferServiceGCSToGCSOperator(
        source_bucket='{{ dag.dag_id }}',
        destination_bucket='{{ dag.dag_id }}',
        description='{{ dag.dag_id }}',
        object_conditions={'exclude_prefixes': ['{{ dag.dag_id }}']},
        gcp_conn_id='{{ dag.dag_id }}',
        task_id=TASK_ID,
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    assert dag_id == getattr(op, 'source_bucket')
    assert dag_id == getattr(op, 'destination_bucket')
    assert dag_id == getattr(op, 'description')
    # pylint: disable=unsubscriptable-object
    assert dag_id == getattr(op, 'object_conditions')['exclude_prefixes'][0]
    # pylint: enable=unsubscriptable-object
    assert dag_id == getattr(op, 'gcp_conn_id')
def test_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)  # pylint:disable=attribute-defined-outside-init
    op = S3ToGoogleCloudStorageTransferOperator(
        s3_bucket='{{ dag.dag_id }}',
        gcs_bucket='{{ dag.dag_id }}',
        description='{{ dag.dag_id }}',
        object_conditions={'exclude_prefixes': ['{{ dag.dag_id }}']},
        gcp_conn_id='{{ dag.dag_id }}',
        task_id=TASK_ID,
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 's3_bucket'))
    self.assertEqual(dag_id, getattr(op, 'gcs_bucket'))
    self.assertEqual(dag_id, getattr(op, 'description'))
    # pylint:disable=unsubscriptable-object
    self.assertEqual(dag_id, getattr(op, 'object_conditions')['exclude_prefixes'][0])
    # pylint:enable=unsubscriptable-object
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
def execute(self, context):
    if context['dag_run'].external_trigger:
        logging.info("skipping dependency check due to external run")
        return

    ti = TaskInstance(self.op, context['execution_date'])
    # The task is not supposed to have a DAG, but rendering needs access to one.
    setattr(ti.task, '_dag', self.dag)
    ti.render_templates()
    ti.task.execute(context)
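# A minimal hedged sketch of the pattern used above (not part of the original
# code): to render an operator's templated fields outside a normal scheduler
# run, wrap it in a TaskInstance, attach a DAG so the Jinja context can be
# built, and call render_templates(). `templated_op`, `some_dag`, and
# `execution_date` are hypothetical placeholders.
from airflow.models import TaskInstance

ti = TaskInstance(templated_op, execution_date)
setattr(ti.task, '_dag', some_dag)  # only needed when the operator has no DAG yet
ti.render_templates()               # mutates the templated fields in place on ti.task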
def test_operation_get_with_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)  # pylint: disable=attribute-defined-outside-init
    op = CloudDataTransferServiceGetOperationOperator(
        operation_name='{{ dag.dag_id }}', task_id='task-id', dag=self.dag
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    assert dag_id == getattr(op, 'operation_name')
def test_operation_get_with_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GcpTransferServiceOperationGetOperator(
        operation_name='{{ dag.dag_id }}', task_id='task-id', dag=self.dag
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'operation_name'))
def test_templated_sensor(self):
    with self.dag:
        sensor = ExternalTaskSensor(
            task_id='templated_task',
            external_dag_id='dag_{{ ds }}',
            external_task_id='task_{{ ds }}',
        )
    instance = TaskInstance(sensor, DEFAULT_DATE)
    instance.render_templates()
    self.assertEqual(sensor.external_dag_id, f"dag_{DEFAULT_DATE.date()}")
    self.assertEqual(sensor.external_task_id, f"task_{DEFAULT_DATE.date()}")
def test_init_with_template_cluster_label(self):
    dag = DAG(DAG_ID, start_date=DEFAULT_DATE)
    task = QuboleOperator(
        task_id=TASK_ID,
        dag=dag,
        cluster_label='{{ params.cluster_label }}',
        params={'cluster_label': 'default'},
    )
    ti = TaskInstance(task, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(task.cluster_label, 'default')
def test_operation_get_with_templates(self, _):
    dag_id = 'test_dag_id'
    configuration.load_test_config()
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GcpTransferServiceOperationGetOperator(
        operation_name='{{ dag.dag_id }}', task_id='task-id', dag=self.dag
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'operation_name'))
def test_render_template(self):
    operator = DruidOperator(
        task_id='spark_submit_job',
        json_index_file=self.json_index_str,
        params={'index_type': 'index_hadoop', 'datasource': 'datasource_prd'},
        dag=self.dag,
    )
    ti = TaskInstance(operator, DEFAULT_DATE)
    ti.render_templates()
    assert self.rendered_index_str == operator.json_index_file
def render(args):
    dag = get_dag(args)
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.render_templates()
    for attr in task.__class__.template_fields:
        print(textwrap.dedent("""\
        # ----------------------------------------------------------
        # property: {}
        # ----------------------------------------------------------
        {}
        """.format(attr, getattr(task, attr))))
def task_render(args):
    """Renders and displays templated fields for a given task"""
    dag = get_dag(args.subdir, args.dag_id)
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.render_templates()
    for attr in task.__class__.template_fields:
        print(textwrap.dedent("""\
        # ----------------------------------------------------------
        # property: {}
        # ----------------------------------------------------------
        {}
        """.format(attr, getattr(task, attr))))
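# For context (an inference from the function above, not shown in the original):
# `task_render` backs the CLI's render subcommand, which prints each templated
# field after rendering. A typical invocation, with placeholder dag/task ids,
# would look something like:
#
#   airflow tasks render example_dag example_task 2017-01-01
#
# Older CLI versions exposed the same behavior as `airflow render ...`.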
def test_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GcpTransferServiceOperationsListOperator(
        filter={"job_names": ['{{ dag.dag_id }}']},
        gcp_conn_id='{{ dag.dag_id }}',
        task_id='task-id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'filter')['job_names'][0])
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
def test_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)  # pylint:disable=attribute-defined-outside-init
    op = GcpTransferServiceJobUpdateOperator(
        job_name='{{ dag.dag_id }}',
        body={'transferJob': {"name": "{{ dag.dag_id }}"}},
        task_id='task-id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'body')['transferJob']['name'])
    self.assertEqual(dag_id, getattr(op, 'job_name'))
def test_templates(self, _):
    dag_id = 'test_dag_id'
    configuration.load_test_config()
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GcpTransferServiceOperationsListOperator(
        filter={"job_names": ['{{ dag.dag_id }}']},
        gcp_conn_id='{{ dag.dag_id }}',
        task_id='task-id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'filter')['job_names'][0])
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
def test_templates(self, _):
    dag_id = 'test_dag_id'
    configuration.load_test_config()
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GcpTransferServiceJobUpdateOperator(
        job_name='{{ dag.dag_id }}',
        body={'transferJob': {"name": "{{ dag.dag_id }}"}},
        task_id='task-id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'body')['transferJob']['name'])
    self.assertEqual(dag_id, getattr(op, 'job_name'))
def test_templates(self, _):
    dag_id = 'test_dag_id'
    self.dag = DAG(dag_id, default_args={'start_date': DEFAULT_DATE})
    op = GcpTransferServiceJobCreateOperator(
        body={"description": "{{ dag.dag_id }}"},
        gcp_conn_id='{{ dag.dag_id }}',
        aws_conn_id='{{ dag.dag_id }}',
        task_id='task-id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'body')[DESCRIPTION])
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
    self.assertEqual(dag_id, getattr(op, 'aws_conn_id'))
def test_templated_sensor_basic(self):
    with self.dag:
        sensor = ExternalTaskSensorBasic(
            task_id='templated_task',
            external_dag_id='dag_{{ ds }}',
            external_task_id='task_{{ ds }}',
            start_date=DEFAULT_DATE,
        )
    instance = TaskInstance(sensor, DEFAULT_DATE)
    instance.render_templates()
    self.assertEqual(sensor.external_dag_id, "dag_{}".format(DEFAULT_DATE.date()))
    self.assertEqual(sensor.external_task_id, "task_{}".format(DEFAULT_DATE.date()))
def test_operation_cancel_with_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)  # pylint:disable=attribute-defined-outside-init
    op = GcpTransferServiceOperationCancelOperator(
        operation_name='{{ dag.dag_id }}',
        gcp_conn_id='{{ dag.dag_id }}',
        api_version='{{ dag.dag_id }}',
        task_id=TASK_ID,
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'operation_name'))
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
    self.assertEqual(dag_id, getattr(op, 'api_version'))
def test_render_template(self):
    # Given
    operator = SparkSubmitOperator(task_id='spark_submit_job', dag=self.dag, **self._config)
    ti = TaskInstance(operator, DEFAULT_DATE)

    # When
    ti.render_templates()

    # Then
    expected_application_args = [
        u'-f foo',
        u'--bar bar',
        u'--start %s' % (DEFAULT_DATE - datetime.timedelta(days=1)).strftime("%Y-%m-%d"),
        u'--end %s' % DEFAULT_DATE.strftime("%Y-%m-%d"),
    ]
    expected_name = "spark_submit_job"
    self.assertListEqual(sorted(expected_application_args),
                         sorted(getattr(operator, '_application_args')))
    self.assertEqual(expected_name, getattr(operator, '_name'))
def test_operation_resume_with_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GcpTransferServiceOperationResumeOperator(
        operation_name='{{ dag.dag_id }}',
        gcp_conn_id='{{ dag.dag_id }}',
        api_version='{{ dag.dag_id }}',
        task_id=TASK_ID,
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'operation_name'))
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
    self.assertEqual(dag_id, getattr(op, 'api_version'))
def test_templates(self, _):
    dag_id = 'test_dag_id'
    # pylint: disable=attribute-defined-outside-init
    self.dag = DAG(dag_id, default_args={'start_date': DEFAULT_DATE})
    op = CloudDataTransferServiceCreateJobOperator(
        body={"description": "{{ dag.dag_id }}"},
        gcp_conn_id='{{ dag.dag_id }}',
        aws_conn_id='{{ dag.dag_id }}',
        task_id='task-id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    assert dag_id == getattr(op, 'body')[DESCRIPTION]
    assert dag_id == getattr(op, 'gcp_conn_id')
    assert dag_id == getattr(op, 'aws_conn_id')
def test_templates(self, _):
    dag_id = 'test_dag_id'
    configuration.load_test_config()
    self.dag = DAG(dag_id, default_args={'start_date': DEFAULT_DATE})
    op = GcpTransferServiceJobCreateOperator(
        body={"description": "{{ dag.dag_id }}"},
        gcp_conn_id='{{ dag.dag_id }}',
        aws_conn_id='{{ dag.dag_id }}',
        task_id='task-id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'body')[DESCRIPTION])
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
    self.assertEqual(dag_id, getattr(op, 'aws_conn_id'))
def test_command_template(self):
    """Test whether templating works properly with r_command"""
    task = ROperator(
        task_id='test_cmd_template',
        r_command='cat("{{ ds }}")',
        dag=self.dag,
    )
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(
        ti.task.r_command,
        'cat("{}")'.format(DEFAULT_DATE.date().isoformat()),
    )
def render(args):
    args.execution_date = dateutil.parser.parse(args.execution_date)
    dagbag = DagBag(process_subdir(args.subdir))
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')
    dag = dagbag.dags[args.dag_id]
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.render_templates()
    for attr in task.__class__.template_fields:
        print(textwrap.dedent("""\
        # ----------------------------------------------------------
        # property: {}
        # ----------------------------------------------------------
        {}
        """.format(attr, getattr(task, attr))))
def test_templated_sensor(self):
    dag = DAG(TEST_DAG_ID, self.args)
    with dag:
        sensor = ExternalTaskSensor(
            task_id='templated_task',
            external_dag_id='dag_{{ ds }}',
            external_task_id='task_{{ ds }}',
            start_date=DEFAULT_DATE,
        )
    instance = TaskInstance(sensor, DEFAULT_DATE)
    instance.render_templates()
    self.assertEqual(sensor.external_dag_id, "dag_{}".format(DEFAULT_DATE.date()))
    self.assertEqual(sensor.external_task_id, "task_{}".format(DEFAULT_DATE.date()))
def test_job_delete_with_templates(self, _):
    dag_id = 'test_dag_id'
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)  # pylint: disable=attribute-defined-outside-init
    op = CloudDataTransferServiceDeleteJobOperator(
        job_name='{{ dag.dag_id }}',
        gcp_conn_id='{{ dag.dag_id }}',
        api_version='{{ dag.dag_id }}',
        task_id=TASK_ID,
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    assert dag_id == getattr(op, 'job_name')
    assert dag_id == getattr(op, 'gcp_conn_id')
    assert dag_id == getattr(op, 'api_version')
def test_operation_cancel_with_templates(self, _):
    dag_id = 'test_dag_id'
    configuration.load_test_config()
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GcpTransferServiceOperationCancelOperator(
        operation_name='{{ dag.dag_id }}',
        gcp_conn_id='{{ dag.dag_id }}',
        api_version='{{ dag.dag_id }}',
        task_id=TASK_ID,
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'operation_name'))
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
    self.assertEqual(dag_id, getattr(op, 'api_version'))
def test_render_template(self):
    ti = TaskInstance(self.operator, DEFAULT_DATE)
    ti.render_templates()
    expected_args = [{
        'Name': 'test_step',
        'ActionOnFailure': 'CONTINUE',
        'HadoopJarStep': {
            'Jar': 'command-runner.jar',
            'Args': [
                '/usr/lib/spark/bin/run-example',
                (DEFAULT_DATE - timedelta(days=1)).strftime("%Y-%m-%d"),
                DEFAULT_DATE.strftime("%Y-%m-%d"),
            ]
        }
    }]
    self.assertListEqual(self.operator.steps, expected_args)
def test_render_template(self):
    json_str = '''
        {
            "type": "{{ params.index_type }}",
            "datasource": "{{ params.datasource }}",
            "spec": {
                "dataSchema": {
                    "granularitySpec": {
                        "intervals": ["{{ ds }}/{{ macros.ds_add(ds, 1) }}"]
                    }
                }
            }
        }
    '''
    m = mock.mock_open(read_data=json_str)
    with mock.patch('airflow.contrib.operators.druid_operator.open', m, create=True) as m:
        operator = DruidOperator(
            task_id='spark_submit_job',
            json_index_file='index_spec.json',
            params={
                'index_type': 'index_hadoop',
                'datasource': 'datasource_prd'
            },
            dag=self.dag
        )
        ti = TaskInstance(operator, DEFAULT_DATE)
        ti.render_templates()
        m.assert_called_once_with('index_spec.json')
        expected = '''{
    "datasource": "datasource_prd",
    "spec": {
        "dataSchema": {
            "granularitySpec": {
                "intervals": [
                    "2017-01-01/2017-01-02"
                ]
            }
        }
    },
    "type": "index_hadoop"
}'''
        self.assertEqual(expected, getattr(operator, 'index_spec_str'))
def test_templates(self, _):
    dag_id = 'test_dag_id'
    configuration.load_test_config()
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GoogleCloudStorageToGoogleCloudStorageTransferOperator(
        source_bucket='{{ dag.dag_id }}',
        destination_bucket='{{ dag.dag_id }}',
        description='{{ dag.dag_id }}',
        object_conditions={'exclude_prefixes': ['{{ dag.dag_id }}']},
        gcp_conn_id='{{ dag.dag_id }}',
        task_id=TASK_ID,
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'source_bucket'))
    self.assertEqual(dag_id, getattr(op, 'destination_bucket'))
    self.assertEqual(dag_id, getattr(op, 'description'))
    self.assertEqual(dag_id, getattr(op, 'object_conditions')['exclude_prefixes'][0])
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
def test_instance_start_with_templates(self, _):
    dag_id = 'test_dag_id'
    configuration.load_test_config()
    args = {'start_date': DEFAULT_DATE}
    self.dag = DAG(dag_id, default_args=args)
    op = GceInstanceStartOperator(
        project_id='{{ dag.dag_id }}',
        zone='{{ dag.dag_id }}',
        resource_id='{{ dag.dag_id }}',
        gcp_conn_id='{{ dag.dag_id }}',
        api_version='{{ dag.dag_id }}',
        task_id='id',
        dag=self.dag,
    )
    ti = TaskInstance(op, DEFAULT_DATE)
    ti.render_templates()
    self.assertEqual(dag_id, getattr(op, 'project_id'))
    self.assertEqual(dag_id, getattr(op, 'zone'))
    self.assertEqual(dag_id, getattr(op, 'resource_id'))
    self.assertEqual(dag_id, getattr(op, 'gcp_conn_id'))
    self.assertEqual(dag_id, getattr(op, 'api_version'))