class DataFlowPythonOperatorTest(unittest.TestCase): def setUp(self): self.dataflow = DataFlowPythonOperator( task_id=TASK_ID, py_file=PY_FILE, py_options=PY_OPTIONS, dataflow_default_options=DEFAULT_OPTIONS, options=ADDITIONAL_OPTIONS) def test_init(self): """Test DataFlowPythonOperator instance is properly initialized.""" self.assertEqual(self.dataflow.task_id, TASK_ID) self.assertEqual(self.dataflow.py_file, PY_FILE) self.assertEqual(self.dataflow.py_options, PY_OPTIONS) self.assertEqual(self.dataflow.dataflow_default_options, DEFAULT_OPTIONS) self.assertEqual(self.dataflow.options, ADDITIONAL_OPTIONS) @mock.patch('airflow.contrib.operators.dataflow_operator.DataFlowHook') def test_exec(self, dataflow_mock): """Test DataFlowHook is created and the right args are passed to start_python_workflow. """ start_python_hook = dataflow_mock.return_value.start_python_dataflow self.dataflow.execute(None) assert dataflow_mock.called expected_options = { 'project': 'test', 'staging_location': 'gs://test/staging', 'output': 'gs://test/output' } start_python_hook.assert_called_once_with(TASK_ID, expected_options, PY_FILE, PY_OPTIONS)
class DataFlowPythonOperatorTest(unittest.TestCase): def setUp(self): self.dataflow = DataFlowPythonOperator( task_id=TASK_ID, py_file=PY_FILE, job_name=JOB_NAME, py_options=PY_OPTIONS, dataflow_default_options=DEFAULT_OPTIONS_PYTHON, options=ADDITIONAL_OPTIONS, poll_sleep=POLL_SLEEP) def test_init(self): """Test DataFlowPythonOperator instance is properly initialized.""" self.assertEqual(self.dataflow.task_id, TASK_ID) self.assertEqual(self.dataflow.job_name, JOB_NAME) self.assertEqual(self.dataflow.py_file, PY_FILE) self.assertEqual(self.dataflow.py_options, PY_OPTIONS) self.assertEqual(self.dataflow.poll_sleep, POLL_SLEEP) self.assertEqual(self.dataflow.dataflow_default_options, DEFAULT_OPTIONS_PYTHON) self.assertEqual(self.dataflow.options, EXPECTED_ADDITIONAL_OPTIONS) @mock.patch('airflow.contrib.operators.dataflow_operator.DataFlowHook') @mock.patch(GCS_HOOK_STRING.format('GoogleCloudBucketHelper')) def test_exec(self, gcs_hook, dataflow_mock): """Test DataFlowHook is created and the right args are passed to start_python_workflow. """ start_python_hook = dataflow_mock.return_value.start_python_dataflow gcs_download_hook = gcs_hook.return_value.google_cloud_to_local self.dataflow.execute(None) self.assertTrue(dataflow_mock.called) expected_options = { 'project': 'test', 'staging_location': 'gs://test/staging', 'output': 'gs://test/output', 'labels': { 'foo': 'bar', 'airflow-version': TEST_VERSION } } gcs_download_hook.assert_called_once_with(PY_FILE) start_python_hook.assert_called_once_with(JOB_NAME, expected_options, mock.ANY, PY_OPTIONS) self.assertTrue(self.dataflow.py_file.startswith('/tmp/dataflow'))
class DataFlowPythonOperatorTest(unittest.TestCase): def setUp(self): self.dataflow = DataFlowPythonOperator( task_id=TASK_ID, py_file=PY_FILE, job_name=JOB_NAME, py_options=PY_OPTIONS, dataflow_default_options=DEFAULT_OPTIONS_PYTHON, options=ADDITIONAL_OPTIONS, poll_sleep=POLL_SLEEP) def test_init(self): """Test DataFlowPythonOperator instance is properly initialized.""" self.assertEqual(self.dataflow.task_id, TASK_ID) self.assertEqual(self.dataflow.job_name, JOB_NAME) self.assertEqual(self.dataflow.py_file, PY_FILE) self.assertEqual(self.dataflow.py_options, PY_OPTIONS) self.assertEqual(self.dataflow.poll_sleep, POLL_SLEEP) self.assertEqual(self.dataflow.dataflow_default_options, DEFAULT_OPTIONS_PYTHON) self.assertEqual(self.dataflow.options, EXPECTED_ADDITIONAL_OPTIONS) @mock.patch('airflow.contrib.operators.dataflow_operator.DataFlowHook') @mock.patch(GCS_HOOK_STRING.format('GoogleCloudBucketHelper')) def test_exec(self, gcs_hook, dataflow_mock): """Test DataFlowHook is created and the right args are passed to start_python_workflow. """ start_python_hook = dataflow_mock.return_value.start_python_dataflow gcs_download_hook = gcs_hook.return_value.google_cloud_to_local self.dataflow.execute(None) self.assertTrue(dataflow_mock.called) expected_options = { 'project': 'test', 'staging_location': 'gs://test/staging', 'output': 'gs://test/output', 'labels': {'foo': 'bar', 'airflow-version': TEST_VERSION} } gcs_download_hook.assert_called_once_with(PY_FILE) start_python_hook.assert_called_once_with(JOB_NAME, expected_options, mock.ANY, PY_OPTIONS) self.assertTrue(self.dataflow.py_file.startswith('/tmp/dataflow'))
class DataFlowPythonOperatorTest(unittest.TestCase): def setUp(self): self.dataflow = DataFlowPythonOperator( task_id=TASK_ID, py_file=PY_FILE, py_options=PY_OPTIONS, dataflow_default_options=DEFAULT_OPTIONS, options=ADDITIONAL_OPTIONS) def test_init(self): """Test DataFlowPythonOperator instance is properly initialized.""" self.assertEqual(self.dataflow.task_id, TASK_ID) self.assertEqual(self.dataflow.py_file, PY_FILE) self.assertEqual(self.dataflow.py_options, PY_OPTIONS) self.assertEqual(self.dataflow.dataflow_default_options, DEFAULT_OPTIONS) self.assertEqual(self.dataflow.options, ADDITIONAL_OPTIONS) @mock.patch('airflow.contrib.operators.dataflow_operator.DataFlowHook') @mock.patch(GCS_HOOK_STRING.format('GoogleCloudStorageHook')) def test_exec(self, gcs_hook, dataflow_mock): """Test DataFlowHook is created and the right args are passed to start_python_workflow. """ start_python_hook = dataflow_mock.return_value.start_python_dataflow gcs_download_hook = gcs_hook.return_value.download self.dataflow.execute(None) self.assertTrue(dataflow_mock.called) expected_options = { 'project': 'test', 'staging_location': 'gs://test/staging', 'output': 'gs://test/output' } gcs_download_hook.assert_called_once_with('my-bucket', 'my-object.py', mock.ANY) start_python_hook.assert_called_once_with(TASK_ID, expected_options, mock.ANY, PY_OPTIONS) self.assertTrue(self.dataflow.py_file.startswith('/tmp/dataflow'))