Esempio n. 1
0
class TestDataflowHook(unittest.TestCase):
    """Exercise ``DataflowHook``: client creation, launch commands and job names.

    The launch tests patch the runner and jobs-controller classes and look only
    at the ``cmd`` keyword handed to the runner. Commands are sorted before
    being compared, so the order of the arguments is not checked.
    """

    def setUp(self):
        # ``mock_init`` stands in for the base hook constructor while the hook is built.
        init_patch = mock.patch(BASE_STRING.format('CloudBaseHook.__init__'),
                                new=mock_init)
        with init_patch:
            self.dataflow_hook = DataflowHook(gcp_conn_id='test')

    @staticmethod
    def _prime_launch_mocks(conn_mock, runner_mock, controller_mock, uuid_mock):
        """Give the four patched collaborators the return values every launch test uses."""
        uuid_mock.return_value = MOCK_UUID
        conn_mock.return_value = None
        runner_mock.return_value.wait_for_done.return_value = None
        controller_mock.return_value.wait_for_done.return_value = None

    def _check_runner_cmd(self, runner_mock, wanted_cmd):
        """Compare the ``cmd`` keyword given to the runner with ``wanted_cmd``, ignoring order."""
        actual_cmd = runner_mock.call_args[1]["cmd"]
        self.assertListEqual(sorted(actual_cmd), sorted(wanted_cmd))

    @mock.patch(
        "airflow.providers.google.cloud.hooks.dataflow.DataflowHook._authorize"
    )
    @mock.patch("airflow.providers.google.cloud.hooks.dataflow.build")
    def test_dataflow_client_creation(self, mock_build, mock_authorize):
        # get_conn() must build the v1b3 client on top of the authorized http
        # object and hand back exactly what ``build`` returned.
        client = self.dataflow_hook.get_conn()
        mock_build.assert_called_once_with(
            'dataflow',
            'v1b3',
            http=mock_authorize.return_value,
            cache_discovery=False,
        )
        self.assertEqual(mock_build.return_value, client)

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow(self, conn_mock, runner_mock,
                                   controller_mock, uuid_mock):
        # No interpreter is given, so the command is expected to start with "python3".
        self._prime_launch_mocks(conn_mock, runner_mock, controller_mock, uuid_mock)
        self.dataflow_hook.start_python_dataflow(
            job_name=JOB_NAME,
            variables=DATAFLOW_OPTIONS_PY,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
        )
        job_flag = '--job_name={}-{}'.format(JOB_NAME, MOCK_UUID)
        wanted_cmd = [
            "python3",
            '-m',
            PY_FILE,
            '--region=us-central1',
            '--runner=DataflowRunner',
            '--project=test',
            '--labels=foo=bar',
            '--staging_location=gs://test/staging',
            job_flag,
        ]
        self._check_runner_cmd(runner_mock, wanted_cmd)

    @parameterized.expand([('default_to_python3', 'python3'),
                           ('major_version_2', 'python2'),
                           ('major_version_3', 'python3'),
                           ('minor_version', 'python3.6')])
    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow_with_custom_interpreter(
            self, _name, py_interpreter, conn_mock, runner_mock,
            controller_mock, uuid_mock):
        # ``_name`` only labels the generated test case; the interpreter passed
        # in must become the first element of the command.
        self._prime_launch_mocks(conn_mock, runner_mock, controller_mock, uuid_mock)
        self.dataflow_hook.start_python_dataflow(
            job_name=JOB_NAME,
            variables=DATAFLOW_OPTIONS_PY,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
            py_interpreter=py_interpreter,
        )
        job_flag = '--job_name={}-{}'.format(JOB_NAME, MOCK_UUID)
        wanted_cmd = [
            py_interpreter,
            '-m',
            PY_FILE,
            '--region=us-central1',
            '--runner=DataflowRunner',
            '--project=test',
            '--labels=foo=bar',
            '--staging_location=gs://test/staging',
            job_flag,
        ]
        self._check_runner_cmd(runner_mock, wanted_cmd)

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_java_dataflow(self, conn_mock, runner_mock,
                                 controller_mock, uuid_mock):
        # Without a job class the jar is launched through ``java -jar``.
        self._prime_launch_mocks(conn_mock, runner_mock, controller_mock, uuid_mock)
        self.dataflow_hook.start_java_dataflow(
            job_name=JOB_NAME,
            variables=DATAFLOW_OPTIONS_JAVA,
            jar=JAR_FILE,
        )
        job_flag = '--jobName={}-{}'.format(JOB_NAME, MOCK_UUID)
        wanted_cmd = [
            'java',
            '-jar',
            JAR_FILE,
            '--region=us-central1',
            '--runner=DataflowRunner',
            '--project=test',
            '--stagingLocation=gs://test/staging',
            '--labels={"foo":"bar"}',
            job_flag,
        ]
        self._check_runner_cmd(runner_mock, wanted_cmd)

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_java_dataflow_with_job_class(self, conn_mock, runner_mock,
                                                controller_mock, uuid_mock):
        # With a job class the jar goes on the classpath (``-cp``) followed by the class.
        self._prime_launch_mocks(conn_mock, runner_mock, controller_mock, uuid_mock)
        self.dataflow_hook.start_java_dataflow(
            job_name=JOB_NAME,
            variables=DATAFLOW_OPTIONS_JAVA,
            jar=JAR_FILE,
            job_class=JOB_CLASS,
        )
        job_flag = '--jobName={}-{}'.format(JOB_NAME, MOCK_UUID)
        wanted_cmd = [
            'java',
            '-cp',
            JAR_FILE,
            JOB_CLASS,
            '--region=us-central1',
            '--runner=DataflowRunner',
            '--project=test',
            '--stagingLocation=gs://test/staging',
            '--labels={"foo":"bar"}',
            job_flag,
        ]
        self._check_runner_cmd(runner_mock, wanted_cmd)

    @parameterized.expand([
        (JOB_NAME, JOB_NAME, False),
        ('test-example', 'test_example', False),
        ('test-dataflow-pipeline-12345678', JOB_NAME, True),
        ('test-example-12345678', 'test_example', True),
        ('df-job-1', 'df-job-1', False),
        ('df-job', 'df-job', False),
        ('dfjob', 'dfjob', False),
        ('dfjob1', 'dfjob1', False),
    ])
    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'), return_value=MOCK_UUID)
    def test_valid_dataflow_job_name(self, expected_result, job_name,
                                     append_job_name, mock_uuid4):
        # Each case is (expected name, input name, whether a suffix is appended).
        built_name = self.dataflow_hook._build_dataflow_job_name(
            job_name=job_name,
            append_job_name=append_job_name,
        )

        self.assertEqual(expected_result, built_name)

    @parameterized.expand([("1dfjob@", ), ("dfjob@", ), ("df^jo", )])
    def test_build_dataflow_job_name_with_invalid_value(self, job_name):
        # Each of these names must be rejected with ValueError.
        with self.assertRaises(ValueError):
            self.dataflow_hook._build_dataflow_job_name(
                job_name=job_name,
                append_job_name=False,
            )
Esempio n. 2
0
class TestDataflowHook(unittest.TestCase):
    """Tests for ``DataflowHook``: client creation, launch commands and job names.

    The launch tests patch ``_DataflowRunner`` and ``_DataflowJobsController``
    and inspect only the ``cmd`` keyword handed to the runner. Commands are
    sorted before comparison, so argument order is deliberately not asserted.
    """

    def setUp(self):
        # ``mock_init`` replaces the base hook constructor while the hook is built.
        with mock.patch(BASE_STRING.format('GoogleBaseHook.__init__'),
                        new=mock_init):
            self.dataflow_hook = DataflowHook(gcp_conn_id='test')

    @staticmethod
    def _stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        """Give the patched collaborators the return values every launch test needs."""
        mock_uuid.return_value = MOCK_UUID
        mock_conn.return_value = None
        mock_dataflow.return_value.wait_for_done.return_value = None
        mock_dataflowjob.return_value.wait_for_done.return_value = None

    @staticmethod
    def _expected_python_cmd(interpreter='python3', region='us-central1', extra_args=()):
        """Return the command expected for a Python launch.

        :param interpreter: first element of the command.
        :param region: value expected in the ``--region`` flag.
        :param extra_args: additional flags expected on top of the common ones.
        """
        return [
            interpreter,
            '-m',
            PY_FILE,
            f'--region={region}',
            '--runner=DataflowRunner',
            '--project=test',
            '--labels=foo=bar',
            '--staging_location=gs://test/staging',
            f'--job_name={JOB_NAME}-{MOCK_UUID}',
            *extra_args,
        ]

    @staticmethod
    def _expected_java_cmd(launcher_args=None, region='us-central1', extra_args=()):
        """Return the command expected for a Java launch.

        :param launcher_args: arguments that follow ``java``; defaults to
            ``('-jar', JAR_FILE)``.
        :param region: value expected in the ``--region`` flag.
        :param extra_args: additional flags expected on top of the common ones.
        """
        if launcher_args is None:
            launcher_args = ('-jar', JAR_FILE)
        return [
            'java',
            *launcher_args,
            f'--region={region}',
            '--runner=DataflowRunner',
            '--project=test',
            '--stagingLocation=gs://test/staging',
            '--labels={"foo":"bar"}',
            f'--jobName={JOB_NAME}-{MOCK_UUID}',
            *extra_args,
        ]

    def _assert_runner_cmd(self, mock_dataflow, expected_cmd):
        """Assert the runner received ``expected_cmd`` as its ``cmd`` keyword, ignoring order."""
        self.assertListEqual(sorted(mock_dataflow.call_args[1]["cmd"]),
                             sorted(expected_cmd))

    @mock.patch(
        "airflow.providers.google.cloud.hooks.dataflow.DataflowHook._authorize"
    )
    @mock.patch("airflow.providers.google.cloud.hooks.dataflow.build")
    def test_dataflow_client_creation(self, mock_build, mock_authorize):
        # get_conn() must build the v1b3 client on the authorized http object
        # and return exactly what ``build`` produced.
        result = self.dataflow_hook.get_conn()
        mock_build.assert_called_once_with('dataflow',
                                           'v1b3',
                                           http=mock_authorize.return_value,
                                           cache_discovery=False)
        self.assertEqual(mock_build.return_value, result)

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow(self, mock_conn, mock_dataflow,
                                   mock_dataflowjob, mock_uuid):
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        self.dataflow_hook.start_python_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=DATAFLOW_VARIABLES_PY,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
        )
        self._assert_runner_cmd(mock_dataflow, self._expected_python_cmd())

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow_with_custom_region_as_variable(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        # The region is supplied inside ``variables`` rather than as an argument.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        variables = copy.deepcopy(DATAFLOW_VARIABLES_PY)
        variables['region'] = TEST_LOCATION
        self.dataflow_hook.start_python_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=variables,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
        )
        self._assert_runner_cmd(
            mock_dataflow, self._expected_python_cmd(region=TEST_LOCATION))

    # NOTE: "paramater" is misspelled; the name is kept so existing test IDs stay stable.
    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow_with_custom_region_as_paramater(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        # The region is supplied through the ``location`` argument.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        self.dataflow_hook.start_python_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=DATAFLOW_VARIABLES_PY,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
            location=TEST_LOCATION,
        )
        self._assert_runner_cmd(
            mock_dataflow, self._expected_python_cmd(region=TEST_LOCATION))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow_with_multiple_extra_packages(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        # A list-valued variable must be expanded into one flag per element.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        variables: Dict[str, Any] = copy.deepcopy(DATAFLOW_VARIABLES_PY)
        variables['extra-package'] = ['a.whl', 'b.whl']

        self.dataflow_hook.start_python_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=variables,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
        )
        expected_cmd = self._expected_python_cmd(
            extra_args=('--extra-package=a.whl', '--extra-package=b.whl'))
        self._assert_runner_cmd(mock_dataflow, expected_cmd)

    @parameterized.expand([
        ('default_to_python3', 'python3'),
        ('major_version_2', 'python2'),
        ('major_version_3', 'python3'),
        ('minor_version', 'python3.6'),
    ])
    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow_with_custom_interpreter(
        self,
        name,
        py_interpreter,
        mock_conn,
        mock_dataflow,
        mock_dataflowjob,
        mock_uuid,
    ):
        del name  # only labels the generated test case
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        self.dataflow_hook.start_python_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=DATAFLOW_VARIABLES_PY,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
            py_interpreter=py_interpreter,
        )
        self._assert_runner_cmd(
            mock_dataflow, self._expected_python_cmd(interpreter=py_interpreter))

    @parameterized.expand([
        (['foo-bar'], False),
        (['foo-bar'], True),
        ([], True),
    ])
    @mock.patch(DATAFLOW_STRING.format('prepare_virtualenv'))
    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow_with_non_empty_py_requirements_and_without_system_packages(
        self,
        current_py_requirements,
        current_py_system_site_packages,
        mock_conn,
        mock_dataflow,
        mock_dataflowjob,
        mock_uuid,
        mock_virtualenv,
    ):
        # Covers every combination in which requirements are given and/or system
        # site packages are enabled; the interpreter returned by the patched
        # ``prepare_virtualenv`` must be the one used in the command.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        mock_virtualenv.return_value = '/dummy_dir/bin/python'
        self.dataflow_hook.start_python_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=DATAFLOW_VARIABLES_PY,
            dataflow=PY_FILE,
            py_options=PY_OPTIONS,
            py_requirements=current_py_requirements,
            py_system_site_packages=current_py_system_site_packages,
        )
        self._assert_runner_cmd(
            mock_dataflow,
            self._expected_python_cmd(interpreter='/dummy_dir/bin/python'))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_python_dataflow_with_empty_py_requirements_and_without_system_packages(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        # An empty requirements list with system packages left at their default
        # must be rejected.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        with self.assertRaisesRegex(AirflowException,
                                    "Invalid method invocation."):
            self.dataflow_hook.start_python_dataflow(  # pylint: disable=no-value-for-parameter
                job_name=JOB_NAME,
                variables=DATAFLOW_VARIABLES_PY,
                dataflow=PY_FILE,
                py_options=PY_OPTIONS,
                py_requirements=[],
            )

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_java_dataflow(self, mock_conn, mock_dataflow,
                                 mock_dataflowjob, mock_uuid):
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        self.dataflow_hook.start_java_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=DATAFLOW_VARIABLES_JAVA,
            jar=JAR_FILE)
        self._assert_runner_cmd(mock_dataflow, self._expected_java_cmd())

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_java_dataflow_with_multiple_values_in_variables(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        # A list-valued variable must be expanded into one flag per element.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        variables: Dict[str, Any] = copy.deepcopy(DATAFLOW_VARIABLES_JAVA)
        variables['mock-option'] = ['a.whl', 'b.whl']

        self.dataflow_hook.start_java_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=variables,
            jar=JAR_FILE)
        expected_cmd = self._expected_java_cmd(
            extra_args=('--mock-option=a.whl', '--mock-option=b.whl'))
        self._assert_runner_cmd(mock_dataflow, expected_cmd)

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_java_dataflow_with_custom_region_as_variable(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        # The region is supplied inside ``variables`` rather than as an argument.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)

        variables = copy.deepcopy(DATAFLOW_VARIABLES_JAVA)
        variables['region'] = TEST_LOCATION

        self.dataflow_hook.start_java_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=variables,
            jar=JAR_FILE)
        self._assert_runner_cmd(
            mock_dataflow, self._expected_java_cmd(region=TEST_LOCATION))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_java_dataflow_with_custom_region_as_parameter(
            self, mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid):
        # The region is supplied through the ``location`` argument. This test
        # previously set ``variables['region']``, which only repeated the
        # "as_variable" test and left the parameter path untested.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)

        self.dataflow_hook.start_java_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=DATAFLOW_VARIABLES_JAVA,
            jar=JAR_FILE,
            location=TEST_LOCATION)
        self._assert_runner_cmd(
            mock_dataflow, self._expected_java_cmd(region=TEST_LOCATION))

    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowJobsController'))
    @mock.patch(DATAFLOW_STRING.format('_DataflowRunner'))
    @mock.patch(DATAFLOW_STRING.format('DataflowHook.get_conn'))
    def test_start_java_dataflow_with_job_class(self, mock_conn, mock_dataflow,
                                                mock_dataflowjob, mock_uuid):
        # With a job class the jar goes on the classpath (``-cp``) followed by the class.
        self._stub_launch_mocks(mock_conn, mock_dataflow, mock_dataflowjob, mock_uuid)
        self.dataflow_hook.start_java_dataflow(  # pylint: disable=no-value-for-parameter
            job_name=JOB_NAME,
            variables=DATAFLOW_VARIABLES_JAVA,
            jar=JAR_FILE,
            job_class=JOB_CLASS)
        expected_cmd = self._expected_java_cmd(
            launcher_args=('-cp', JAR_FILE, JOB_CLASS))
        self._assert_runner_cmd(mock_dataflow, expected_cmd)

    @parameterized.expand([
        (JOB_NAME, JOB_NAME, False),
        ('test-example', 'test_example', False),
        ('test-dataflow-pipeline-12345678', JOB_NAME, True),
        ('test-example-12345678', 'test_example', True),
        ('df-job-1', 'df-job-1', False),
        ('df-job', 'df-job', False),
        ('dfjob', 'dfjob', False),
        ('dfjob1', 'dfjob1', False),
    ])
    @mock.patch(DATAFLOW_STRING.format('uuid.uuid4'), return_value=MOCK_UUID)
    def test_valid_dataflow_job_name(self, expected_result, job_name,
                                     append_job_name, mock_uuid4):
        # Each case is (expected name, input name, whether a suffix is appended).
        job_name = self.dataflow_hook._build_dataflow_job_name(
            job_name=job_name, append_job_name=append_job_name)

        self.assertEqual(expected_result, job_name)

    @parameterized.expand([("1dfjob@", ), ("dfjob@", ), ("df^jo", )])
    def test_build_dataflow_job_name_with_invalid_value(self, job_name):
        # Each of these names must be rejected with ValueError.
        self.assertRaises(ValueError,
                          self.dataflow_hook._build_dataflow_job_name,
                          job_name=job_name,
                          append_job_name=False)