def setUp(self):
  super(KubeflowV2HandlerTest, self).setUp()
  self.chicago_taxi_pipeline_dir = os.path.join(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'testdata')
  self._home = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self.enter_context(test_case_utils.override_env_var('HOME', self._home))
  self._kubeflow_v2_home = os.path.join(self._home, 'kubeflow_v2')
  self.enter_context(
      test_case_utils.override_env_var('KUBEFLOW_V2_HOME',
                                       self._kubeflow_v2_home))

  # Flags for handler.
  self.engine = 'kubeflow_v2'
  self.pipeline_path = os.path.join(self.chicago_taxi_pipeline_dir,
                                    'test_pipeline_1.py')
  self.pipeline_name = _TEST_PIPELINE_NAME
  self.pipeline_root = os.path.join(self._home, 'tfx', 'pipelines',
                                    self.pipeline_name)
  self.run_id = 'dummyID'

  # Set up a mock for the API client so that this Python test is hermetic.
  # The subprocess mock is set up per-test.
  self.addCleanup(mock.patch.stopall)

def setUp(self):
  super().setUp()
  self._home = self.tmp_dir
  self.enter_context(test_case_utils.override_env_var('HOME', self._home))
  self.enter_context(
      test_case_utils.override_env_var(
          'AIRFLOW_HOME', os.path.join(os.environ['HOME'], 'airflow')))

  # Flags for handler.
  self.engine = 'airflow'
  self.pipeline_path = os.path.join(_testdata_dir,
                                    'test_pipeline_airflow_1.py')
  self.pipeline_root = os.path.join(self._home, 'tfx', 'pipelines')
  self.pipeline_name = 'chicago_taxi_simple'
  self.run_id = 'manual__2019-07-19T19:56:02+00:00'
  self.runtime_parameter = {'a': '1', 'b': '2'}
  self.runtime_parameter_json = json.dumps(self.runtime_parameter)
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  # Pipeline args for mocking subprocess.
  self.pipeline_args = {labels.PIPELINE_NAME: self.pipeline_name}

  self._mock_get_airflow_version = self.enter_context(
      mock.patch.object(
          airflow_handler.AirflowHandler,
          '_get_airflow_version',
          return_value='2.0.1'))

def setUp(self):
  super(PenguinPipelineSklearnGcpTest, self).setUp()
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self._experimental_root = os.path.dirname(__file__)
  self._penguin_root = os.path.dirname(self._experimental_root)

  self._pipeline_name = 'sklearn_test'
  self._data_root = os.path.join(self._penguin_root, 'data')
  self._trainer_module_file = os.path.join(self._experimental_root,
                                           'penguin_utils_sklearn.py')
  self._evaluator_module_file = os.path.join(self._experimental_root,
                                             'sklearn_predict_extractor.py')
  self._pipeline_root = os.path.join(self.tmp_dir, 'tfx', 'pipelines',
                                     self._pipeline_name)
  self._metadata_path = os.path.join(self.tmp_dir, 'tfx', 'metadata',
                                     self._pipeline_name, 'metadata.db')

  self._ai_platform_training_args = {
      'project': 'project_id',
      'region': 'us-central1',
  }
  self._ai_platform_serving_args = {
      'model_name': 'model_name',
      'project_id': 'project_id',
      'regions': ['us-central1'],
  }

def setUp(self):
  super().setUp()

  # Pin a constant version for the artifact version tag.
  patcher = mock.patch("tfx.version.__version__")
  patcher.start()
  version.__version__ = "0.123.4.dev"
  self.addCleanup(patcher.stop)

  # Prepare executor input.
  serialized_metadata = self._get_text_from_test_data(
      "executor_invocation.json")
  metadata_json = json.loads(serialized_metadata)
  # Mutate the outputFile field.
  metadata_json["outputs"]["outputFile"] = _TEST_OUTPUT_METADATA_JSON
  self._serialized_metadata = json.dumps(metadata_json)

  # Prepare executor input using legacy properties and custom properties.
  serialized_metadata_legacy = self._get_text_from_test_data(
      "executor_invocation_legacy.json")
  metadata_json_legacy = json.loads(serialized_metadata_legacy)
  # Mutate the outputFile field.
  metadata_json_legacy["outputs"]["outputFile"] = _TEST_OUTPUT_METADATA_JSON
  self._serialized_metadata_legacy = json.dumps(metadata_json_legacy)

  self._expected_output = self._get_text_from_test_data(
      "expected_output_metadata.json").strip()

  # Change the working directory after the testdata files have been read.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

def setUp(self):
  super().setUp()
  self.chicago_taxi_pipeline_dir = os.path.join(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'testdata')
  self._home = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self.enter_context(test_case_utils.override_env_var('HOME', self._home))
  self._beam_home = os.path.join(os.environ['HOME'], 'beam')
  self.enter_context(
      test_case_utils.override_env_var('BEAM_HOME', self._beam_home))

  # Flags for handler.
  self.engine = 'beam'
  self.pipeline_path = os.path.join(self.chicago_taxi_pipeline_dir,
                                    'test_pipeline_beam_1.py')
  self.pipeline_name = 'chicago_taxi_beam'
  self.pipeline_root = os.path.join(self._home, 'tfx', 'pipelines',
                                    self.pipeline_name)
  self.run_id = 'dummyID'

  self.pipeline_args = {
      labels.PIPELINE_NAME: self.pipeline_name,
      labels.PIPELINE_DSL_PATH: self.pipeline_path,
  }

def setUp(self):
  super(BaseKubeflowTest, self).setUp()
  self._test_dir = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  self._test_output_dir = 'gs://{}/test_output'.format(self._BUCKET_NAME)

  test_id = test_utils.random_id()
  self._testdata_root = 'gs://{}/test_data/{}'.format(self._BUCKET_NAME,
                                                      test_id)
  subprocess.run(
      ['gsutil', 'cp', '-r', self._TEST_DATA_ROOT, self._testdata_root],
      check=True,
      stdout=subprocess.DEVNULL,
      stderr=subprocess.DEVNULL,
  )

  self._data_root = os.path.join(self._testdata_root, 'external', 'csv')
  self._transform_module = os.path.join(self._MODULE_ROOT,
                                        'transform_module.py')
  self._trainer_module = os.path.join(self._MODULE_ROOT, 'trainer_module.py')

  self.addCleanup(self._delete_test_dir, test_id)

def setUp(self):
  super(LocalHandlerTest, self).setUp()
  self.chicago_taxi_pipeline_dir = os.path.join(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'testdata')
  self._home = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self.enter_context(test_case_utils.override_env_var('HOME', self._home))
  self._local_home = os.path.join(os.environ['HOME'], 'local')
  self.enter_context(
      test_case_utils.override_env_var('LOCAL_HOME', self._local_home))

  # Flags for handler.
  self.engine = 'local'
  self.pipeline_path = os.path.join(self.chicago_taxi_pipeline_dir,
                                    'test_pipeline_local_1.py')
  self.pipeline_name = 'chicago_taxi_local'
  self.pipeline_root = os.path.join(self._home, 'tfx', 'pipelines',
                                    self.pipeline_name)
  self.run_id = 'dummyID'

  # Pipeline args for mocking subprocess.
  self.pipeline_args = {
      'pipeline_name': 'chicago_taxi_local',
      'pipeline_dsl_path': self.pipeline_path
  }

def setUp(self):
  super().setUp()
  self.chicago_taxi_pipeline_dir = os.path.join(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'testdata')
  self._home = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self.enter_context(test_case_utils.override_env_var('HOME', self._home))
  self._vertex_home = os.path.join(self._home, 'vertex')
  self.enter_context(
      test_case_utils.override_env_var('VERTEX_HOME', self._vertex_home))

  # Flags for handler.
  self.engine = 'vertex'
  self.pipeline_path = os.path.join(self.chicago_taxi_pipeline_dir,
                                    'test_pipeline_kubeflow_v2_1.py')
  self.pipeline_name = _TEST_PIPELINE_NAME
  self.pipeline_root = os.path.join(self._home, 'tfx', 'pipelines',
                                    self.pipeline_name)
  self.run_id = 'dummyID'
  self.project = 'gcp_project_1'
  self.region = 'us-central1'
  self.runtime_parameter = {'a': '1', 'b': '2'}

  # Set up a mock for the API client so that this Python test is hermetic.
  # The subprocess mock is set up per-test.
  self.addCleanup(mock.patch.stopall)

def setUp(self):
  super(CliKubeflowEndToEndTest, self).setUp()

  # List of packages installed.
  self._pip_list = pip_utils.get_package_names()

  # Check if Kubeflow is installed before running E2E tests.
  if labels.KUBEFLOW_PACKAGE_NAME not in self._pip_list:
    sys.exit('Kubeflow not installed.')

  # Change the encoding for Click since Python 3 is configured to use ASCII
  # as the encoding for the environment.
  if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
    os.environ['LANG'] = 'en_US.utf-8'

  # Testdata path.
  self._testdata_dir = os.path.join(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'testdata')
  self._testdata_dir_updated = self.tmp_dir
  fileio.makedirs(self._testdata_dir_updated)

  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  # Generate a unique pipeline name, using tmp_dir as a random seed.
  self._pipeline_name = ('cli-kubeflow-e2e-test-' +
                         test_utils.generate_random_id(self.tmp_dir))
  absl.logging.info('Pipeline name is %s', self._pipeline_name)
  self._pipeline_name_v2 = self._pipeline_name + '_v2'

  orig_pipeline_path = os.path.join(self._testdata_dir,
                                    'test_pipeline_kubeflow_1.py')
  self._pipeline_path = os.path.join(self._testdata_dir_updated,
                                     'test_pipeline_kubeflow_1.py')
  self._pipeline_path_v2 = os.path.join(self._testdata_dir_updated,
                                        'test_pipeline_kubeflow_2.py')

  test_utils.copy_and_change_pipeline_name(orig_pipeline_path,
                                           self._pipeline_path,
                                           'chicago_taxi_pipeline_kubeflow',
                                           self._pipeline_name)
  self.assertTrue(fileio.exists(self._pipeline_path))
  test_utils.copy_and_change_pipeline_name(orig_pipeline_path,
                                           self._pipeline_path_v2,
                                           'chicago_taxi_pipeline_kubeflow',
                                           self._pipeline_name_v2)
  self.assertTrue(fileio.exists(self._pipeline_path_v2))

  # Endpoint URL.
  self._endpoint = self._get_endpoint(
      subprocess.check_output(
          'kubectl describe configmap inverse-proxy-config -n kubeflow'
          .split()))
  absl.logging.info('ENDPOINT: %s', self._endpoint)

  try:
    # Create a kfp client for cleanup after running commands.
    self._client = kfp.Client(host=self._endpoint)
  except kfp_server_api.rest.ApiException as err:
    absl.logging.info(err)

def setUp(self):
  super().setUp()
  self._pipeline_name = 'TEMPLATE_E2E_TEST'
  self._project_dir = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self._temp_dir = os.path.join(self._project_dir, 'tmp')
  os.makedirs(self._temp_dir)

def testChangeWorkingDir(self):
  cwd = os.getcwd()
  new_cwd = os.path.join(self.tmp_dir, 'new')
  os.makedirs(new_cwd)
  with test_case_utils.change_working_dir(new_cwd) as old_cwd:
    # Inside the context: the yielded value is the previous working
    # directory, and the process has moved to new_cwd.
    self.assertEqual(os.path.realpath(old_cwd), os.path.realpath(cwd))
    self.assertEqual(os.path.realpath(new_cwd),
                     os.path.realpath(os.getcwd()))
  # After the context exits, the original working directory is restored.
  self.assertEqual(os.path.realpath(cwd), os.path.realpath(os.getcwd()))

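# For reference, a minimal sketch of a change_working_dir helper that
# satisfies the contract exercised by the test above (yield the previous cwd,
# switch to the new one, restore on exit). This is an illustrative assumption,
# not the actual tfx test_case_utils implementation.
import contextlib
import os


@contextlib.contextmanager
def change_working_dir(new_dir):
  """Temporarily changes the process working directory to `new_dir`."""
  old_dir = os.getcwd()
  os.chdir(new_dir)
  try:
    yield old_dir  # The caller receives the directory we switched away from.
  finally:
    os.chdir(old_dir)  # Always restore, even if the body raises.
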
def setUp(self):
  super().setUp()
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self._test_dir = self.tmp_dir
  self._test_output_dir = 'gs://{}/test_output'.format(self._BUCKET_NAME)

  aiplatform.init(
      project=self._GCP_PROJECT_ID,
      location=self._GCP_REGION,
  )

def setUp(self):
  super(DockerfileTest, self).setUp()
  self._testdata_dir = os.path.join(
      os.path.abspath(os.path.dirname(__file__)), 'testdata')

  # Change to a temporary working directory so that there is no setup.py in
  # the working directory.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  self._test_dockerfile = os.path.abspath('.test_dockerfile')
  with open(self._test_dockerfile, 'w') as f:
    f.writelines(_test_dockerfile_content)

def setUp(self):
  super(BaseEndToEndTest, self).setUp()

  # Change the encoding for Click since Python 3 is configured to use ASCII
  # as the encoding for the environment.
  # TODO(b/150100590) Delete this block after Python >=3.7.
  if codecs.lookup(locale.getpreferredencoding()).name == 'ascii':
    os.environ['LANG'] = 'en_US.utf-8'

  self._pipeline_name = 'TEMPLATE_E2E_TEST'
  self._project_dir = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self._temp_dir = os.path.join(self._project_dir, 'tmp')
  os.makedirs(self._temp_dir)

def setUp(self):
  super().setUp()

  # Prepare executor input.
  serialized_metadata = self._get_text_from_test_data(
      "executor_invocation.json")
  metadata_json = json.loads(serialized_metadata)
  # Mutate the outputFile field.
  metadata_json["outputs"]["outputFile"] = _TEST_OUTPUT_METADATA_JSON
  self._serialized_metadata = json.dumps(metadata_json)

  self._expected_output = json.loads(
      self._get_text_from_test_data("expected_output_metadata.json"))

  # Change the working directory after the testdata files have been read.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

def setUp(self):
  super(AirflowHandlerTest, self).setUp()
  self._home = self.tmp_dir
  self.enter_context(test_case_utils.override_env_var('HOME', self._home))
  self.enter_context(
      test_case_utils.override_env_var(
          'AIRFLOW_HOME', os.path.join(os.environ['HOME'], 'airflow')))

  # Flags for handler.
  self.engine = 'airflow'
  self.pipeline_path = os.path.join(_testdata_dir,
                                    'test_pipeline_airflow_1.py')
  self.pipeline_root = os.path.join(self._home, 'tfx', 'pipelines')
  self.pipeline_name = 'chicago_taxi_simple'
  self.run_id = 'manual__2019-07-19T19:56:02+00:00'

  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  # Pipeline args for mocking subprocess.
  self.pipeline_args = {'pipeline_name': 'chicago_taxi_simple'}

def setUp(self):
  super().setUp()
  self._executor_invocation = pipeline_pb2.ExecutorInput()
  self._executor_invocation.outputs.output_file = _TEST_OUTPUT_METADATA_JSON
  self._executor_invocation.inputs.parameters[
      'input_base'].string_value = _TEST_INPUT_DIR
  self._executor_invocation.inputs.parameters[
      'output_config'].string_value = '{}'
  self._executor_invocation.inputs.parameters[
      'input_config'].string_value = json_format.MessageToJson(
          example_gen_pb2.Input(splits=[
              example_gen_pb2.Input.Split(
                  name='s1', pattern='span{SPAN}/split1/*'),
              example_gen_pb2.Input.Split(
                  name='s2', pattern='span{SPAN}/split2/*')
          ]))
  self._executor_invocation.outputs.artifacts['examples'].artifacts.append(
      pipeline_pb2.RuntimeArtifact(
          type=pipeline_pb2.ArtifactTypeSchema(
              instance_schema=compiler_utils.get_artifact_schema(
                  standard_artifacts.Examples))))

  self._executor_invocation_from_file = fileio.open(
      os.path.join(os.path.dirname(__file__), 'testdata',
                   'executor_invocation.json'), 'r').read()
  logging.debug('Executor invocation under test: %s',
                self._executor_invocation_from_file)

  self._expected_result_from_file = fileio.open(
      os.path.join(os.path.dirname(__file__), 'testdata',
                   'expected_output_metadata.json'), 'r').read()
  logging.debug('Expecting output metadata JSON: %s',
                self._expected_result_from_file)

  # Change the working directory after all the testdata files have been read.
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  fileio.makedirs(os.path.dirname(_TEST_INPUT_DIR))

def setUp(self):
  super().setUp()
  self._test_dir = self.tmp_dir
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  self._test_output_dir = 'gs://{}/test_output'.format(self._BUCKET_NAME)

  test_id = test_utils.random_id()
  self._testdata_root = 'gs://{}/test_data/{}'.format(self._BUCKET_NAME,
                                                      test_id)
  io_utils.copy_dir(self._TEST_DATA_ROOT, self._testdata_root)

  self._data_root = os.path.join(self._testdata_root, 'external', 'csv')
  self._transform_module = os.path.join(self._MODULE_ROOT,
                                        'transform_module.py')
  self._trainer_module = os.path.join(self._MODULE_ROOT, 'trainer_module.py')
  self._serving_model_dir = os.path.join(self._testdata_root, 'output')

  self.addCleanup(self._delete_test_dir, test_id)

def setUp(self):
  super().setUp()
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

def setUp(self):
  super().setUp()
  self._source_data_dir = os.path.join(
      os.path.dirname(os.path.abspath(__file__)), 'testdata')
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

def setUp(self):
  super(BaseKubeflowV2Test, self).setUp()
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  self._test_dir = self.tmp_dir
  self._test_output_dir = 'gs://{}/test_output'.format(self._BUCKET_NAME)

def setUp(self):
  super().setUp()

  # Flags for handler.
  self.engine = 'kubeflow'
  self.chicago_taxi_pipeline_dir = os.path.join(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'testdata')
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))
  self.enter_context(
      test_case_utils.override_env_var('KFP_E2E_BASE_CONTAINER_IMAGE',
                                       'dummy-image'))
  self.enter_context(
      test_case_utils.override_env_var('KFP_E2E_BUCKET_NAME', 'dummy-bucket'))
  self.enter_context(
      test_case_utils.override_env_var('KFP_E2E_TEST_DATA_ROOT',
                                       'dummy-root'))

  self.pipeline_path = os.path.join(self.chicago_taxi_pipeline_dir,
                                    'test_pipeline_kubeflow_1.py')
  self.pipeline_name = 'chicago_taxi_pipeline_kubeflow'

  # Kubeflow client params.
  self.endpoint = 'dummyEndpoint'
  self.namespace = 'kubeflow'
  self.iap_client_id = 'dummyID'
  self.runtime_parameter = {'a': '1', 'b': '2'}

  default_flags = {
      labels.ENGINE_FLAG: self.engine,
      labels.ENDPOINT: self.endpoint,
      labels.IAP_CLIENT_ID: self.iap_client_id,
      labels.NAMESPACE: self.namespace,
  }
  self.flags_with_name = {
      **default_flags,
      labels.PIPELINE_NAME: self.pipeline_name,
  }
  self.flags_with_runtime_param = {
      **default_flags,
      labels.PIPELINE_NAME: self.pipeline_name,
      labels.RUNTIME_PARAMETER: self.runtime_parameter,
  }
  self.flags_with_dsl_path = {
      **default_flags,
      labels.PIPELINE_DSL_PATH: self.pipeline_path,
  }

  # Pipeline args for mocking subprocess.
  self.pipeline_args = {'pipeline_name': 'chicago_taxi_pipeline_kubeflow'}

  self.pipeline_id = 'the_pipeline_id'
  self.experiment_id = 'the_experiment_id'
  self.pipeline_version_id = 'the_pipeline_version_id'

  mock_client_cls = self.enter_context(
      mock.patch.object(kfp, 'Client', autospec=True))
  self.mock_client = mock_client_cls.return_value
  # Required to access generated APIs.
  self.mock_client._experiment_api = mock.MagicMock()

  self.mock_client.get_pipeline_id.return_value = self.pipeline_id
  self.mock_client.get_experiment.return_value.id = self.experiment_id
  versions = [mock.MagicMock()]
  versions[0].id = self.pipeline_version_id
  self.mock_client.list_pipeline_versions.return_value.versions = versions

def setUp(self):
  super().setUp()
  self.enter_context(test_case_utils.override_env_var('NEW_ENV', 'foo'))
  self.enter_context(
      test_case_utils.override_env_var('OVERWRITE_ENV', 'baz'))
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

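# Similarly, a minimal sketch of an override_env_var helper consistent with
# the usage above: set the variable on entry, then restore the previous value
# (or unset it) on exit. An illustrative assumption, not the actual tfx
# test_case_utils implementation.
import contextlib
import os


@contextlib.contextmanager
def override_env_var(name, value):
  """Temporarily sets the environment variable `name` to `value`."""
  old_value = os.environ.get(name)  # None if the variable was unset before.
  os.environ[name] = value
  try:
    yield
  finally:
    if old_value is None:
      os.environ.pop(name, None)  # Variable did not exist before: unset it.
    else:
      os.environ[name] = old_value  # Restore the original value.
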
def setUp(self):
  super(KubeflowHandlerTest, self).setUp()

  # Flags for handler.
  self.engine = 'kubeflow'
  self.chicago_taxi_pipeline_dir = os.path.join(
      os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'testdata')
  self.enter_context(test_case_utils.change_working_dir(self.tmp_dir))

  self.pipeline_path = os.path.join(self.chicago_taxi_pipeline_dir,
                                    'test_pipeline_kubeflow_1.py')
  self.pipeline_name = 'chicago_taxi_pipeline_kubeflow'
  self.pipeline_package_path = os.path.abspath(
      'chicago_taxi_pipeline_kubeflow.tar.gz')

  # Kubeflow client params.
  self.endpoint = 'dummyEndpoint'
  self.namespace = 'kubeflow'
  self.iap_client_id = 'dummyID'

  default_flags = {
      labels.ENGINE_FLAG: self.engine,
      labels.ENDPOINT: self.endpoint,
      labels.IAP_CLIENT_ID: self.iap_client_id,
      labels.NAMESPACE: self.namespace,
  }
  self.flags_with_name = {
      **default_flags,
      labels.PIPELINE_NAME: self.pipeline_name,
  }
  self.flags_with_dsl_path = {
      **default_flags,
      labels.PIPELINE_DSL_PATH: self.pipeline_path,
  }
  self.flags_with_package_path = {
      **self.flags_with_dsl_path,
      labels.PIPELINE_PACKAGE_PATH: self.pipeline_package_path,
  }

  # Pipeline args for mocking subprocess.
  self.pipeline_args = {'pipeline_name': 'chicago_taxi_pipeline_kubeflow'}

  self.pipeline_id = 'the_pipeline_id'
  self.experiment_id = 'the_experiment_id'
  self.pipeline_version_id = 'the_pipeline_version_id'

  mock_client_cls = self.enter_context(
      mock.patch.object(kfp, 'Client', autospec=True))
  self.mock_client = mock_client_cls.return_value
  # Required to access generated APIs.
  self.mock_client._experiment_api = mock.MagicMock()

  self.mock_client.get_pipeline_id.return_value = self.pipeline_id
  self.mock_client.get_experiment.return_value.id = self.experiment_id
  versions = [mock.MagicMock()]
  versions[0].id = self.pipeline_version_id
  self.mock_client.list_pipeline_versions.return_value.versions = versions

  self.mock_subprocess_call = self.enter_context(
      mock.patch.object(
          base_handler.BaseHandler,
          '_subprocess_call',
          side_effect=_create_mock_subprocess_call(
              self.chicago_taxi_pipeline_dir),
          autospec=True))