def test_do(self):
    """Run the executor on the checked-in fixtures and verify its outputs.

    Inputs are read from the ``testdata`` directory that sits next to this
    file's parent directory. Outputs are written under
    ``TEST_UNDECLARED_OUTPUTS_DIR`` when that environment variable is set,
    otherwise under ``self.get_temp_dir()``, in a sub-directory named after
    the running test method.

    The test passes when both transformed-example directories are non-empty
    and the saved-model file exists below the transform output URI.
    """
    # Local import: only needed to clean up the scratch directory below.
    import shutil

    source_data_dir = os.path.join(
        os.path.dirname(os.path.dirname(__file__)), 'testdata')

    # Input artifacts.
    train_artifact = types.TfxArtifact('ExamplesPath', split='train')
    train_artifact.uri = os.path.join(source_data_dir, 'csv_example_gen/train/')
    eval_artifact = types.TfxArtifact('ExamplesPath', split='eval')
    eval_artifact.uri = os.path.join(source_data_dir, 'csv_example_gen/eval/')
    schema_artifact = types.TfxArtifact('Schema')
    schema_artifact.uri = os.path.join(source_data_dir, 'schema_gen/')

    module_file = os.path.join(source_data_dir,
                               'module_file/transform_module.py')

    # Output artifacts.
    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)
    transformed_output = types.TfxArtifact('TransformPath')
    transformed_output.uri = os.path.join(output_data_dir, 'transformed_output')
    transformed_train_examples = types.TfxArtifact('ExamplesPath', split='train')
    transformed_train_examples.uri = os.path.join(output_data_dir, 'train')
    transformed_eval_examples = types.TfxArtifact('ExamplesPath', split='eval')
    transformed_eval_examples.uri = os.path.join(output_data_dir, 'eval')
    temp_path_output = types.TfxArtifact('TempPath')
    temp_path_output.uri = tempfile.mkdtemp()
    # mkdtemp() never removes what it creates, so without this every run
    # leaves a directory behind. ignore_errors keeps the cleanup best-effort
    # in case the directory has already been removed by the time it runs.
    self.addCleanup(shutil.rmtree, temp_path_output.uri, ignore_errors=True)

    input_dict = {
        'input_data': [train_artifact, eval_artifact],
        'schema': [schema_artifact],
    }
    output_dict = {
        'transform_output': [transformed_output],
        'transformed_examples': [
            transformed_train_examples,
            transformed_eval_examples,
        ],
        'temp_path': [temp_path_output],
    }
    exec_properties = {
        'module_file': module_file,
    }

    # Run executor
    transform_executor = executor.Executor()
    transform_executor.Do(input_dict, output_dict, exec_properties)

    # Both splits must have produced at least one entry.
    self.assertNotEqual(
        0, len(tf.gfile.ListDirectory(transformed_train_examples.uri)))
    self.assertNotEqual(
        0, len(tf.gfile.ListDirectory(transformed_eval_examples.uri)))

    # The transform function must have been exported as a saved model.
    path_to_saved_model = os.path.join(
        transformed_output.uri, tft.TFTransformOutput.TRANSFORM_FN_DIR,
        tf.saved_model.constants.SAVED_MODEL_FILENAME_PB)
    self.assertTrue(tf.gfile.Exists(path_to_saved_model))
def test_do_with_cache(self):
    """Run the executor twice, handing the first run's cache to the second.

    The first run is given only a ``cache_output_path``. The second run gets
    a fresh output directory, receives the first run's cache URI as
    ``cache_input_path``, and writes its own ``cache_output_path``. After
    each run the corresponding cache output directory must be non-empty.
    """
    data_root = self._get_source_data_dir()

    # ---- First run: nothing to read from, only a cache to write. ----
    first_out_dir = self._get_output_data_dir('1st_run')
    inputs, outputs, props = self._make_base_do_params(data_root, first_out_dir)
    first_cache = types.TfxArtifact('OutputCache')
    first_cache.uri = os.path.join(first_out_dir, 'CACHE/')
    outputs['cache_output_path'] = [first_cache]

    executor.Executor().Do(inputs, outputs, props)

    first_listing = tf.gfile.ListDirectory(first_cache.uri)
    self.assertNotEqual(0, len(first_listing))

    # ---- Second run: read the cache produced above, write a new one. ----
    second_out_dir = self._get_output_data_dir('2nd_run')
    inputs, outputs, props = self._make_base_do_params(data_root,
                                                       second_out_dir)
    cache_in = types.TfxArtifact('InputCache')
    cache_in.uri = first_cache.uri
    second_cache = types.TfxArtifact('OutputCache')
    second_cache.uri = os.path.join(second_out_dir, 'CACHE/')
    inputs['cache_input_path'] = [cache_in]
    outputs['cache_output_path'] = [second_cache]

    executor.Executor().Do(inputs, outputs, props)

    second_listing = tf.gfile.ListDirectory(second_cache.uri)
    self.assertNotEqual(0, len(second_listing))
def setUp(self):
    """Prepare directories, exec properties and an executor for each test.

    Stores the output directory, the module-file path, the dotted path of
    ``transform_module.preprocessing_fn`` and a fresh ``executor.Executor``
    on ``self``, and sets the ``'splits_config'`` exec property to ``None``.
    """
    super(ExecutorTest, self).setUp()

    out_dir = self._get_output_data_dir()
    self._output_data_dir = out_dir
    # Return value is unused here; presumably this call populates the
    # instance-level dicts (self._exec_properties is indexed just below).
    self._make_base_do_params(self._SOURCE_DATA_DIR, out_dir)

    # Create exec properties skeleton.
    self._module_file = os.path.join(
        self._SOURCE_DATA_DIR, 'module_file/transform_module.py')
    fn = transform_module.preprocessing_fn
    self._preprocessing_fn = '%s.%s' % (fn.__module__, fn.__name__)
    self._exec_properties['splits_config'] = None

    # Executor for test.
    self._transform_executor = executor.Executor()
def setUp(self):
    """Prepare directories, the module-file path and an executor per test.

    Stores the source and output data directories, the module-file path,
    the dotted path of ``transform_module.preprocessing_fn`` and a fresh
    ``executor.Executor`` on ``self``.
    """
    super(ExecutorTest, self).setUp()

    src_dir = self._get_source_data_dir()
    self._source_data_dir = src_dir
    out_dir = self._get_output_data_dir()
    self._output_data_dir = out_dir
    # Return value is unused here; presumably the helper stores the base
    # Do() parameters on the instance -- confirm against its definition.
    self._make_base_do_params(src_dir, out_dir)

    # Create exec properties skeleton.
    self._module_file = os.path.join(
        src_dir, 'module_file/transform_module.py')
    fn = transform_module.preprocessing_fn
    self._preprocessing_fn = '%s.%s' % (fn.__module__, fn.__name__)

    # Executor for test.
    self._transform_executor = executor.Executor()
def _runPipelineGetMetrics(self, inputs, outputs, exec_properties):
    """Run the executor against a test pipeline and return its metrics.

    ``executor.Executor._CreatePipeline`` is patched for the duration of the
    run so that every call returns a pipeline built by
    ``self._makeTestPipeline()``. Each pipeline created this way is recorded
    so that its metrics can be read once the run has finished.

    Args:
      inputs: Input dict forwarded to ``Executor.Do``.
      outputs: Output dict forwarded to ``Executor.Do``.
      exec_properties: Execution properties forwarded to ``Executor.Do``.

    Returns:
      The ``metrics`` attribute of the single pipeline created by the run.

    Raises:
      AssertionError: If the run did not create exactly one pipeline.
    """
    pipelines = []

    def _create_pipeline_wrapper(*_):
        # Arguments (including the patched method's ``self``) are ignored;
        # the test pipeline is always built the same way.
        result = self._makeTestPipeline()
        pipelines.append(result)
        return result

    with tft_unit.mock.patch.object(
            executor.Executor,
            '_CreatePipeline',
            autospec=True,
            side_effect=_create_pipeline_wrapper):
        transform_executor = executor.Executor()
        # Use the caller-supplied arguments; previously they were silently
        # ignored in favour of the instance-level dicts.
        transform_executor.Do(inputs, outputs, exec_properties)

    assert len(pipelines) == 1
    return pipelines[0].metrics
def test_do(self):
    """Run the executor once and verify that it produced its outputs.

    Every artifact under ``'transformed_examples'`` must point at a
    non-empty directory, and the single ``'transform_output'`` artifact must
    contain the saved-model file in its transform-function directory.
    """
    data_root = self._get_source_data_dir()
    out_dir = self._get_output_data_dir()
    inputs, outputs, props = self._make_base_do_params(data_root, out_dir)

    # Run executor
    executor.Executor().Do(inputs, outputs, props)

    for artifact in outputs['transformed_examples']:
        produced = tf.gfile.ListDirectory(artifact.uri)
        self.assertNotEqual(0, len(produced))

    # Unpacking into a one-element target also checks that exactly one
    # transform output artifact is present.
    [transform_artifact] = outputs['transform_output']
    saved_model_pb = os.path.join(
        transform_artifact.uri,
        tft.TFTransformOutput.TRANSFORM_FN_DIR,
        tf.saved_model.constants.SAVED_MODEL_FILENAME_PB)
    self.assertTrue(tf.gfile.Exists(saved_model_pb))
def setUp(self):
    """Prepare directories, exec properties and an executor for each test.

    Stores the output directory, the module-file path, the dotted path of
    ``transform_module.preprocessing_fn`` and a fresh ``executor.Executor``
    on ``self``. Two exec properties are seeded: the splits config is set to
    ``None`` and the force-TF-compat-v1 flag is set to the integer form of
    ``self._use_force_tf_compat_v1()``.
    """
    super(ExecutorTest, self).setUp()

    out_dir = self._get_output_data_dir()
    self._output_data_dir = out_dir
    # Return value is unused here; presumably this call populates the
    # instance-level dicts (self._exec_properties is indexed just below).
    self._make_base_do_params(self._SOURCE_DATA_DIR, out_dir)

    # Create exec properties skeleton.
    self._module_file = os.path.join(
        self._SOURCE_DATA_DIR, 'module_file/transform_module.py')
    fn = transform_module.preprocessing_fn
    self._preprocessing_fn = '%s.%s' % (fn.__module__, fn.__name__)

    props = self._exec_properties
    props[standard_component_specs.SPLITS_CONFIG_KEY] = None
    force_v1 = int(self._use_force_tf_compat_v1())
    props[standard_component_specs.FORCE_TF_COMPAT_V1_KEY] = force_v1

    # Executor for test.
    self._transform_executor = executor.Executor()