Beispiel #1
0
    def test_do(self):
        """Run the Transform executor end to end on the checked-in test data.

        Builds the input/output artifacts by hand, invokes `Executor.Do`, and
        then checks that both transformed example directories are non-empty
        and that the saved-model file exists under the transform output.
        """
        testdata_root = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        def _make_artifact(type_name, uri, **kwargs):
            # Create a TfxArtifact of the given type and point it at `uri`.
            artifact = types.TfxArtifact(type_name, **kwargs)
            artifact.uri = uri
            return artifact

        # Input artifacts: raw train/eval examples and their schema.
        raw_train = _make_artifact(
            'ExamplesPath',
            os.path.join(testdata_root, 'csv_example_gen/train/'),
            split='train')
        raw_eval = _make_artifact(
            'ExamplesPath',
            os.path.join(testdata_root, 'csv_example_gen/eval/'),
            split='eval')
        schema = _make_artifact(
            'Schema', os.path.join(testdata_root, 'schema_gen/'))

        preprocessing_module = os.path.join(
            testdata_root, 'module_file/transform_module.py')

        # Outputs go below TEST_UNDECLARED_OUTPUTS_DIR when it is set,
        # otherwise below the test's temp dir; one sub-directory per test.
        fallback_root = self.get_temp_dir()
        output_root = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', fallback_root),
            self._testMethodName)

        # Output artifacts.
        transform_graph = _make_artifact(
            'TransformPath', os.path.join(output_root, 'transformed_output'))
        out_train = _make_artifact(
            'ExamplesPath', os.path.join(output_root, 'train'), split='train')
        out_eval = _make_artifact(
            'ExamplesPath', os.path.join(output_root, 'eval'), split='eval')
        scratch = types.TfxArtifact('TempPath')
        scratch.uri = tempfile.mkdtemp()

        input_dict = {
            'input_data': [raw_train, raw_eval],
            'schema': [schema],
        }
        output_dict = {
            'transform_output': [transform_graph],
            'transformed_examples': [out_train, out_eval],
            'temp_path': [scratch],
        }
        exec_properties = {'module_file': preprocessing_module}

        # Run executor
        executor.Executor().Do(input_dict, output_dict, exec_properties)

        # Each transformed split directory must contain at least one entry.
        for transformed in (out_train, out_eval):
            self.assertNotEqual(
                0, len(tf.gfile.ListDirectory(transformed.uri)))

        saved_model_pb = os.path.join(
            transform_graph.uri,
            tft.TFTransformOutput.TRANSFORM_FN_DIR,
            tf.saved_model.constants.SAVED_MODEL_FILENAME_PB)
        self.assertTrue(tf.gfile.Exists(saved_model_pb))
Beispiel #2
0
    def test_do_with_cache(self):
        """Run the executor twice, feeding the first run's cache to the second.

        After each run the run's 'CACHE/' output directory must be non-empty.
        """
        source_data_dir = self._get_source_data_dir()

        def _run_and_check(run_name, previous_cache_uri=None):
            # Execute one Transform run writing below a run-specific output
            # directory; returns the URI of the cache written by this run.
            run_dir = self._get_output_data_dir(run_name)
            inputs, outputs, properties = self._make_base_do_params(
                source_data_dir, run_dir)

            cache_in = None
            if previous_cache_uri is not None:
                cache_in = types.TfxArtifact('InputCache')
                cache_in.uri = previous_cache_uri

            cache_out = types.TfxArtifact('OutputCache')
            cache_out.uri = os.path.join(run_dir, 'CACHE/')

            if cache_in is not None:
                inputs['cache_input_path'] = [cache_in]
            outputs['cache_output_path'] = [cache_out]

            executor.Executor().Do(inputs, outputs, properties)

            self.assertNotEqual(
                0, len(tf.gfile.ListDirectory(cache_out.uri)))
            return cache_out.uri

        # First run: no input cache, only produces one.
        first_cache_uri = _run_and_check('1st_run')

        # Second run: consumes the cache written by the first run.
        _run_and_check('2nd_run', previous_cache_uri=first_cache_uri)
Beispiel #3
0
    def setUp(self):
        """Prepare the output dir, base Do() parameters and the executor.

        NOTE(review): `self._exec_properties` is not assigned here; presumably
        `_make_base_do_params` populates it -- confirm against that helper.
        """
        super(ExecutorTest, self).setUp()

        output_dir = self._get_output_data_dir()
        self._output_data_dir = output_dir
        self._make_base_do_params(self._SOURCE_DATA_DIR, output_dir)

        # Create exec properties skeleton: the module can be referenced either
        # by file path or by the fully qualified name of its preprocessing_fn.
        self._module_file = os.path.join(
            self._SOURCE_DATA_DIR, 'module_file/transform_module.py')
        preprocessing_fn = transform_module.preprocessing_fn
        self._preprocessing_fn = '{}.{}'.format(
            preprocessing_fn.__module__, preprocessing_fn.__name__)
        self._exec_properties['splits_config'] = None

        # Executor for test.
        self._transform_executor = executor.Executor()
Beispiel #4
0
    def setUp(self):
        """Resolve data directories, build base Do() params, create executor."""
        super(ExecutorTest, self).setUp()

        source_dir = self._get_source_data_dir()
        self._source_data_dir = source_dir
        output_dir = self._get_output_data_dir()
        self._output_data_dir = output_dir

        self._make_base_do_params(source_dir, output_dir)

        # Create exec properties skeleton: the module can be referenced either
        # by file path or by the fully qualified name of its preprocessing_fn.
        self._module_file = os.path.join(
            source_dir, 'module_file/transform_module.py')
        preprocessing_fn = transform_module.preprocessing_fn
        self._preprocessing_fn = '{}.{}'.format(
            preprocessing_fn.__module__, preprocessing_fn.__name__)

        # Executor for test.
        self._transform_executor = executor.Executor()
Beispiel #5
0
    def _runPipelineGetMetrics(self, inputs, outputs, exec_properties):
        """Run the Transform executor on a test pipeline and return its metrics.

        `executor.Executor._CreatePipeline` is patched so that the executor
        receives a pipeline built by `self._makeTestPipeline()`. The pipeline
        object is recorded so its metrics can be read after the run.

        Args:
          inputs: Input dict forwarded to `Executor.Do`.
          outputs: Output dict forwarded to `Executor.Do`.
          exec_properties: Execution properties forwarded to `Executor.Do`.

        Returns:
          The `metrics` attribute of the single pipeline created by the run.

        Raises:
          AssertionError: If the run did not create exactly one pipeline.
        """
        pipelines = []

        def _create_pipeline_wrapper(*_):
            # Ignore whatever the executor passes in; hand back a test
            # pipeline and remember it for later inspection.
            result = self._makeTestPipeline()
            pipelines.append(result)
            return result

        with tft_unit.mock.patch.object(executor.Executor,
                                        '_CreatePipeline',
                                        autospec=True,
                                        side_effect=_create_pipeline_wrapper):
            transform_executor = executor.Executor()
            # Use the caller-supplied arguments; previously they were ignored
            # in favour of the self._input_dict/_output_dict/_exec_properties
            # attributes, so callers could not vary the run.
            transform_executor.Do(inputs, outputs, exec_properties)
        assert len(pipelines) == 1, (
            'Expected exactly one pipeline, got %d' % len(pipelines))
        return pipelines[0].metrics
Beispiel #6
0
    def test_do(self):
        """Run the executor with the base parameters and check its outputs.

        Every transformed-examples directory must be non-empty and the
        saved-model file must exist under the single transform output.
        """
        input_dict, output_dict, exec_properties = self._make_base_do_params(
            self._get_source_data_dir(), self._get_output_data_dir())

        # Run executor
        executor.Executor().Do(input_dict, output_dict, exec_properties)

        for transformed in output_dict['transformed_examples']:
            entries = tf.gfile.ListDirectory(transformed.uri)
            self.assertNotEqual(0, len(entries))

        # Unpacking into a one-element list also enforces that exactly one
        # transform output artifact is present.
        [transform_graph] = output_dict['transform_output']
        saved_model_pb = os.path.join(
            transform_graph.uri,
            tft.TFTransformOutput.TRANSFORM_FN_DIR,
            tf.saved_model.constants.SAVED_MODEL_FILENAME_PB)
        self.assertTrue(tf.gfile.Exists(saved_model_pb))
Beispiel #7
0
    def setUp(self):
        """Prepare the output dir, base Do() parameters and the executor.

        NOTE(review): `self._exec_properties` is not assigned here; presumably
        `_make_base_do_params` populates it -- confirm against that helper.
        """
        super(ExecutorTest, self).setUp()

        output_dir = self._get_output_data_dir()
        self._output_data_dir = output_dir
        self._make_base_do_params(self._SOURCE_DATA_DIR, output_dir)

        # Create exec properties skeleton: the module can be referenced either
        # by file path or by the fully qualified name of its preprocessing_fn.
        self._module_file = os.path.join(
            self._SOURCE_DATA_DIR, 'module_file/transform_module.py')
        preprocessing_fn = transform_module.preprocessing_fn
        self._preprocessing_fn = '{}.{}'.format(
            preprocessing_fn.__module__, preprocessing_fn.__name__)

        self._exec_properties[
            standard_component_specs.SPLITS_CONFIG_KEY] = None
        # The flag is stored as an int rather than a bool.
        force_compat_v1 = int(self._use_force_tf_compat_v1())
        self._exec_properties[
            standard_component_specs.FORCE_TF_COMPAT_V1_KEY] = force_compat_v1

        # Executor for test.
        self._transform_executor = executor.Executor()