Esempio n. 1
0
 def test_construct(self):
     """An Evaluator built from example/model channels has a 'ModelEvalPath' output."""
     examples_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ExamplesPath')])
     model_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ModelExportPath')])
     evaluator = component.Evaluator(examples=examples_channel,
                                     model_exports=model_channel)
     output_type_name = evaluator.outputs.output.type_name
     self.assertEqual('ModelEvalPath', output_type_name)
Esempio n. 2
0
    def setUp(self):
        """Build the artifact dicts, exec properties and executor used by the tests.

        The source testdata directory is 'components/testdata' under the path
        obtained by applying ``os.path.dirname`` four times to this file's
        path. The per-test output directory and the model push directory are
        created on disk.
        """
        testdata_root = __file__
        for _ in range(4):
            testdata_root = os.path.dirname(testdata_root)
        self._source_data_dir = os.path.join(testdata_root, 'components',
                                             'testdata')

        # Outputs go under TEST_UNDECLARED_OUTPUTS_DIR when it is set,
        # otherwise under the test case's temp dir, in a folder named after
        # the running test method.
        outputs_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                      self.get_temp_dir())
        self._output_data_dir = os.path.join(outputs_root,
                                             self._testMethodName)
        tf.gfile.MakeDirs(self._output_data_dir)

        # Input artifacts: the exported model and its blessing.
        model_export = types.TfxArtifact(type_name='ModelExportPath')
        model_export.uri = os.path.join(self._source_data_dir,
                                        'trainer/current/')
        self._model_export = model_export
        self._model_blessing = types.TfxArtifact(
            type_name='ModelBlessingPath')
        self._input_dict = {
            'model_export': [self._model_export],
            'model_blessing': [self._model_blessing],
        }

        # Output artifact: the push destination, created up front.
        model_push = types.TfxArtifact(type_name='ModelPushPath')
        model_push.uri = os.path.join(self._output_data_dir, 'model_push')
        self._model_push = model_push
        tf.gfile.MakeDirs(self._model_push.uri)
        self._output_dict = {'model_push': [self._model_push]}

        serving_args = {
            'model_name': 'model_name',
            'project_id': 'project_id',
        }
        self._exec_properties = {
            'custom_config': {
                'ai_platform_serving_args': serving_args,
            },
        }
        self._executor = Executor()
Esempio n. 3
0
    def testCsvExampleGenWrapper(self):
        """Run CsvExampleGenWrapper with a patched Executor and check its metadata file."""
        dataset_artifact = types.TfxArtifact(type_name='ExternalPath',
                                             split='')
        dataset_artifact.uri = '/path/to/dataset'

        # The real executor class is replaced for the whole run, so only the
        # wrapper's own bookkeeping is exercised here.
        with patch.object(executor, 'Executor', autospec=True):
            wrapper_args = argparse.Namespace(
                exec_properties=json.dumps(self.exec_properties),
                outputs=types.jsonify_tfx_type_dict(
                    {'examples': self.examples}),
                executor_class_path=(
                    'tfx.components.example_gen.csv_example_gen.executor.Executor'
                ),
                input_base=json.dumps([dataset_artifact.json_dict()]))
            wrapper = executor_wrappers.CsvExampleGenWrapper(wrapper_args)
            wrapper.run(output_basedir=self.output_basedir)

            # TODO(b/133011207): Validate arguments for executor and Do() method.

            expected_examples = types.TfxArtifact(type_name='ExamplesPath',
                                                  split='dummy')
            # Expect that span and path are resolved.
            expected_examples.span = 1
            expected_examples.uri = (
                '/path/to/output/csv_example_gen/examples/mock_workflow_id/dummy/'
            )

            metadata_path = os.path.join(self.output_basedir,
                                         'output/ml_metadata/examples')
            with tf.gfile.GFile(metadata_path) as metadata_reader:
                recorded_examples = json.loads(metadata_reader.read())
            self.assertEqual([expected_examples.json_dict()],
                             recorded_examples)
Esempio n. 4
0
    def setUp(self):
        """Prepare eval examples, a model, a blessing output and an executor context."""
        testdata_parent = os.path.dirname(os.path.dirname(__file__))
        self._source_data_dir = os.path.join(testdata_parent, 'testdata')
        outputs_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                      self.get_temp_dir())
        test_output_dir = os.path.join(outputs_root, self._testMethodName)
        self.component_name = 'test_component'

        # Inputs: the eval split of the examples plus the trained model, both
        # read from the checked-in testdata directory.
        examples_artifact = types.TfxArtifact(type_name='ExamplesPath',
                                              split='eval')
        examples_artifact.uri = os.path.join(self._source_data_dir,
                                             'csv_example_gen/eval/')
        model_artifact = types.TfxArtifact(type_name='ModelExportPath')
        model_artifact.uri = os.path.join(self._source_data_dir,
                                          'trainer/current/')
        self._input_dict = {
            'examples': [examples_artifact],
            'model': [model_artifact],
        }

        # Output: the blessing artifact, located in the per-test output dir.
        self._blessing = types.TfxArtifact('ModelBlessingPath')
        self._blessing.uri = os.path.join(test_output_dir, 'blessing')
        self._output_dict = {'blessing': [self._blessing]}

        # Executor context backed by a '.temp' folder in the output dir.
        self._tmp_dir = os.path.join(test_output_dir, '.temp')
        self._context = executor.Executor.Context(unique_id='2',
                                                  tmp_dir=self._tmp_dir)
Esempio n. 5
0
  def test_fetch_previous_result(self):
    """Check previous_run lookup and fetch_previous_result_artifacts.

    Publishes one execution, verifies that previous_run only returns its id
    when type name, inputs and exec properties all match, then verifies that
    fetch_previous_result_artifacts returns the published output artifact.
    """
    with metadata.Metadata(connection_config=self._connection_config) as m:

      # Create a 'previous' execution.
      exec_properties = {'log_root': 'path'}
      eid = m.prepare_execution('Test', exec_properties)
      input_artifact = types.TfxArtifact(type_name='ExamplesPath')
      m.publish_artifacts([input_artifact])
      output_artifact = types.TfxArtifact(type_name='ExamplesPath')
      input_dict = {'input': [input_artifact]}
      output_dict = {'output': [output_artifact]}
      m.publish_execution(eid, input_dict, output_dict)

      # Test previous_run: a mismatch in exec properties, inputs or type name
      # is expected to yield no previous run at all.
      self.assertIsNone(m.previous_run('Test', input_dict, {}))
      self.assertIsNone(m.previous_run('Test', {}, exec_properties))
      self.assertIsNone(m.previous_run('Test2', input_dict, exec_properties))
      self.assertEqual(eid, m.previous_run('Test', input_dict, exec_properties))

      # Test fetch_previous_result_artifacts.
      new_output_artifact = types.TfxArtifact(type_name='ExamplesPath')
      # A freshly constructed artifact must not already look published.
      self.assertNotEqual(types.ARTIFACT_STATE_PUBLISHED,
                          new_output_artifact.state)
      new_output_dict = {'output': [new_output_artifact]}
      updated_output_dict = m.fetch_previous_result_artifacts(
          new_output_dict, eid)
      previous_artifact = output_dict['output'][-1].artifact
      current_artifact = updated_output_dict['output'][-1].artifact
      self.assertEqual(types.ARTIFACT_STATE_PUBLISHED,
                       current_artifact.properties['state'].string_value)
      self.assertEqual(previous_artifact.id, current_artifact.id)
      self.assertEqual(previous_artifact.type_id, current_artifact.type_id)
Esempio n. 6
0
  def setUp(self):
    """Build pusher inputs/outputs, a filesystem push destination and the executor.

    The per-test output directory, the model push directory and the serving
    model directory are all created on disk.
    """
    testdata_parent = os.path.dirname(os.path.dirname(__file__))
    self._source_data_dir = os.path.join(testdata_parent, 'testdata')
    outputs_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                  self.get_temp_dir())
    self._output_data_dir = os.path.join(outputs_root, self._testMethodName)
    tf.gfile.MakeDirs(self._output_data_dir)

    # Input artifacts: the exported model and its blessing.
    model_export = types.TfxArtifact(type_name='ModelExportPath')
    model_export.uri = os.path.join(self._source_data_dir, 'trainer/current/')
    self._model_export = model_export
    self._model_blessing = types.TfxArtifact(type_name='ModelBlessingPath')
    self._input_dict = {
        'model_export': [self._model_export],
        'model_blessing': [self._model_blessing],
    }

    # Output artifact: the push record directory.
    model_push = types.TfxArtifact(type_name='ModelPushPath')
    model_push.uri = os.path.join(self._output_data_dir, 'model_push')
    self._model_push = model_push
    tf.gfile.MakeDirs(self._model_push.uri)
    self._output_dict = {'model_push': [self._model_push]}

    # Filesystem destination passed to the executor as a JSON-encoded proto.
    self._serving_model_dir = os.path.join(self._output_data_dir,
                                           'serving_model_dir')
    tf.gfile.MakeDirs(self._serving_model_dir)
    push_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=self._serving_model_dir))
    self._exec_properties = {
        'push_destination': json_format.MessageToJson(push_destination),
    }
    self._executor = executor.Executor()
Esempio n. 7
0
 def setUp(self):
     """Create mock metadata, input artifacts with on-disk uris, and driver args."""
     self._mock_metadata = tf.test.mock.Mock()
     self._input_dict = {
         'input_data': [types.TfxArtifact(type_name='InputType')],
     }
     input_root = os.path.join(
         os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
         self._testMethodName, 'input_dir')
     # valid input artifacts must have a uri pointing to an existing directory.
     for input_key, artifacts in self._input_dict.items():
         for artifact_id, artifact in enumerate(artifacts, start=1):
             # Ids are 1-based positions within each input list.
             artifact.id = artifact_id
             artifact_dir = os.path.join(input_root, input_key,
                                         str(artifact.id), '')
             artifact.uri = artifact_dir
             tf.gfile.MakeDirs(artifact_dir)
     self._output_dict = {
         'output_data': [types.TfxArtifact(type_name='OutputType')],
     }
     self._exec_properties = {'key': 'value'}
     output_root = os.environ.get('TEST_TMP_DIR', self.get_temp_dir())
     self._base_output_dir = os.path.join(output_root,
                                          self._testMethodName,
                                          'base_output_dir')
     self._driver_args = data_types.DriverArgs(
         worker_name='worker_name',
         base_output_dir=self._base_output_dir,
         enable_cache=True)
     self._execution_id = 100
Esempio n. 8
0
    def test_do(self):
        """Run the schema gen executor on train statistics and expect a non-empty output dir."""
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')

        # Statistics artifacts carry the 'ExampleStatisticsPath' type name;
        # the previous 'ExampleStatsPath' spelling did not match it.
        train_stats_artifact = types.TfxArtifact('ExampleStatisticsPath',
                                                 split='train')
        train_stats_artifact.uri = os.path.join(source_data_dir,
                                                'statistics_gen/train/')

        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        schema_output = types.TfxArtifact('SchemaPath')
        schema_output.uri = os.path.join(output_data_dir, 'schema_output')

        input_dict = {
            'stats': [train_stats_artifact],
        }
        output_dict = {
            'output': [schema_output],
        }

        exec_properties = {}

        schema_gen_executor = executor.Executor()
        schema_gen_executor.Do(input_dict, output_dict, exec_properties)
        # The executor is expected to have written something under the
        # schema output uri.
        self.assertNotEqual(0, len(tf.gfile.ListDirectory(schema_output.uri)))
Esempio n. 9
0
 def test_valid_channel(self):
     """A Channel given artifacts of its own type name returns them from get()."""
     first, second = (types.TfxArtifact('MyTypeName') for _ in range(2))
     typed_channel = channel.Channel('MyTypeName',
                                     artifacts=[first, second])
     self.assertEqual(typed_channel.type_name, 'MyTypeName')
     self.assertItemsEqual(typed_channel.get(), [first, second])
Esempio n. 10
0
 def test_channel_as_channel_success(self):
     """as_channel applied to an existing Channel yields an equal channel."""
     first, second = (types.TfxArtifact('MyTypeName') for _ in range(2))
     original = channel.Channel('MyTypeName', artifacts=[first, second])
     converted = channel.as_channel(original)
     self.assertEqual(original, converted)
Esempio n. 11
0
 def setUp(self):
     """Create mock metadata plus channel-wrapped input and output artifacts."""
     self._mock_metadata = tf.test.mock.Mock()
     source_channel = channel.Channel(
         type_name='input_data',
         artifacts=[types.TfxArtifact(type_name='input_data')])
     self._input_dict = {'input_data': source_channel}
     input_root = os.path.join(
         os.environ.get('TEST_TMP_DIR', self.get_temp_dir()),
         self._testMethodName, 'input_dir')
     # valid input artifacts must have a uri pointing to an existing directory.
     for input_key, wrapped in self._input_dict.items():
         for artifact_id, artifact in enumerate(wrapped.get(), start=1):
             # Ids are 1-based positions within each channel.
             artifact.id = artifact_id
             artifact_dir = os.path.join(input_root, input_key,
                                         str(artifact.id), '')
             artifact.uri = artifact_dir
             tf.gfile.MakeDirs(artifact_dir)
     sink_artifact = types.TfxArtifact(type_name='output_data',
                                       split='split')
     self._output_dict = {
         'output_data':
         channel.Channel(type_name='output_data',
                         artifacts=[sink_artifact])
     }
     # Plain artifact-list views used alongside the channel dicts.
     self._input_artifacts = channel.unwrap_channel_dict(self._input_dict)
     self._output_artifacts = {
         'output_data': [types.TfxArtifact(type_name='OutputType')],
     }
     self._exec_properties = {'key': 'value'}
     self._execution_id = 100
Esempio n. 12
0
 def test_invalid_channel_type(self):
     """Expect ValueError when an 'AnotherTypeName' channel gets 'MyTypeName' artifacts."""
     first, second = (types.TfxArtifact('MyTypeName') for _ in range(2))
     mismatched_artifacts = [first, second]
     with self.assertRaises(ValueError):
         channel.Channel('AnotherTypeName',
                         static_artifact_collection=mismatched_artifacts)
Esempio n. 13
0
    def test_pipeline_with_artifact_info(self):
        """Check the artifact info visible on component channels after building a Pipeline.

        After the pipeline is constructed, the artifacts of component_a's
        output channel (also wired in as component_b's input) are expected
        to carry the pipeline name, the producing component id and the
        output key as their name.
        """
        artifacts_collection = [types.TfxArtifact('channel_one')]
        channel_one = channel.Channel(type_name='channel_one',
                                      artifacts=artifacts_collection)
        component_a = _make_fake_component_instance(
            name='component_a', inputs={}, outputs={'one': channel_one})
        component_b = _make_fake_component_instance(
            name='component_b',
            inputs={
                'a': component_a.outputs.one,
            },
            outputs={})

        # Components are deliberately passed in reverse dependency order.
        my_pipeline = pipeline.Pipeline(
            pipeline_name='a',
            pipeline_root='b',
            components=[component_b, component_a],
            metadata_connection_config=self._metadata_connection_config)
        self.assertItemsEqual(my_pipeline.components,
                              [component_a, component_b])

        # Producer side: component_a's output channel.
        produced = component_a.outputs.one._artifacts[0]
        self.assertEqual(produced.pipeline_name, 'a')
        self.assertEqual(produced.producer_component,
                         component_a.component_id)
        self.assertEqual(produced.name, 'one')

        # Consumer side: the same information seen through component_b's input.
        consumed = component_b.inputs.a._artifacts[0]
        self.assertEqual(consumed.pipeline_name, 'a')
        self.assertEqual(consumed.producer_component,
                         component_a.component_id)
        self.assertEqual(consumed.name, 'one')
Esempio n. 14
0
    def test_do(self):
        """Run the trainer executor on testdata and check both model dirs exist."""
        source_data_dir = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        output_data_dir = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Create input dict.
        train_examples = types.TfxArtifact(type_name='ExamplesPath',
                                           split='train')
        train_examples.uri = os.path.join(
            source_data_dir, 'transform/transformed_examples/train/')
        eval_examples = types.TfxArtifact(type_name='ExamplesPath',
                                          split='eval')
        eval_examples.uri = os.path.join(
            source_data_dir, 'transform/transformed_examples/eval/')
        transform_output = types.TfxArtifact(type_name='TransformPath')
        transform_output.uri = os.path.join(source_data_dir,
                                            'transform/transform_output/')
        # The schema is a schema artifact, not an examples artifact; it was
        # previously mis-typed as 'ExamplesPath'.
        schema = types.TfxArtifact(type_name='SchemaPath')
        schema.uri = os.path.join(source_data_dir, 'schema_gen/')

        input_dict = {
            'transformed_examples': [train_examples, eval_examples],
            'transform_output': [transform_output],
            'schema': [schema],
        }

        # Create output dict.
        model_exports = types.TfxArtifact(type_name='ModelExportPath')
        model_exports.uri = os.path.join(output_data_dir, 'model_export_path')
        output_dict = {'output': [model_exports]}

        # Create exec properties.
        module_file_path = os.path.join(source_data_dir, 'module_file',
                                        'trainer_module.py')

        exec_properties = {
            'train_args':
            json_format.MessageToJson(trainer_pb2.TrainArgs(num_steps=1000)),
            'eval_args':
            json_format.MessageToJson(trainer_pb2.EvalArgs(num_steps=500)),
            'module_file':
            module_file_path,
            'warm_starting':
            False,
        }

        trainer_executor = executor.Executor()
        trainer_executor.Do(input_dict=input_dict,
                            output_dict=output_dict,
                            exec_properties=exec_properties)

        # Check outputs: both the eval and the serving model directories are
        # expected under the model export uri.
        self.assertTrue(
            tf.gfile.Exists(os.path.join(model_exports.uri, 'eval_model_dir')))
        self.assertTrue(
            tf.gfile.Exists(
                os.path.join(model_exports.uri, 'serving_model_dir')))
Esempio n. 15
0
 def test_construct(self):
     """A ModelValidator built from example/model channels has a 'ModelBlessingPath' blessing."""
     examples_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ExamplesPath')])
     model_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ModelExportPath')])
     model_validator = component.ModelValidator(examples=examples_channel,
                                                model=model_channel)
     blessing_type_name = model_validator.outputs.blessing.type_name
     self.assertEqual('ModelBlessingPath', blessing_type_name)
Esempio n. 16
0
    def test_execution(self):
        """Exercise prepare_execution and publish_execution and inspect the store."""
        with Metadata(connection_config=self._connection_config,
                      logger=self._logger) as handler:

            # Test prepare_execution.
            eid = handler.prepare_execution('Test', {})
            (execution,) = handler.store.get_executions()
            expected_new_execution = """
        id: 1
        type_id: 1
        properties {
          key: "state"
          value {
            string_value: "new"
          }
        }"""
            self.assertProtoEquals(expected_new_execution, execution)

            # Test publish_execution.
            input_artifact = types.TfxArtifact(type_name='ExamplesPath')
            handler.publish_artifacts([input_artifact])
            output_artifact = types.TfxArtifact(type_name='ExamplesPath')
            handler.publish_execution(eid, {'input': [input_artifact]},
                                      {'output': [output_artifact]})
            # Make sure artifacts in output_dict are published.
            self.assertEqual(types.ARTIFACT_STATE_PUBLISHED,
                             output_artifact.state)
            # Make sure execution state are changed.
            (execution,) = handler.store.get_executions_by_id([eid])
            execution_state = execution.properties['state'].string_value
            self.assertEqual('complete', execution_state)
            # Make sure events are published.
            events = handler.store.get_events_by_execution_ids([eid])
            self.assertEqual(2, len(events))

            # First event: the declared input at path input[0].
            input_event = events[0]
            self.assertEqual(input_artifact.id, input_event.artifact_id)
            self.assertEqual(metadata_store_pb2.Event.DECLARED_INPUT,
                             input_event.type)
            expected_input_path = """
          steps {
            key: "input"
          }
          steps {
            index: 0
          }"""
            self.assertProtoEquals(expected_input_path, input_event.path)

            # Second event: the declared output at path output[0].
            output_event = events[1]
            self.assertEqual(output_artifact.id, output_event.artifact_id)
            self.assertEqual(metadata_store_pb2.Event.DECLARED_OUTPUT,
                             output_event.type)
            expected_output_path = """
          steps {
            key: "output"
          }
          steps {
            index: 0
          }"""
            self.assertProtoEquals(expected_output_path, output_event.path)
Esempio n. 17
0
    def test_run(self, mock_publisher):
        """Launch a FileBasedExampleGen using the Avro executor and check its outputs.

        Args:
            mock_publisher: Mock whose return value's ``publish_execution`` is
                stubbed to return an empty dict and is expected to be called
                exactly once by the launch.
        """
        mock_publisher.return_value.publish_execution.return_value = {}

        example_gen = FileBasedExampleGen(
            executor_class=avro_executor.Executor,
            input_base=external_input(self.avro_dir_path),
            input_config=self.input_config,
            output_config=self.output_config,
            name='AvroExampleGenComponent')

        outputs_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                      self.get_temp_dir())
        output_data_dir = os.path.join(outputs_root, self._testMethodName)
        pipeline_root = os.path.join(output_data_dir, 'Test')
        tf.gfile.MakeDirs(pipeline_root)

        pipeline_info = data_types.PipelineInfo(pipeline_name='Test',
                                                pipeline_root=pipeline_root,
                                                run_id='123')
        driver_args = data_types.DriverArgs(enable_cache=True)

        connection_config = metadata_store_pb2.ConnectionConfig()
        connection_config.sqlite.SetInParent()

        launcher = component_launcher.ComponentLauncher(
            component=example_gen,
            pipeline_info=pipeline_info,
            driver_args=driver_args,
            metadata_connection_config=connection_config,
            additional_pipeline_args={})
        # The recorded component type is the fully qualified class name.
        expected_component_type = '.'.join(
            [FileBasedExampleGen.__module__, FileBasedExampleGen.__name__])
        self.assertEqual(launcher._component_info.component_type,
                         expected_component_type)

        launcher.launch()
        mock_publisher.return_value.publish_execution.assert_called_once()

        # Get output paths.
        component_id = '.'.join([example_gen.component_name, example_gen.name])
        output_path = os.path.join(pipeline_root, component_id, 'examples/1')
        train_examples = types.TfxArtifact(type_name='ExamplesPath',
                                           split='train')
        train_examples.uri = os.path.join(output_path, 'train')
        eval_examples = types.TfxArtifact(type_name='ExamplesPath',
                                          split='eval')
        eval_examples.uri = os.path.join(output_path, 'eval')

        # Check Avro example gen outputs.
        record_file_name = 'data_tfrecord-00000-of-00001.gz'
        train_output_file = os.path.join(train_examples.uri, record_file_name)
        eval_output_file = os.path.join(eval_examples.uri, record_file_name)
        self.assertTrue(tf.gfile.Exists(train_output_file))
        self.assertTrue(tf.gfile.Exists(eval_output_file))
        # The train file is expected to be larger than the eval file.
        train_size = tf.gfile.GFile(train_output_file).size()
        eval_size = tf.gfile.GFile(eval_output_file).size()
        self.assertGreater(train_size, eval_size)
Esempio n. 18
0
 def test_unwrap_channel_dict(self):
     """unwrap_channel_dict maps each key to the list of artifacts in its channel."""
     first, second = (types.TfxArtifact('MyTypeName') for _ in range(2))
     wrapped = {
         'id': channel.Channel('MyTypeName', artifacts=[first, second])
     }
     unwrapped = channel.unwrap_channel_dict(wrapped)
     self.assertDictEqual(unwrapped, {'id': [first, second]})
Esempio n. 19
0
    def test_fetch_previous_result(self):
        """Check previous_execution lookup and fetch_previous_result_artifacts.

        Registers and publishes one execution, verifies that
        previous_execution only returns its id when input artifacts, exec
        properties and component info all match, then verifies that
        fetch_previous_result_artifacts returns the published output artifact.
        """
        with metadata.Metadata(connection_config=self._connection_config) as m:

            # Create a 'previous' execution.
            exec_properties = {'log_root': 'path'}
            eid = m.register_execution(exec_properties=exec_properties,
                                       pipeline_info=self._pipeline_info,
                                       component_info=self._component_info)
            input_artifact = types.TfxArtifact(type_name='ExamplesPath')
            m.publish_artifacts([input_artifact])
            output_artifact = types.TfxArtifact(type_name='ExamplesPath')
            input_artifacts = {'input': [input_artifact]}
            output_artifacts = {'output': [output_artifact]}
            m.publish_execution(eid, input_artifacts, output_artifacts)

            # Test previous_execution.
            # Different exec properties: no match expected.
            self.assertIsNone(
                m.previous_execution(input_artifacts=input_artifacts,
                                     exec_properties={},
                                     pipeline_info=self._pipeline_info,
                                     component_info=self._component_info))
            # Different input artifacts: no match expected.
            self.assertIsNone(
                m.previous_execution(input_artifacts={},
                                     exec_properties=exec_properties,
                                     pipeline_info=self._pipeline_info,
                                     component_info=self._component_info))
            # Different component info: no match expected.
            self.assertIsNone(
                m.previous_execution(input_artifacts=input_artifacts,
                                     exec_properties=exec_properties,
                                     pipeline_info=self._pipeline_info,
                                     component_info=data_types.ComponentInfo(
                                         component_id='unique',
                                         component_type='a.b.c')))
            # Everything identical: the published execution is found.
            self.assertEqual(
                eid,
                m.previous_execution(input_artifacts=input_artifacts,
                                     exec_properties=exec_properties,
                                     pipeline_info=self._pipeline_info,
                                     component_info=self._component_info))

            # Test fetch_previous_result_artifacts.
            new_output_artifact = types.TfxArtifact(type_name='ExamplesPath')
            # A freshly constructed artifact must not already look published.
            self.assertNotEqual(types.ARTIFACT_STATE_PUBLISHED,
                                new_output_artifact.state)
            new_output_dict = {'output': [new_output_artifact]}
            updated_output_dict = m.fetch_previous_result_artifacts(
                new_output_dict, eid)
            previous_artifact = output_artifacts['output'][-1].artifact
            current_artifact = updated_output_dict['output'][-1].artifact
            self.assertEqual(types.ARTIFACT_STATE_PUBLISHED,
                             current_artifact.properties['state'].string_value)
            self.assertEqual(previous_artifact.id, current_artifact.id)
            self.assertEqual(previous_artifact.type_id,
                             current_artifact.type_id)
Esempio n. 20
0
 def test_construct(self):
     """A StatisticsGen over train/eval examples has an 'ExampleStatisticsPath' output."""
     split_examples = [
         types.TfxArtifact(type_name='ExamplesPath', split=split_name)
         for split_name in ('train', 'eval')
     ]
     statistics_gen = component.StatisticsGen(
         input_data=channel.as_channel(split_examples))
     output_type_name = statistics_gen.outputs.output.type_name
     self.assertEqual('ExampleStatisticsPath', output_type_name)
Esempio n. 21
0
 def setUp(self):
     """Create one input and one output artifact with orchestration sources and JSON forms."""
     input_artifact = types.TfxArtifact('INPUT_ONE')
     input_artifact.source = airflow_component._OrchestrationSource(
         'input_one_key', 'input_one_component_id')
     self.input_one = input_artifact

     output_artifact = types.TfxArtifact('OUTPUT_ONE')
     output_artifact.source = airflow_component._OrchestrationSource(
         'output_one_key', 'output_one_component_id')
     self.output_one = output_artifact

     # JSON-encoded single-element lists of each artifact's json_dict().
     self.input_one_json = json.dumps([self.input_one.json_dict()])
     self.output_one_json = json.dumps([self.output_one.json_dict()])
     self._logger_config = logging_utils.LoggerConfig()
Esempio n. 22
0
 def setUp(self):
     """Create a mock metadata handle and single-artifact input/output dicts."""
     metadata_mock = tf.test.mock.Mock()
     # publish_execution is given its own explicit mock.
     metadata_mock.publish_execution = tf.test.mock.Mock()
     self._mock_metadata = metadata_mock
     input_artifact = types.TfxArtifact(type_name='InputType')
     self._input_dict = {'input_data': [input_artifact]}
     output_artifact = types.TfxArtifact(type_name='OutputType')
     self._output_dict = {'output_data': [output_artifact]}
     self._execution_id = 100
Esempio n. 23
0
 def test_construct_without_transform_output(self):
   """A Trainer built without a transform output still has a 'ModelExportPath' output."""
   examples_channel = channel.as_channel(
       [types.TfxArtifact(type_name='ExamplesPath')])
   schema_channel = channel.as_channel(
       [types.TfxArtifact(type_name='SchemaPath')])
   train_args = trainer_pb2.TrainArgs(num_steps=100)
   eval_args = trainer_pb2.EvalArgs(num_steps=50)
   trainer = component.Trainer(
       module_file='/path/to/module/file',
       examples=examples_channel,
       schema=schema_channel,
       train_args=train_args,
       eval_args=eval_args)
   output_type_name = trainer.outputs.output.type_name
   self.assertEqual('ModelExportPath', output_type_name)
Esempio n. 24
0
 def test_construct(self):
     """A Pusher with a filesystem destination has a 'ModelPushPath' model_push output."""
     export_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ModelExportPath')])
     blessing_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ModelBlessingPath')])
     destination = pusher_pb2.PushDestination(
         filesystem=pusher_pb2.PushDestination.Filesystem(
             base_directory='push_destination'))
     pusher = component.Pusher(model_export=export_channel,
                               model_blessing=blessing_channel,
                               push_destination=destination)
     push_type_name = pusher.outputs.model_push.type_name
     self.assertEqual('ModelPushPath', push_type_name)
Esempio n. 25
0
 def test_construct(self):
     """An ExampleValidator over stats and schema has an 'ExampleValidationPath' output."""
     eval_stats = types.TfxArtifact(type_name='ExampleStatisticsPath',
                                    split='eval')
     stats_channel = channel.as_channel([eval_stats])
     schema_channel = channel.as_channel(
         [types.TfxArtifact(type_name='SchemaPath')])
     example_validator = component.ExampleValidator(stats=stats_channel,
                                                    schema=schema_channel)
     output_type_name = example_validator.outputs.output.type_name
     self.assertEqual('ExampleValidationPath', output_type_name)
Esempio n. 26
0
 def test_construct_with_slice_spec(self):
     """An Evaluator given a feature slicing spec still has a 'ModelEvalPath' output."""
     examples_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ExamplesPath')])
     model_channel = channel.as_channel(
         [types.TfxArtifact(type_name='ModelExportPath')])
     hourly_slice = evaluator_pb2.SingleSlicingSpec(
         column_for_slicing=['trip_start_hour'])
     slicing_spec = evaluator_pb2.FeatureSlicingSpec(specs=[hourly_slice])
     evaluator = component.Evaluator(examples=examples_channel,
                                     model_exports=model_channel,
                                     feature_slicing_spec=slicing_spec)
     output_type_name = evaluator.outputs.output.type_name
     self.assertEqual('ModelEvalPath', output_type_name)
Esempio n. 27
0
    def testDo(self, mock_client):
        """Run the BigQuery example gen executor against a mocked client.

        Args:
            mock_client: Mock standing in for the BigQuery client; the schema
                of its query result is set to ``self._schema``.
        """
        # Mock query result schema for _BigQueryConverter.
        query_result = (
            mock_client.return_value.query.return_value.result.return_value)
        query_result.schema = self._schema

        outputs_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                      self.get_temp_dir())
        output_data_dir = os.path.join(outputs_root, self._testMethodName)

        # Create output dict.
        train_examples = types.TfxArtifact(type_name='ExamplesPath',
                                           split='train')
        train_examples.uri = os.path.join(output_data_dir, 'train')
        eval_examples = types.TfxArtifact(type_name='ExamplesPath',
                                          split='eval')
        eval_examples.uri = os.path.join(output_data_dir, 'eval')
        output_dict = {'examples': [train_examples, eval_examples]}

        # Create exe properties.
        input_config = example_gen_pb2.Input(splits=[
            example_gen_pb2.Input.Split(
                name='bq', pattern='SELECT i, f, s FROM `fake`'),
        ])
        # Train gets two hash buckets and eval gets one.
        output_config = example_gen_pb2.Output(
            split_config=example_gen_pb2.SplitConfig(splits=[
                example_gen_pb2.SplitConfig.Split(name='train',
                                                  hash_buckets=2),
                example_gen_pb2.SplitConfig.Split(name='eval',
                                                  hash_buckets=1)
            ]))
        exec_properties = {
            'input_config': json_format.MessageToJson(input_config),
            'output_config': json_format.MessageToJson(output_config),
        }

        # Run executor.
        big_query_example_gen = executor.Executor()
        big_query_example_gen.Do({}, output_dict, exec_properties)

        # Check BigQuery example gen outputs.
        record_file_name = 'data_tfrecord-00000-of-00001.gz'
        train_output_file = os.path.join(train_examples.uri, record_file_name)
        eval_output_file = os.path.join(eval_examples.uri, record_file_name)
        self.assertTrue(tf.gfile.Exists(train_output_file))
        self.assertTrue(tf.gfile.Exists(eval_output_file))
        # The train file is expected to be larger than the eval file.
        train_size = tf.gfile.GFile(train_output_file).size()
        eval_size = tf.gfile.GFile(eval_output_file).size()
        self.assertGreater(train_size, eval_size)
Esempio n. 28
0
    def test_do(self):
        """Run the executor's `Do` and verify the expected outputs exist."""
        # Fixture data sits in `testdata`, next to this file's parent directory.
        testdata_root = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'testdata')
        # Outputs go under a per-test directory named after the test method.
        output_root = os.path.join(
            os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
            self._testMethodName)

        # Input artifacts. Only the eval split and the model export get a
        # uri here; the train split artifact is passed without one.
        train_split = types.TfxArtifact(type_name='ExamplesPath',
                                        split='train')
        eval_split = types.TfxArtifact(type_name='ExamplesPath',
                                       split='eval')
        eval_split.uri = os.path.join(testdata_root, 'csv_example_gen/eval/')
        exported_model = types.TfxArtifact(type_name='ModelExportPath')
        exported_model.uri = os.path.join(testdata_root, 'trainer/current/')
        input_dict = {
            'examples': [train_split, eval_split],
            'model_exports': [exported_model],
        }

        # Output artifact that the executor is expected to populate.
        eval_output = types.TfxArtifact('ModelEvalPath')
        eval_output.uri = os.path.join(output_root, 'eval_output')
        output_dict = {'output': [eval_output]}

        # Exec properties: the slicing spec is handed over as a JSON string.
        slicing_spec = evaluator_pb2.FeatureSlicingSpec(specs=[
            evaluator_pb2.SingleSlicingSpec(
                column_for_slicing=['trip_start_hour']),
            evaluator_pb2.SingleSlicingSpec(
                column_for_slicing=['trip_start_day', 'trip_miles']),
        ])
        exec_properties = {
            'feature_slicing_spec': json_format.MessageToJson(slicing_spec),
        }

        # Run the executor.
        executor.Executor().Do(input_dict, output_dict, exec_properties)

        # Each of these entries must exist under the output uri afterwards.
        for expected_entry in ('eval_config', 'metrics', 'plots'):
            self.assertTrue(
                tf.gfile.Exists(os.path.join(eval_output.uri, expected_entry)))
Esempio n. 29
0
    def __init__(self,
                 input_config: example_gen_pb2.Input,
                 output_config: Optional[example_gen_pb2.Output] = None,
                 component_name: Optional[Text] = 'ExampleGen',
                 example_artifacts: Optional[channel.Channel] = None,
                 name: Optional[Text] = None):
        """Construct a QueryBasedExampleGen component.

        Args:
          input_config: An example_gen_pb2.Input instance, providing input
            configuration.
          output_config: An example_gen_pb2.Output instance, providing output
            configuration. If unset, default splits will be 'train' and
            'eval' with size 2:1.
          component_name: Name of the component, should be unique per
            component class. Defaults to 'ExampleGen'; can be overwritten by
            sub-classes.
          example_artifacts: Optional channel of 'ExamplesPath' for output
            train and eval examples. If unset, one artifact is created per
            generated output split name.
          name: Unique name for every component class instance.
        """
        # Fall back to the default output configuration derived from the
        # input configuration when the caller supplied none.
        if not output_config:
            output_config = utils.make_default_output_config(input_config)

        # Build the output channel only when the caller did not pass one:
        # one 'ExamplesPath' artifact for each output split name.
        if not example_artifacts:
            split_names = utils.generate_output_split_names(
                input_config, output_config)
            example_artifacts = channel.as_channel([
                types.TfxArtifact('ExamplesPath', split=split_name)
                for split_name in split_names
            ])

        component_spec = QueryBasedExampleGenSpec(
            component_name=component_name,
            input_config=input_config,
            output_config=output_config,
            examples=example_artifacts)
        super(_QueryBasedExampleGen, self).__init__(spec=component_spec,
                                                    name=name)
Esempio n. 30
0
    def __init__(self,
                 model_export: channel.Channel,
                 model_blessing: channel.Channel,
                 slack_token: Text,
                 channel_id: Text,
                 timeout_sec: int,
                 slack_blessing: Optional[channel.Channel] = None,
                 name: Optional[Text] = None):
        """Construct a SlackComponent.

        Args:
          model_export: A Channel of 'ModelExportPath' type, usually produced
            by Trainer component.
          model_blessing: A Channel of 'ModelBlessingPath' type, usually
            produced by ModelValidator component.
          slack_token: A token used for setting up connection with Slack
            server.
          channel_id: Slack channel id to communicate on.
          timeout_sec: Seconds to wait for response before defaulting to
            reject.
          slack_blessing: Optional output channel of 'ModelBlessingPath' with
            result of blessing; will be created for you if not specified.
          name: Optional unique name. Necessary if multiple SlackComponent
            instances are declared in the same pipeline.
        """
        # Create the output channel, holding a single 'ModelBlessingPath'
        # artifact, when the caller did not provide one.
        if not slack_blessing:
            blessing_artifact = types.TfxArtifact('ModelBlessingPath')
            slack_blessing = channel.Channel(
                type_name='ModelBlessingPath',
                artifacts=[blessing_artifact])

        component_spec = SlackComponentSpec(
            slack_token=slack_token,
            channel_id=channel_id,
            timeout_sec=timeout_sec,
            model_export=model_export,
            model_blessing=model_blessing,
            slack_blessing=slack_blessing)
        super(SlackComponent, self).__init__(spec=component_spec, name=name)