class MetaFeatureGen(base_component.BaseComponent):
    """Custom MetaFeatureGen that generates meta-features for the dataset."""

    SPEC_CLASS = MetaFeatureGenSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(
        executor.MetaFeatureGenExecutor)

    def __init__(self,
                 statistics: types.Channel = None,
                 transformed_examples: Optional[types.Channel] = None,
                 custom_config: Optional[Dict[str, Any]] = None,
                 instance_name: Optional[str] = None):
        """Construct a MetaFeatureGen component.

        Args:
          statistics: Output channel from StatisticsGen.
          transformed_examples: Optional channel from the TFX Transform
            component.
          custom_config: Optional dict which contains additional parameters.
          instance_name: Optional unique instance name. Necessary if multiple
            MetaFeatureGen components are declared in the same pipeline.
        """
        metafeatures = types.Channel(type=artifacts.MetaFeatures,
                                     artifacts=[artifacts.MetaFeatures()])
        spec = MetaFeatureGenSpec(metafeatures=metafeatures,
                                  transformed_examples=transformed_examples,
                                  statistics=statistics,
                                  custom_config=custom_config)
        super(MetaFeatureGen, self).__init__(spec=spec,
                                             instance_name=instance_name)
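# Usage sketch (hedged): wiring MetaFeatureGen after StatisticsGen in a
# pipeline. `example_gen` is assumed to be defined earlier in the pipeline,
# and the `custom_config` key below is purely illustrative, not a documented
# parameter of this component.
from tfx.components import StatisticsGen

statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
meta_feature_gen = MetaFeatureGen(
    statistics=statistics_gen.outputs['statistics'],
    custom_config={'problem_type': 'classification'})  # hypothetical key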
def __init__(self,
             model: Optional[types.Channel] = None,
             model_blessing: Optional[types.Channel] = None,
             infra_blessing: Optional[types.Channel] = None,
             custom_config: Optional[Dict[str, Any]] = None,
             pushed_model: Optional[types.Channel] = None):
    """Construct a Pusher component.

    Args:
      model: An optional Channel of type `standard_artifacts.Model`, usually
        produced by a Trainer component.
      model_blessing: An optional Channel of type
        `standard_artifacts.ModelBlessing`, usually produced by an Evaluator
        component.
      infra_blessing: An optional Channel of type
        `standard_artifacts.InfraBlessing`, usually produced by an
        InfraValidator component.
      custom_config: A dict which contains the deployment job parameters to be
        passed to cloud-based serving platforms. The [Kubeflow example](
        https://github.com/tensorflow/tfx/blob/6ff57e36a7b65818d4598d41e584a42584d361e6/tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow_gcp.py#L278-L285)
        contains an example of how this can be used by custom executors.
      pushed_model: Optional output `standard_artifacts.PushedModel` channel
        containing the result of the push.
    """
    super(Pusher, self).__init__(
        model=model,
        model_blessing=model_blessing,
        infra_blessing=infra_blessing,
        push_destination=None,
        custom_config=custom_config,
        custom_executor_spec=executor_spec.ExecutorClassSpec(executor.Executor),
        pushed_model=pushed_model)
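# Usage sketch (hedged): this Pusher variant reads its deployment parameters
# from `custom_config`. The 'ai_platform_serving_args' key and the fields
# inside it follow the linked Kubeflow example but should be verified against
# the executor version in use; `trainer` and `evaluator` are assumed to be
# defined earlier in the pipeline.
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    custom_config={
        'ai_platform_serving_args': {
            'model_name': 'my_model',        # illustrative
            'project_id': 'my-gcp-project',  # illustrative
            'regions': ['us-central1'],
        }
    })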
class PartitionGen(base_component.BaseComponent):
    """The PartitionGen component.

    It breaks the dataset down into partitions that can each be manipulated
    independently.
    """

    SPEC_CLASS = PartitionSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(Executor)

    def __init__(self,
                 statistics: types.Channel,
                 schema: types.Channel,
                 examples: types.Channel,
                 partitions: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        if not partitions:
            partitions_artifact = Partitions()
            partitions = types.Channel(type=Partitions,
                                       artifacts=[partitions_artifact])
        spec = PartitionSpec(statistics=statistics,
                             schema=schema,
                             examples=examples,
                             partitions=partitions)
        super().__init__(spec=spec, instance_name=instance_name)
class SchemaGen(base_component.BaseComponent):
    """A TFX SchemaGen component to generate a schema from the training data.

    The SchemaGen component uses [TensorFlow Data
    Validation](https://www.tensorflow.org/tfx/data_validation) to generate a
    schema from input statistics. The following TFX libraries use the schema:
      - TensorFlow Data Validation
      - TensorFlow Transform
      - TensorFlow Model Analysis

    In a typical TFX pipeline, the SchemaGen component generates a schema which
    is consumed by the other pipeline components.

    Please see https://www.tensorflow.org/tfx/data_validation for more details.

    ## Example
    ```
    # Generates schema based on statistics files.
    infer_schema = SchemaGen(statistics=statistics_gen.outputs['statistics'])
    ```
    """
    # TODO(b/123941608): Update pydoc about how to use a user provided schema

    SPEC_CLASS = SchemaGenSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(self,
                 statistics: Optional[types.Channel] = None,
                 infer_feature_shape: Optional[bool] = False,
                 output: Optional[types.Channel] = None,
                 stats: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Constructs a SchemaGen component.

        Args:
          statistics: A Channel of `ExampleStatistics` type (required if spec
            is not passed). This should contain at least a `train` split. Other
            splits are currently ignored. _required_
          infer_feature_shape: Boolean value indicating whether or not to infer
            the shape of features. If the feature shape is not inferred, a
            downstream TensorFlow Transform component using the schema will
            parse the input as tf.SparseTensor.
          output: Output `Schema` channel for the schema result.
          stats: Backwards-compatibility alias for the 'statistics' argument.
          instance_name: Optional name assigned to this specific instance of
            SchemaGen. Required only if multiple SchemaGen components are
            declared in the same pipeline.

        Either `statistics` or `stats` must be present in the input arguments.
        """
        statistics = statistics or stats
        output = output or types.Channel(
            type=standard_artifacts.Schema,
            artifacts=[standard_artifacts.Schema()])
        spec = SchemaGenSpec(stats=statistics,
                             infer_feature_shape=infer_feature_shape,
                             output=output)
        super(SchemaGen, self).__init__(spec=spec, instance_name=instance_name)
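# Usage sketch (hedged): enabling shape inference so a downstream Transform
# parses inputs as dense tensors where possible, per the Args description
# above. `statistics_gen` is assumed to be a StatisticsGen instance defined
# earlier in the pipeline, as in the docstring example.
infer_schema = SchemaGen(
    statistics=statistics_gen.outputs['statistics'],
    infer_feature_shape=True)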
def testComponentCustomExecutor(self):

    class EmptyComponentSpec(types.ComponentSpec):
        PARAMETERS = {}
        INPUTS = {}
        OUTPUTS = {}

    class MyComponent(base_component.BaseComponent):
        SPEC_CLASS = EmptyComponentSpec
        EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(
            base_executor.BaseExecutor)

    class MyCustomExecutor(base_executor.BaseExecutor):
        pass

    custom_executor_component = MyComponent(
        spec=EmptyComponentSpec(),
        custom_executor_spec=executor_spec.ExecutorClassSpec(MyCustomExecutor))
    self.assertEqual(custom_executor_component.executor_spec.executor_class,
                     MyCustomExecutor)

    with self.assertRaisesRegexp(TypeError,
                                 "should be an instance of ExecutorSpec"):
        MyComponent(spec=EmptyComponentSpec(), custom_executor_spec=object)
class TrainGPT2(base_component.BaseComponent):

    SPEC_CLASS = TrainGPT2Spec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(Executor)

    def __init__(self,
                 dataset_dir: types.Channel,
                 checkpoint_dir: types.Channel,
                 encoding_dir: types.Channel,
                 model_name: Text,
                 train_config: Dict,
                 encoding: Text = 'utf-8',
                 end_token: Text = ""):
        trained_checkpoint_dir = external_input("TrainGPT2")
        sample_dir = external_input("TrainGPT2")
        tensorboard_dir = external_input("TrainGPT2")
        hyperparameter_dir = external_input("TrainGPT2")
        metric_dir = external_input("TrainGPT2")
        spec = TrainGPT2Spec(dataset_dir=dataset_dir,
                             checkpoint_dir=checkpoint_dir,
                             encoding_dir=encoding_dir,
                             model_name=model_name,
                             train_config=train_config,
                             encoding=encoding,
                             trained_checkpoint_dir=trained_checkpoint_dir,
                             sample_dir=sample_dir,
                             hyperparameter_dir=hyperparameter_dir,
                             metric_dir=metric_dir,
                             tensorboard_dir=tensorboard_dir,
                             end_token=end_token)
        super(TrainGPT2, self).__init__(spec=spec)
class ExampleValidator(base_component.BaseComponent):

    SPEC_CLASS = ExampleValidatorSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(Executor)

    def __init__(self,
                 statistics: types.Channel = None,
                 schema: types.Channel = None,
                 output: Optional[types.Channel] = None,
                 stats: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """An ExampleValidator component for examples.

        TFX has its own ExampleValidator component, and this one uses the same
        executor. This one exists because the TFX component does not allow
        specifying the splits to use; it just assumes `train` and `eval`. This
        component will be unnecessary once the TFX ExampleValidator allows
        setting the input and output splits as other components do.
        """
        if stats:
            logging.warning(
                'The "stats" argument to the ExampleValidator component has '
                'been renamed to "statistics" and is deprecated. Please update'
                ' your usage as support for this argument will be removed'
                ' soon.')
            statistics = stats
        anomalies = output or types.Channel(
            type=standard_artifacts.ExampleAnomalies,
            artifacts=[standard_artifacts.ExampleAnomalies()])
        spec = ExampleValidatorSpec(statistics=statistics,
                                    schema=schema,
                                    anomalies=anomalies)
        super(ExampleValidator, self).__init__(spec=spec,
                                               instance_name=instance_name)
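# Usage sketch (hedged): this validator is constructed like the stock TFX
# ExampleValidator; `statistics_gen` and `schema_gen` are assumed to be
# defined earlier in the pipeline, and the output channel keys below are
# the conventional ones, not confirmed by this snippet.
example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'])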
class IndexEvaluator(base_component.BaseComponent):

    SPEC_CLASS = IndexEvaluatorSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(ScaNNIndexEvaluatorExecutor)

    def __init__(self,
                 examples: types.Channel,
                 schema: types.Channel,
                 model: types.Channel,
                 min_recall: float,
                 max_latency: float,
                 evaluation: Optional[types.Channel] = None,
                 blessing: Optional[types.Channel] = None,
                 instance_name=None):
        blessing = blessing or types.Channel(
            type=standard_artifacts.ModelBlessing,
            artifacts=[standard_artifacts.ModelBlessing()])
        evaluation = evaluation or types.Channel(
            type=standard_artifacts.ModelEvaluation,
            artifacts=[standard_artifacts.ModelEvaluation()])
        spec = IndexEvaluatorSpec(examples=examples,
                                  schema=schema,
                                  model=model,
                                  evaluation=evaluation,
                                  blessing=blessing,
                                  min_recall=min_recall,
                                  max_latency=max_latency)
        super().__init__(spec=spec, instance_name=instance_name)
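# Usage sketch (hedged): evaluating a ScaNN index against recall and latency
# thresholds. The upstream components (`example_gen`, `schema_gen`, `indexer`)
# and their output channel keys are illustrative assumptions, and the unit of
# `max_latency` is assumed rather than documented here.
index_evaluator = IndexEvaluator(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    model=indexer.outputs['model'],
    min_recall=0.95,
    max_latency=0.01)  # assumed to be seconds per query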
class CloudAIBulkInferrerComponent(base_component.BaseComponent):
    """A Cloud AI component to do batch inference on a remotely hosted model.

    The BulkInferrer component will push a model to Google Cloud AI Platform,
    consume examples data, send requests to the remotely hosted model, and
    produce the inference results to an external location as PredictionLog
    protos. After inference, it will delete the model from Google Cloud AI
    Platform.

    TODO(b/155325467): Create an end-to-end test for this component.
    """

    SPEC_CLASS = CloudAIBulkInferrerComponentSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(self,
                 examples: types.Channel = None,
                 model: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[Union[bulk_inferrer_pb2.DataSpec,
                                           Dict[Text, Any]]] = None,
                 custom_config: Dict[Text, Any] = None,
                 inference_result: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct a BulkInferrer component.

        Args:
          examples: A Channel of type `standard_artifacts.Examples`, usually
            produced by an ExampleGen component. _required_
          model: A Channel of type `standard_artifacts.Model`, usually produced
            by a Trainer component.
          model_blessing: A Channel of type `standard_artifacts.ModelBlessing`,
            usually produced by a ModelValidator component.
          data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
            selection. If any field is provided as a RuntimeParameter,
            data_spec should be constructed as a dict with the same field names
            as the DataSpec proto message.
          custom_config: A dict which contains the deployment job parameters to
            be passed to Google Cloud AI Platform.
            custom_config.ai_platform_serving_args needs to contain the serving
            job parameters. For the full set of parameters, refer to
            https://cloud.google.com/ml-engine/reference/rest/v1/projects.models
          inference_result: Channel of type `standard_artifacts.InferenceResult`
            to store the inference results.
          instance_name: Optional name assigned to this specific instance of
            BulkInferrer. Required only if multiple BulkInferrer components are
            declared in the same pipeline.
        """
        inference_result = inference_result or types.Channel(
            type=standard_artifacts.InferenceResult,
            artifacts=[standard_artifacts.InferenceResult()])
        spec = CloudAIBulkInferrerComponentSpec(
            examples=examples,
            model=model,
            model_blessing=model_blessing,
            data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
            custom_config=json_utils.dumps(custom_config),
            inference_result=inference_result)
        super(CloudAIBulkInferrerComponent, self).__init__(
            spec=spec, instance_name=instance_name)
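# Usage sketch (hedged): the docstring above says custom_config must carry the
# serving job parameters under 'ai_platform_serving_args'. The individual
# fields below are illustrative and should be checked against
# https://cloud.google.com/ml-engine/reference/rest/v1/projects.models;
# `example_gen`, `trainer`, and `evaluator` are assumed upstream components.
bulk_inferrer = CloudAIBulkInferrerComponent(
    examples=example_gen.outputs['examples'],
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    custom_config={
        'ai_platform_serving_args': {
            'model_name': 'bulk_infer_model',  # illustrative
            'project_id': 'my-gcp-project',    # illustrative
            'regions': ['us-central1'],
        }
    })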
class _FakeComponent(base_component.BaseComponent):

    SPEC_CLASS = types.ComponentSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(base_executor.BaseExecutor)

    def __init__(self, spec: types.ComponentSpec):
        super(_FakeComponent, self).__init__(spec=spec)
def testAIPlatformTrainerPipeline(self):
    """Trainer-only test pipeline on AI Platform Training."""
    pipeline_name = 'kubeflow-aip-trainer-test-{}'.format(self._random_id())
    pipeline = self._create_pipeline(pipeline_name, [
        Trainer(
            custom_executor_spec=executor_spec.ExecutorClassSpec(
                ai_platform_trainer_executor.Executor),
            module_file=self._taxi_module_file,
            transformed_examples=self._input_artifacts(
                pipeline_name, self._test_transformed_examples),
            schema=self._input_artifacts(pipeline_name, self._test_schema),
            transform_output=self._input_artifacts(
                pipeline_name, self._test_transform_graph),
            train_args=trainer_pb2.TrainArgs(num_steps=10000),
            eval_args=trainer_pb2.EvalArgs(num_steps=5000),
            custom_config={
                'ai_platform_training_args': {
                    'project': self._gcp_project_id,
                    'region': self._gcp_region,
                    'jobDir': os.path.join(
                        self._pipeline_root(pipeline_name), 'tmp'),
                    'masterConfig': {
                        'imageUri': self._container_image,
                    }
                }
            }),
    ])
    self._compile_and_run_pipeline(pipeline)
def testAIPlatformTrainerPipeline(self):
    """Trainer-only test pipeline on AI Platform Training."""
    pipeline_name = 'kubeflow-aip-trainer-test-{}'.format(self._random_id())
    pipeline = self._create_pipeline(pipeline_name, [
        self.schema_importer,
        self.transformed_examples_importer,
        self.transform_graph_importer,
        Trainer(
            custom_executor_spec=executor_spec.ExecutorClassSpec(
                ai_platform_trainer_executor.Executor),
            module_file=self._taxi_module_file,
            transformed_examples=self.transformed_examples_importer
            .outputs['result'],
            schema=self.schema_importer.outputs['result'],
            transform_graph=self.transform_graph_importer.outputs['result'],
            train_args=trainer_pb2.TrainArgs(num_steps=10),
            eval_args=trainer_pb2.EvalArgs(num_steps=5),
            custom_config={
                ai_platform_trainer_executor.TRAINING_ARGS: {
                    'project': self._gcp_project_id,
                    'region': self._gcp_region,
                    'jobDir': os.path.join(
                        self._pipeline_root(pipeline_name), 'tmp'),
                    'masterConfig': {
                        'imageUri': self._container_image,
                    }
                }
            }),
    ])
    self._compile_and_run_pipeline(pipeline)
class UpdateMongoNews(base_component.BaseComponent):

    SPEC_CLASS = UpdateMongoNewsSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(Executor)

    def __init__(self,
                 ip: Optional[Text] = None,
                 port: Optional[Text] = None,
                 username: Optional[Text] = None,
                 password: Optional[Text] = None,
                 dbname: Optional[Text] = None,
                 updated_collections: Optional[List] = None,
                 update_collections: Optional[List] = None):
        # Avoid mutable default arguments: create fresh lists per instance.
        if updated_collections is None:
            updated_collections = []
        if update_collections is None:
            update_collections = []
        if not ip:
            ip = "mongo"
        if not port:
            port = "27017"
        if not username:
            username = os.environ['MONGO_ROOT_USER']
        if not password:
            password = os.environ['MONGO_ROOT_PASSWORD']
        if not dbname:
            dbname = os.environ['MONGO_DATABASE_NAME']
        spec = UpdateMongoNewsSpec(ip=ip,
                                   port=port,
                                   username=username,
                                   password=password,
                                   dbname=dbname,
                                   update_collections=update_collections,
                                   updated_collections=updated_collections,
                                   backup_dir="")
        super(UpdateMongoNews, self).__init__(spec=spec)
def testAIPlatformGenericTrainerPipeline(self):
    """Trainer-only pipeline on AI Platform Training with GenericTrainer."""
    pipeline_name = 'kubeflow-aip-generic-trainer-test-{}'.format(
        self._random_id())
    pipeline = self._create_pipeline(pipeline_name, [
        self.schema_importer,
        self.transformed_examples_importer,
        self.transform_graph_importer,
        Trainer(
            custom_executor_spec=executor_spec.ExecutorClassSpec(
                ai_platform_trainer_executor.GenericExecutor),
            module_file=self._trainer_module,
            transformed_examples=self.transformed_examples_importer
            .outputs['result'],
            schema=self.schema_importer.outputs['result'],
            transform_graph=self.transform_graph_importer.outputs['result'],
            train_args=trainer_pb2.TrainArgs(num_steps=10),
            eval_args=trainer_pb2.EvalArgs(num_steps=5),
            custom_config={
                ai_platform_trainer_executor.TRAINING_ARGS_KEY:
                    self.getCaipTrainingArgs(pipeline_name)
            })
    ])
    self._compile_and_run_pipeline(pipeline)
    self.assertNumberOfTrainerOutputIsOne(pipeline_name)
class MongoImport(base_component.BaseComponent):

    SPEC_CLASS = MongoImportSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(Executor)

    def __init__(self,
                 rss_feed: types.Channel,
                 colname: Text,
                 ip: Optional[Text] = None,
                 port: Optional[Text] = None,
                 username: Optional[Text] = None,
                 password: Optional[Text] = None,
                 dbname: Optional[Text] = None):
        if not ip:
            ip = "127.0.0.1"
        if not port:
            port = "27017"
        if not username:
            username = os.environ['MONGO_ROOT_USER']
        if not password:
            password = os.environ['MONGO_ROOT_PASSWORD']
        if not dbname:
            dbname = os.environ['MONGO_DATABASE_NAME']
        spec = MongoImportSpec(ip=ip,
                               port=port,
                               username=username,
                               password=password,
                               dbname=dbname,
                               rss_feed=rss_feed,
                               colname=colname)
        super(MongoImport, self).__init__(spec=spec)
class TestPredComponent(base_component.BaseComponent):
    """Custom TFX component for test predictions.

    This custom component class consists of only a constructor.
    """

    SPEC_CLASS = TestPredComponentSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(self,
                 examples: types.Channel = None,
                 model: types.Channel = None,
                 output_data: types.Channel = None):
        """Construct a TestPredComponent.

        Args:
          examples: A Channel of examples to run predictions on.
          model: A Channel containing the model to use for predictions.
          output_data: Optional output Channel of type
            `standard_artifacts.Examples`; created if not provided.
        """
        if not output_data:
            examples_artifact = standard_artifacts.Examples()
            output_data = channel_utils.as_channel([examples_artifact])
        spec = TestPredComponentSpec(examples=examples,
                                     model=model,
                                     output_data=output_data)
        super(TestPredComponent, self).__init__(spec=spec)
def testCanLaunch(self):
    self.assertTrue(
        docker_component_launcher.DockerComponentLauncher.can_launch(
            executor_spec.ExecutorContainerSpec(image='test')))
    self.assertFalse(
        docker_component_launcher.DockerComponentLauncher.can_launch(
            executor_spec.ExecutorClassSpec(base_executor.BaseExecutor)))
class HelloComponent(base_component.BaseComponent):
    """Custom TFX Hello World Component.

    This custom component class consists of only a constructor.
    """

    SPEC_CLASS = HelloComponentSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(self,
                 input_data: types.Channel = None,
                 output_data: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct a HelloComponent.

        Args:
          input_data: A Channel of type `standard_artifacts.InferenceResult`.
          output_data: A Channel of type `standard_artifacts.ExternalArtifact`.
          instance_name: Optional unique name. Necessary if multiple Hello
            components are declared in the same pipeline.
        """
        if not output_data:
            examples_artifact = standard_artifacts.ExternalArtifact()
            output_data = channel_utils.as_channel([examples_artifact])
        spec = HelloComponentSpec(input_data=input_data,
                                  output_data=output_data)
        super(HelloComponent, self).__init__(spec=spec,
                                             instance_name=instance_name)
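# Usage sketch (hedged): HelloComponent takes an InferenceResult channel and
# produces an ExternalArtifact output. `bulk_inferrer` stands in for any
# upstream component producing an InferenceResult, and its output key below
# is assumed, not confirmed by this snippet.
hello = HelloComponent(
    input_data=bulk_inferrer.outputs['inference_result'],
    instance_name='HelloWorld')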
class OldNewsImport(base_component.BaseComponent):

    SPEC_CLASS = OldNewsImportSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(Executor)

    def __init__(self,
                 backup_dir: Text,
                 ip: Optional[Text] = None,
                 port: Optional[Text] = None,
                 username: Optional[Text] = None,
                 password: Optional[Text] = None,
                 dbname: Optional[Text] = None):
        if not ip:
            ip = "mongo"
        if not port:
            port = "27017"
        if not username:
            username = os.environ['MONGO_ROOT_USER']
        if not password:
            password = os.environ['MONGO_ROOT_PASSWORD']
        if not dbname:
            dbname = os.environ['MONGO_DATABASE_NAME']
        spec = OldNewsImportSpec(ip=ip,
                                 port=port,
                                 username=username,
                                 password=password,
                                 dbname=dbname,
                                 backup_dir=backup_dir)
        super(OldNewsImport, self).__init__(spec=spec)
def __init__(
    self,
    instance_name: Optional[Text] = None,
    executor_spec: Optional[executor_spec_module.ExecutorSpec] = None,
    driver_class: Optional[Type[base_driver.BaseDriver]] = None,
):
    """Initialize a node.

    Args:
      instance_name: Optional unique identifying name for this instance of the
        node in the pipeline. Required if two instances of the same node are
        used in the pipeline.
      executor_spec: Optional instance of executor_spec.ExecutorSpec which
        describes how to execute this node (defaults to an empty executor,
        which indicates a no-op).
      driver_class: Optional subclass of base_driver.BaseDriver as a custom
        driver for this node (defaults to base_driver.BaseDriver). Nodes
        usually use the default driver class, but may override it.
    """
    if executor_spec is None:
        executor_spec = executor_spec_module.ExecutorClassSpec(
            base_executor.EmptyExecutor)
    if driver_class is None:
        driver_class = base_driver.BaseDriver
    self._instance_name = instance_name
    self.executor_spec = executor_spec
    self.driver_class = driver_class
    self._upstream_nodes = set()
    self._downstream_nodes = set()
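# Sketch (hedged): a concrete node built on the constructor above, supplying
# its own executor spec instead of the default no-op EmptyExecutor. The class
# names `BaseNode` and `MyNode` are illustrative; the actual base class name
# is not shown in this snippet.
class MyNode(BaseNode):

    def __init__(self, instance_name: Optional[Text] = None):
        super(MyNode, self).__init__(
            instance_name=instance_name,
            executor_spec=executor_spec_module.ExecutorClassSpec(
                base_executor.BaseExecutor))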
class _QueryBasedExampleGen(base_component.BaseComponent):
    """A TFX component to ingest examples from query-based systems.

    The _QueryBasedExampleGen component can be extended to ingest examples from
    query-based systems such as Presto or BigQuery. The component will also
    convert the input data into
    [tf.record](https://www.tensorflow.org/tutorials/load_data/tf_records)
    format and generate train and eval example splits for downstream
    components.

    ## Example
    ```
    _query = "SELECT * FROM `bigquery-public-data.chicago_taxi_trips.taxi_trips`"

    # Brings data into the pipeline or otherwise joins/converts training data.
    example_gen = BigQueryExampleGen(query=_query)
    ```
    """

    SPEC_CLASS = QueryBasedExampleGenSpec
    # EXECUTOR_SPEC should be overridden by subclasses.
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(base_executor.BaseExecutor)

    def __init__(self,
                 input_config: example_gen_pb2.Input,
                 output_config: Optional[example_gen_pb2.Output] = None,
                 custom_config: Optional[example_gen_pb2.CustomConfig] = None,
                 example_artifacts: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct a QueryBasedExampleGen component.

        Args:
          input_config: An
            [example_gen_pb2.Input](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing input configuration. _required_
          output_config: An
            [example_gen_pb2.Output](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing output configuration. If unset, the default
            splits will be labeled as 'train' and 'eval' with a distribution
            ratio of 2:1.
          custom_config: An
            [example_gen_pb2.CustomConfig](https://github.com/tensorflow/tfx/blob/master/tfx/proto/example_gen.proto)
            instance, providing custom configuration for ExampleGen.
          example_artifacts: Channel of 'ExamplesPath' for output train and
            eval examples.
          instance_name: Optional unique instance name. Required only if
            multiple ExampleGen components are declared in the same pipeline.
        """
        # Configure outputs.
        output_config = output_config or utils.make_default_output_config(
            input_config)
        example_artifacts = example_artifacts or channel_utils.as_channel([
            standard_artifacts.Examples(split=split_name)
            for split_name in utils.generate_output_split_names(
                input_config, output_config)
        ])
        spec = QueryBasedExampleGenSpec(
            input_config=input_config,
            output_config=output_config,
            custom_config=custom_config,
            examples=example_artifacts)
        super(_QueryBasedExampleGen, self).__init__(
            spec=spec, instance_name=instance_name)
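# Usage sketch (hedged): overriding the default 2:1 train/eval split described
# above with an explicit output configuration. `_query` matches the docstring
# example; the hash_buckets values are illustrative, and `example_gen_pb2` and
# `BigQueryExampleGen` are assumed to be imported as in the surrounding code.
output_config = example_gen_pb2.Output(
    split_config=example_gen_pb2.SplitConfig(splits=[
        example_gen_pb2.SplitConfig.Split(name='train', hash_buckets=3),
        example_gen_pb2.SplitConfig.Split(name='eval', hash_buckets=1),
    ]))
example_gen = BigQueryExampleGen(query=_query, output_config=output_config)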
def testRun(self, mock_publisher):
    mock_publisher.return_value.publish_execution.return_value = {}

    example_gen = FileBasedExampleGen(
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            avro_executor.Executor),
        input=external_input(self.avro_dir_path),
        input_config=self.input_config,
        output_config=self.output_config,
        instance_name='AvroExampleGen')

    output_data_dir = os.path.join(
        os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR', self.get_temp_dir()),
        self._testMethodName)
    pipeline_root = os.path.join(output_data_dir, 'Test')
    tf.io.gfile.makedirs(pipeline_root)
    pipeline_info = data_types.PipelineInfo(pipeline_name='Test',
                                            pipeline_root=pipeline_root,
                                            run_id='123')

    driver_args = data_types.DriverArgs(enable_cache=True)

    connection_config = metadata_store_pb2.ConnectionConfig()
    connection_config.sqlite.SetInParent()
    metadata_connection = metadata.Metadata(connection_config)

    launcher = in_process_component_launcher.InProcessComponentLauncher.create(
        component=example_gen,
        pipeline_info=pipeline_info,
        driver_args=driver_args,
        metadata_connection=metadata_connection,
        beam_pipeline_args=[],
        additional_pipeline_args={})
    self.assertEqual(
        launcher._component_info.component_type,
        '.'.join([FileBasedExampleGen.__module__,
                  FileBasedExampleGen.__name__]))

    launcher.launch()
    mock_publisher.return_value.publish_execution.assert_called_once()

    # Get output paths.
    component_id = example_gen.id
    output_path = os.path.join(pipeline_root, component_id, 'examples/1')
    examples = standard_artifacts.Examples()
    examples.uri = output_path
    examples.split_names = artifact_utils.encode_split_names(['train', 'eval'])

    # Check Avro example gen outputs.
    train_output_file = os.path.join(examples.uri, 'train',
                                     'data_tfrecord-00000-of-00001.gz')
    eval_output_file = os.path.join(examples.uri, 'eval',
                                    'data_tfrecord-00000-of-00001.gz')
    self.assertTrue(tf.io.gfile.exists(train_output_file))
    self.assertTrue(tf.io.gfile.exists(eval_output_file))
    self.assertGreater(
        tf.io.gfile.GFile(train_output_file).size(),
        tf.io.gfile.GFile(eval_output_file).size())
class BulkInferrer(base_component.BaseComponent):
    """A TFX component to do batch inference on a model with unlabelled examples.

    BulkInferrer consumes examples data and a model, and produces the inference
    results to an external location as PredictionLog protos. BulkInferrer only
    runs inference on a validated model.

    ## Example
    ```
    # Uses BulkInferrer to run inference on examples.
    bulk_inferrer = BulkInferrer(
        examples=example_gen.outputs['examples'],
        model_export=trainer.outputs['output'])
    ```
    """

    SPEC_CLASS = BulkInferrerSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(self,
                 examples: types.Channel = None,
                 model_export: Optional[types.Channel] = None,
                 model_blessing: Optional[types.Channel] = None,
                 data_spec: Optional[bulk_inferrer_pb2.DataSpec] = None,
                 model_spec: Optional[bulk_inferrer_pb2.ModelSpec] = None,
                 output: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct a BulkInferrer component.

        Args:
          examples: A Channel of 'ExamplesPath' type, usually produced by an
            ExampleGen component. _required_
          model_export: A Channel of 'ModelExportPath' type, usually produced
            by a Trainer component.
          model_blessing: A Channel of 'ModelBlessingPath' type, usually
            produced by a ModelValidator component.
          data_spec: bulk_inferrer_pb2.DataSpec instance that describes data
            selection.
          model_spec: bulk_inferrer_pb2.ModelSpec instance that describes the
            model specification.
          output: Channel of `InferenceResult` to store the inference results.
          instance_name: Optional name assigned to this specific instance of
            BulkInferrer. Required only if multiple BulkInferrer components are
            declared in the same pipeline.
        """
        output = output or types.Channel(
            type=standard_artifacts.InferenceResult,
            artifacts=[standard_artifacts.InferenceResult()])
        spec = BulkInferrerSpec(
            examples=examples,
            model_export=model_export,
            model_blessing=model_blessing,
            data_spec=data_spec or bulk_inferrer_pb2.DataSpec(),
            model_spec=model_spec or bulk_inferrer_pb2.ModelSpec(),
            output=output)
        super(BulkInferrer, self).__init__(spec=spec,
                                           instance_name=instance_name)
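# Usage sketch (hedged): restricting inference to particular example splits
# via data_spec, per the Args section above. The `example_splits` field name,
# the split name, and the upstream output keys are assumptions to verify
# against the bulk_inferrer_pb2 proto and pipeline in use.
bulk_inferrer = BulkInferrer(
    examples=example_gen.outputs['examples'],
    model_export=trainer.outputs['output'],
    model_blessing=model_validator.outputs['blessing'],
    data_spec=bulk_inferrer_pb2.DataSpec(example_splits=['unlabelled']))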
def testConstructNoDestinationCustomExecutor(self):
    pusher = component.Pusher(
        model_export=self.model_export,
        model_blessing=self.model_blessing,
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            self._MyCustomPusherExecutor),
    )
    self.assertEqual('ModelPushPath', pusher.outputs['model_push'].type_name)
def testConstructCustomExecutor(self):
    example_gen = component.FileBasedExampleGen(
        input_base='path',
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            TestExampleGenExecutor))
    self.assertEqual(driver.Driver, example_gen.driver_class)
    self.assertEqual(standard_artifacts.Examples.TYPE_NAME,
                     example_gen.outputs['examples'].type_name)
class EmptyComponent(base_component.BaseComponent):

    SPEC_CLASS = EmptyComponentSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(base_executor.BaseExecutor)

    def __init__(self, name):
        super(EmptyComponent, self).__init__(spec=EmptyComponentSpec(),
                                             instance_name=name)
def testEnableCache(self):
    input_base = standard_artifacts.ExternalArtifact()
    custom_config = example_gen_pb2.CustomConfig(custom_config=any_pb2.Any())
    example_gen_1 = component.FileBasedExampleGen(
        input=channel_utils.as_channel([input_base]),
        custom_config=custom_config,
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            TestExampleGenExecutor))
    self.assertEqual(None, example_gen_1.enable_cache)
    example_gen_2 = component.FileBasedExampleGen(
        input=channel_utils.as_channel([input_base]),
        custom_config=custom_config,
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            TestExampleGenExecutor),
        enable_cache=True)
    self.assertEqual(True, example_gen_2.enable_cache)
class Tuner(tuner_component.Tuner):
    """TFX component for model hyperparameter tuning on AI Platform Training."""

    # TODO(b/160260359): Decide if custom_executor_spec should be added to
    # TunerSpec, or deprecate other uses of custom_executor_spec and the
    # interface for swapping the Executor of a component entirely, to
    # standardize around custom components.
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)
class Tuner(base_component.BaseComponent):
    """A TFX component for model hyperparameter tuning."""

    SPEC_CLASS = TunerSpec
    EXECUTOR_SPEC = executor_spec.ExecutorClassSpec(executor.Executor)

    def __init__(self,
                 examples: types.Channel = None,
                 schema: types.Channel = None,
                 module_file: Optional[Text] = None,
                 tuner_fn: Optional[Text] = None,
                 model: Optional[types.Channel] = None,
                 best_hyperparameters: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct a Tuner component.

        Args:
          examples: A Channel of type `standard_artifacts.Examples`, serving as
            the source of examples that are used in tuning (required).
            Transformed examples are not yet supported.
          schema: A Channel of type `standard_artifacts.Schema`, serving as the
            schema of training and eval data.
          module_file: A path to a python module file containing a UDF
            KerasTuner definition. Exactly one of 'module_file' or 'tuner_fn'
            must be supplied. The module_file must implement a function named
            `tuner_fn` at its top level. The function takes a working dir path,
            train data path, eval data path and a
            tensorflow_metadata.proto.v0.schema_pb2.Schema, and generates a
            namedtuple TunerFnResult which contains:
              - 'tuner': A KerasTuner that will be used for tuning.
              - 'train_dataset': A tf.data.Dataset of training data.
              - 'eval_dataset': A tf.data.Dataset of eval data.
          tuner_fn: A python path to a UDF model definition function. See
            'module_file' for the required signature of the UDF. Exactly one of
            'module_file' or 'tuner_fn' must be supplied.
          model: Optional Channel of type `standard_artifacts.Model` for the
            best model result.
          best_hyperparameters: Optional Channel of type
            `standard_artifacts.HyperParameters` for the best hparams result.
          instance_name: Optional unique instance name. Necessary if multiple
            Tuner components are declared in the same pipeline.
        """
        if bool(module_file) == bool(tuner_fn):
            raise ValueError(
                "Exactly one of 'module_file' or 'tuner_fn' must be supplied")

        model = model or types.Channel(
            type=standard_artifacts.Model,
            artifacts=[standard_artifacts.Model()])
        best_hyperparameters = best_hyperparameters or types.Channel(
            type=standard_artifacts.HyperParameters,
            artifacts=[standard_artifacts.HyperParameters()])
        spec = TunerSpec(examples=examples,
                         schema=schema,
                         module_file=module_file,
                         tuner_fn=tuner_fn,
                         model_export_path=model,
                         best_hyperparameters=best_hyperparameters)
        super(Tuner, self).__init__(spec=spec, instance_name=instance_name)
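# Sketch (hedged) of the module-file contract described above: `tuner_fn`
# receives a working dir, train/eval data paths and a schema, and returns a
# TunerFnResult carrying a KerasTuner plus train/eval datasets. The
# TunerFnResult import path and the helper names `_build_keras_model` and
# `_input_fn` are illustrative assumptions, not confirmed by this snippet.
import kerastuner

from tfx.components.tuner.component import TunerFnResult  # assumed path


def tuner_fn(working_dir, train_data_path, eval_data_path, schema):
    tuner = kerastuner.RandomSearch(
        _build_keras_model,  # user-defined model builder (assumed helper)
        objective='val_accuracy',
        max_trials=5,
        directory=working_dir,
        project_name='tuning')
    return TunerFnResult(
        tuner=tuner,
        train_dataset=_input_fn(train_data_path, schema),  # assumed helper
        eval_dataset=_input_fn(eval_data_path, schema))    # assumed helper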
def testConstructNoDestinationCustomExecutor(self):
    pusher = component.Pusher(
        model=self.model,
        model_blessing=self.model_blessing,
        custom_executor_spec=executor_spec.ExecutorClassSpec(
            self._MyCustomPusherExecutor),
    )
    self.assertEqual(standard_artifacts.PushedModel.TYPE_NAME,
                     pusher.outputs['pushed_model'].type_name)