def __init__(self,
             input_data: types.Channel = None,
             output_data: types.Channel = None,
             name: Optional[Text] = None):
    """Build a HelloComponent that passes examples through unchanged.

    Args:
      input_data: A Channel of type `standard_artifacts.Examples`. It will
        often carry two splits: 'train' and 'eval'.
      output_data: A Channel of type `standard_artifacts.Examples`. It will
        usually carry the same splits as `input_data`. When omitted (or
        falsy), a default channel is created here.
      name: Optional unique name. Needed when several Hello components are
        declared in the same pipeline.
    """
    if output_data:
        resolved_output = output_data
    else:
        # HelloComponent forwards its input to its output, so the default
        # output artifact advertises exactly the splits that the upstream
        # component produced (by default a 'train' and an 'eval' split).
        passthrough_artifact = standard_artifacts.Examples()
        upstream_artifact = input_data.get()[0]
        passthrough_artifact.split_names = upstream_artifact.split_names
        resolved_output = channel_utils.as_channel([passthrough_artifact])

    component_spec = HelloComponentSpec(
        input_data=input_data,
        output_data=resolved_output,
        name=name)
    super(HelloComponent, self).__init__(spec=component_spec)
def __init__(self,
             statistics: types.Channel = None,
             schema: types.Channel = None,
             exclude_splits: Optional[List[Text]] = None,
             output: Optional[types.Channel] = None,
             stats: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
    """Construct an ExampleValidator component.

    Either `stats` or `statistics` must be present in the arguments. When
    `stats` is supplied (truthy) it overrides `statistics`.

    Args:
      statistics: A Channel of type `standard_artifacts.ExampleStatistics`.
        This should contain at least 'eval' split. When no `output` channel
        is given, the split names of its single artifact (minus
        `exclude_splits`) become the split names of the default anomalies
        artifact.
      schema: A Channel of type `standard_artifacts.Schema`. _required_
      exclude_splits: Names of splits that the example validator should not
        validate. Default behavior (when exclude_splits is set to None) is
        excluding no splits.
      output: Output channel of type `standard_artifacts.ExampleAnomalies`.
        A default one is created when omitted.
      stats: Backwards compatibility alias for the 'statistics' argument.
      instance_name: Optional name assigned to this specific instance of
        ExampleValidator. Required only if multiple ExampleValidator
        components are declared in the same pipeline.
    """
    if stats:
        # The message must name this component (ExampleValidator); it
        # previously blamed StatisticsGen, sending users to the wrong call.
        logging.warning(
            'The "stats" argument to the ExampleValidator component has '
            'been renamed to "statistics" and is deprecated. Please update your '
            'usage as support for this argument will be removed soon.')
        statistics = stats

    if exclude_splits is None:
        exclude_splits = []
        logging.info('Excluding no splits because exclude_splits is not set.')

    anomalies = output
    if not anomalies:
        # Build a default output artifact whose splits mirror the input
        # statistics, leaving out anything the caller asked to exclude.
        anomalies_artifact = standard_artifacts.ExampleAnomalies()
        statistics_split_names = artifact_utils.decode_split_names(
            artifact_utils.get_single_instance(
                list(statistics.get())).split_names)
        split_names = [
            split for split in statistics_split_names
            if split not in exclude_splits
        ]
        anomalies_artifact.split_names = artifact_utils.encode_split_names(
            split_names)
        anomalies = types.Channel(
            type=standard_artifacts.ExampleAnomalies,
            artifacts=[anomalies_artifact])

    # exclude_splits is handed to the spec in serialized form.
    spec = ExampleValidatorSpec(
        statistics=statistics,
        schema=schema,
        exclude_splits=json_utils.dumps(exclude_splits),
        anomalies=anomalies)
    super(ExampleValidator, self).__init__(
        spec=spec, instance_name=instance_name)
def __init__(self,
             examples: types.Channel = None,
             schema: Optional[types.Channel] = None,
             stats_options: Optional[tfdv.StatsOptions] = None,
             output: Optional[types.Channel] = None,
             input_data: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None,
             enable_cache: Optional[bool] = None):
    """Build a StatisticsGen component.

    Args:
      examples: A Channel of `ExamplesPath` type, likely produced by the
        [ExampleGen component](https://www.tensorflow.org/tfx/guide/examplegen).
        It needs to contain two splits labeled `train` and `eval`.
        _required_
      schema: A `Schema` channel used to automatically configure the stats
        options passed to TFDV.
      stats_options: The StatsOptions instance configuring optional TFDV
        behavior. When stats_options.schema is set, it is used instead of
        the `schema` channel input. Because stats_options has to be
        serialized, slicer functions and custom stats generators are
        dropped and therefore not usable.
      output: `ExampleStatisticsPath` channel for the statistics of each
        split in the input examples. A default one is created when omitted.
      input_data: Backwards compatibility alias for the `examples` argument.
      instance_name: Optional name assigned to this specific instance of
        StatisticsGen. Required only if multiple StatisticsGen components
        are declared in the same pipeline.
      enable_cache: Optional boolean indicating whether cache is enabled for
        the StatisticsGen component. If not specified, defaults to the value
        specified for the pipeline's enable_cache parameter.
    """
    if input_data:
        absl.logging.warning(
            'The "input_data" argument to the StatisticsGen component has '
            'been renamed to "examples" and is deprecated. Please update your '
            'usage as support for this argument will be removed soon.')
        examples = input_data

    statistics_channel = output
    if not statistics_channel:
        # The default output artifact carries the same split names as the
        # single input examples artifact.
        default_artifact = standard_artifacts.ExampleStatistics()
        examples_artifact = artifact_utils.get_single_instance(
            list(examples.get()))
        default_artifact.split_names = examples_artifact.split_names
        statistics_channel = types.Channel(
            type=standard_artifacts.ExampleStatistics,
            artifacts=[default_artifact])

    # TODO(b/150802589): Move jsonable interface to tfx_bsl and use json_utils.
    if stats_options:
        serialized_options = stats_options.to_json()
    else:
        serialized_options = None

    component_spec = StatisticsGenSpec(
        examples=examples,
        schema=schema,
        stats_options_json=serialized_options,
        statistics=statistics_channel)
    super(StatisticsGen, self).__init__(
        spec=component_spec,
        instance_name=instance_name,
        enable_cache=enable_cache)
def __init__(self,
             input_examples: types.Channel,
             data_view: types.Channel,
             output_examples: Optional[types.Channel] = None,
             instance_name: Optional[Text] = None):
    """Build the component from its input examples and a data view.

    Args:
      input_examples: Channel whose single artifact is used as the template
        for the default output artifact.
      data_view: Channel passed to the component spec as `data_view`.
      output_examples: Optional output channel. When omitted (or falsy), a
        channel wrapping a new `standard_artifacts.Examples` artifact,
        copied from the input artifact, is created.
      instance_name: Optional name forwarded to the base component
        constructor.
    """
    if not output_examples:
        # Default output: a fresh Examples artifact populated from the
        # single input artifact via copy_from.
        bound_artifact = standard_artifacts.Examples()
        source_artifact = artifact_utils.get_single_instance(
            list(input_examples.get()))
        bound_artifact.copy_from(source_artifact)
        output_examples = channel_utils.as_channel([bound_artifact])

    component_spec = _DataViewBinderComponentSpec(
        input_examples=input_examples,
        data_view=data_view,
        output_examples=output_examples)
    super().__init__(spec=component_spec, instance_name=instance_name)