Beispiel #1
0
  def __init__(self,
               statistics: types.Channel = None,
               schema: types.Channel = None,
               exclude_splits: Optional[List[Text]] = None,
               anomalies: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct an ExampleValidator component.

    Args:
      statistics: A Channel of type `standard_artifacts.ExampleStatistics`.
      schema: A Channel of type `standard_artifacts.Schema`. _required_
      exclude_splits: Names of splits that the example validator should not
        validate. Default behavior (when exclude_splits is set to None)
        is excluding no splits.
      anomalies: Output channel of type `standard_artifacts.ExampleAnomalies`.
        When not supplied, a new channel of that type is created.
      instance_name: Optional name assigned to this specific instance of
        ExampleValidator. Required only if multiple ExampleValidator components
        are declared in the same pipeline.
    """
    if exclude_splits is None:
      exclude_splits = []
      logging.info('Excluding no splits because exclude_splits is not set.')
    if not anomalies:
      anomalies = types.Channel(type=standard_artifacts.ExampleAnomalies)
    # The spec receives the excluded split names as a serialized string rather
    # than as a Python list.
    spec = ExampleValidatorSpec(
        statistics=statistics,
        schema=schema,
        exclude_splits=json_utils.dumps(exclude_splits),
        anomalies=anomalies)
    super(ExampleValidator, self).__init__(
        spec=spec, instance_name=instance_name)
Beispiel #2
0
    def __init__(self,
                 stats: types.Channel = None,
                 schema: types.Channel = None,
                 output: Optional[types.Channel] = None,
                 statistics: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Build an ExampleValidator component.

        Either `stats` or `statistics` must be present in the arguments.

        Args:
          stats: A Channel of 'ExampleStatisticsPath' type. This should
            contain at least 'eval' split. Other splits are ignored
            currently. Will be deprecated in the future for the `statistics`
            parameter.
          schema: A Channel of 'SchemaPath' type. _required_
          output: Output channel of 'ExampleValidationPath' type.
          statistics: Future replacement of the 'stats' argument. Only used
            when `stats` is not given.
          instance_name: Optional name assigned to this specific instance of
            ExampleValidator. Required only if multiple ExampleValidator
            components are declared in the same pipeline.
        """
        # `stats` wins when both are supplied; `statistics` is the fallback.
        if not stats:
            stats = statistics
        # Create a default anomalies channel when the caller provided none.
        if not output:
            output = types.Channel(
                type=standard_artifacts.ExampleAnomalies,
                artifacts=[standard_artifacts.ExampleAnomalies()])
        component_spec = ExampleValidatorSpec(
            stats=stats, schema=schema, output=output)
        super(ExampleValidator, self).__init__(
            spec=component_spec, instance_name=instance_name)
Beispiel #3
0
    def __init__(self,
                 statistics: types.Channel = None,
                 schema: types.Channel = None,
                 output: Optional[types.Channel] = None,
                 stats: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Build an ExampleValidator component.

        Either `stats` or `statistics` must be present in the arguments.

        Args:
          statistics: A Channel of type
            `standard_artifacts.ExampleStatistics`. This should contain at
            least 'eval' split. Other splits are currently ignored.
          schema: A Channel of type `standard_artifacts.Schema`. _required_
          output: Output channel of type
            `standard_artifacts.ExampleAnomalies`.
          stats: Backwards compatibility alias for the 'statistics' argument.
            Only used when `statistics` is not given.
          instance_name: Optional name assigned to this specific instance of
            ExampleValidator. Required only if multiple ExampleValidator
            components are declared in the same pipeline.
        """
        # Prefer the new-style argument; fall back to the legacy alias.
        if not statistics:
            statistics = stats
        # Create a default anomalies channel when the caller provided none.
        if not output:
            output = types.Channel(
                type=standard_artifacts.ExampleAnomalies,
                artifacts=[standard_artifacts.ExampleAnomalies()])
        # The spec in this version still names the input `stats`.
        component_spec = ExampleValidatorSpec(
            stats=statistics, schema=schema, output=output)
        super(ExampleValidator, self).__init__(
            spec=component_spec, instance_name=instance_name)
Beispiel #4
0
    def __init__(self,
                 statistics: types.Channel = None,
                 schema: types.Channel = None,
                 output: Optional[types.Channel] = None,
                 stats: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None):
        """Construct an ExampleValidator component for examples.

        TFX has its own ExampleValidator component, and this one uses the
        same executor. The reason for this one to exist is that the TFX
        component does not allow specifying the splits to use; it just
        assumes `train` and `eval`. This component will be unnecessary
        once TFX Transform allows setting the input and output splits
        as other components do.

        Args:
          statistics: Channel carrying the statistics to validate.
          schema: Channel carrying the schema to validate against.
          output: Optional output channel for the anomalies. When not
            supplied, a new `standard_artifacts.ExampleAnomalies` channel is
            created.
          stats: Deprecated alias for `statistics`. If given, it takes
            precedence over `statistics` and a deprecation warning is logged.
          instance_name: Optional name forwarded to the base component
            constructor.
        """
        if stats:
            # The warning names this component (ExampleValidator); the
            # previous wording incorrectly referred to StatisticsGen.
            logging.warning(
                'The "stats" argument to the ExampleValidator component has '
                'been renamed to "statistics" and is deprecated. Please update'
                ' your usage as support for this argument will be removed'
                ' soon.')
            statistics = stats
        anomalies = output or types.Channel(
            type=standard_artifacts.ExampleAnomalies,
            artifacts=[standard_artifacts.ExampleAnomalies()])
        spec = ExampleValidatorSpec(statistics=statistics,
                                    schema=schema,
                                    anomalies=anomalies)
        super(ExampleValidator, self).__init__(spec=spec,
                                               instance_name=instance_name)
Beispiel #5
0
  def __init__(self,
               statistics: types.Channel = None,
               schema: types.Channel = None,
               exclude_splits: Optional[List[Text]] = None,
               output: Optional[types.Channel] = None,
               stats: Optional[types.Channel] = None,
               instance_name: Optional[Text] = None):
    """Construct an ExampleValidator component.

    Either `stats` or `statistics` must be present in the arguments.

    Args:
      statistics: A Channel of type `standard_artifacts.ExampleStatistics`.
      schema: A Channel of type `standard_artifacts.Schema`. _required_
      exclude_splits: Names of splits that the example validator should not
        validate. Default behavior (when exclude_splits is set to None)
        is excluding no splits.
      output: Output channel of type `standard_artifacts.ExampleAnomalies`.
        When not supplied, a channel is created whose artifact lists every
        split of the statistics artifact that is not in `exclude_splits`.
      stats: Backwards compatibility alias for the 'statistics' argument. If
        given, it takes precedence over `statistics` and a deprecation warning
        is logged.
      instance_name: Optional name assigned to this specific instance of
        ExampleValidator. Required only if multiple ExampleValidator components
        are declared in the same pipeline.
    """
    if stats:
      # The warning names this component (ExampleValidator); the previous
      # wording incorrectly referred to StatisticsGen.
      logging.warning(
          'The "stats" argument to the ExampleValidator component has '
          'been renamed to "statistics" and is deprecated. Please update your '
          'usage as support for this argument will be removed soon.')
      statistics = stats
    if exclude_splits is None:
      exclude_splits = []
      logging.info('Excluding no splits because exclude_splits is not set.')
    anomalies = output
    if not anomalies:
      # Derive the output artifact's split names from the input statistics.
      # NOTE(review): this path dereferences `statistics`, so it fails if
      # neither `statistics` nor `stats` was supplied.
      anomalies_artifact = standard_artifacts.ExampleAnomalies()
      statistics_split_names = artifact_utils.decode_split_names(
          artifact_utils.get_single_instance(list(
              statistics.get())).split_names)
      split_names = [
          split for split in statistics_split_names
          if split not in exclude_splits
      ]
      anomalies_artifact.split_names = artifact_utils.encode_split_names(
          split_names)
      anomalies = types.Channel(
          type=standard_artifacts.ExampleAnomalies,
          artifacts=[anomalies_artifact])
    spec = ExampleValidatorSpec(
        statistics=statistics,
        schema=schema,
        exclude_splits=json_utils.dumps(exclude_splits),
        anomalies=anomalies)
    super(ExampleValidator, self).__init__(
        spec=spec, instance_name=instance_name)
Beispiel #6
0
    def __init__(self,
                 stats: types.Channel,
                 schema: types.Channel,
                 output: Optional[types.Channel] = None,
                 name: Optional[Text] = None):
        """Build an ExampleValidator component.

        Args:
          stats: A Channel of 'ExampleStatisticsPath' type. This should
            contain at least 'eval' split. Other splits are ignored
            currently.
          schema: A Channel of 'SchemaPath' type.
          output: Optional output channel of 'ExampleValidationPath' type.
          name: Optional unique name. Necessary iff multiple ExampleValidator
            components are declared in the same pipeline.
        """
        # Create a default anomalies channel when the caller provided none.
        if not output:
            output = types.Channel(
                type=standard_artifacts.ExampleAnomalies,
                artifacts=[standard_artifacts.ExampleAnomalies()])
        component_spec = ExampleValidatorSpec(
            stats=stats, schema=schema, output=output)
        super(ExampleValidator, self).__init__(
            spec=component_spec, name=name)
Beispiel #7
0
    def __init__(self,
                 statistics: types.Channel = None,
                 schema: types.Channel = None,
                 output: Optional[types.Channel] = None,
                 stats: Optional[types.Channel] = None,
                 instance_name: Optional[Text] = None,
                 enable_cache: Optional[bool] = None):
        """Construct an ExampleValidator component.

        Either `stats` or `statistics` must be present in the arguments.

        Args:
          statistics: A Channel of type
            `standard_artifacts.ExampleStatistics`. This should contain at
            least 'eval' split. Other splits are currently ignored.
          schema: A Channel of type `standard_artifacts.Schema`. _required_
          output: Output channel of type
            `standard_artifacts.ExampleAnomalies`. When not supplied, a new
            channel of that type is created.
          stats: Backwards compatibility alias for the 'statistics' argument.
            If given, it takes precedence over `statistics` and a
            deprecation warning is logged.
          instance_name: Optional name assigned to this specific instance of
            ExampleValidator. Required only if multiple ExampleValidator
            components are declared in the same pipeline.
          enable_cache: Optional boolean to indicate if cache is enabled for
            the ExampleValidator component. If not specified, defaults to the
            value specified for pipeline's enable_cache parameter.
        """
        if stats:
            # The warning names this component (ExampleValidator); the
            # previous wording incorrectly referred to StatisticsGen.
            absl.logging.warning(
                'The "stats" argument to the ExampleValidator component has '
                'been renamed to "statistics" and is deprecated. Please update your '
                'usage as support for this argument will be removed soon.')
            statistics = stats
        anomalies = output or types.Channel(
            type=standard_artifacts.ExampleAnomalies,
            artifacts=[standard_artifacts.ExampleAnomalies()])
        spec = ExampleValidatorSpec(statistics=statistics,
                                    schema=schema,
                                    anomalies=anomalies)
        super(ExampleValidator, self).__init__(spec=spec,
                                               instance_name=instance_name,
                                               enable_cache=enable_cache)
Beispiel #8
0
    def __init__(self,
                 statistics: types.Channel = None,
                 schema: types.Channel = None,
                 exclude_splits: Optional[List[Text]] = None):
        """Build an ExampleValidator component.

        Args:
          statistics: A Channel of type
            `standard_artifacts.ExampleStatistics`.
          schema: A Channel of type `standard_artifacts.Schema`. _required_
          exclude_splits: Names of splits that the example validator should
            not validate. Default behavior (when exclude_splits is set to
            None) is excluding no splits.
        """
        # An unset exclusion list means "validate every split".
        if exclude_splits is None:
            logging.info(
                'Excluding no splits because exclude_splits is not set.')
            exclude_splits = []
        # The output channel is always created here; callers cannot pass one.
        anomalies_channel = types.Channel(
            type=standard_artifacts.ExampleAnomalies)
        # The spec receives the excluded split names as a serialized string.
        serialized_excludes = json_utils.dumps(exclude_splits)
        component_spec = ExampleValidatorSpec(
            statistics=statistics,
            schema=schema,
            exclude_splits=serialized_excludes,
            anomalies=anomalies_channel)
        super(ExampleValidator, self).__init__(spec=component_spec)