Example #1
0
def get_tfxio_factory_from_artifact(
    examples: artifact.Artifact,
    telemetry_descriptors: List[Text],
    schema: Optional[schema_pb2.Schema] = None,
    read_as_raw_records: bool = False,
    raw_record_column_name: Optional[Text] = None
) -> Callable[[Text], tfxio.TFXIO]:
    """Builds a factory that turns a file pattern into a suitable TFXIO.

    The payload format (and, for FORMAT_PROTO payloads, the data view URI)
    is read from `examples` once, up front; the returned factory forwards
    those values together with the remaining arguments to `make_tfxio`.

    Args:
      examples: The Examples artifact that the TFXIO is intended to access.
      telemetry_descriptors: A set of descriptors that identify the component
        that is instantiating the TFXIO. These will be used to construct the
        namespace to contain metrics for profiling and are therefore expected
        to be identifiers of the component itself and not individual
        instances of source use.
      schema: TFMD schema. Note that without a schema, some TFXIO interfaces
        in certain TFXIO implementations might not be available.
      read_as_raw_records: If True, ignore the payload type of `examples`.
        Always use RawTfRecord TFXIO.
      raw_record_column_name: If provided, the arrow RecordBatch produced by
        the TFXIO will contain a string column of the given name, and the
        contents of that column will be the raw records. Note that not all
        TFXIO supports this option, and an error will be raised in that case.
        Required if read_as_raw_records == True.

    Returns:
      A function that takes a file pattern as input and returns a TFXIO
      instance.

    Raises:
      NotImplementedError: when given an unsupported example payload type.
    """
    assert examples.type is standard_artifacts.Examples, (
        'examples must be of type standard_artifacts.Examples')

    # When the payload format custom property is not set on the artifact,
    # tf.Example is assumed.
    payload_format = examples_utils.get_payload_format(examples)

    # Only FORMAT_PROTO payloads are checked for a data view. An unset or
    # empty URI property is normalised to None.
    if payload_format == example_gen_pb2.PayloadFormat.FORMAT_PROTO:
        data_view_uri = examples.get_string_custom_property(
            constants.DATA_VIEW_URI_PROPERTY_KEY) or None
    else:
        data_view_uri = None

    def _create_tfxio(file_pattern):
        return make_tfxio(
            file_pattern=file_pattern,
            telemetry_descriptors=telemetry_descriptors,
            payload_format=payload_format,
            data_view_uri=data_view_uri,
            schema=schema,
            read_as_raw_records=read_as_raw_records,
            raw_record_column_name=raw_record_column_name)

    return _create_tfxio
Example #2
0
def _get_data_view_info(
        examples: artifact.Artifact) -> Optional[Tuple[str, int]]:
    """Looks up the data view recorded on an Examples artifact, if any.

    Args:
      examples: The artifact to inspect; its type must be
        standard_artifacts.Examples.

    Returns:
      A `(data_view_uri, data_view_create_time)` tuple when the payload
      format is FORMAT_PROTO and the data view URI custom property is
      non-empty; None otherwise.
    """
    assert examples.type is standard_artifacts.Examples, (
        'examples must be of type standard_artifacts.Examples')

    # Data view properties are only consulted for FORMAT_PROTO payloads.
    is_proto_payload = (
        examples_utils.get_payload_format(examples) ==
        example_gen_pb2.PayloadFormat.FORMAT_PROTO)
    if not is_proto_payload:
        return None

    uri = examples.get_string_custom_property(
        constants.DATA_VIEW_URI_PROPERTY_KEY)
    if not uri:
        # No data view URI is recorded on this artifact.
        return None

    create_time = examples.get_int_custom_property(
        constants.DATA_VIEW_CREATE_TIME_KEY)
    return uri, create_time
Example #3
0
def is_artifact_version_older_than(artifact: Artifact,
                                   artifact_version: Text) -> bool:
  """Tells whether `artifact` was produced by a version before a given one.

  Args:
    artifact: The artifact whose recorded TFX version is inspected.
    artifact_version: The version string to compare against.

  Returns:
    False if the artifact's MLMD state is UNKNOWN (treated as newly
    generated). True if the artifact carries no TFX version custom property.
    Otherwise, whether the recorded version parses as strictly lower than
    `artifact_version`.
  """
  # A newly generated artifact should use the latest artifact payload
  # format, so it is never considered old.
  if artifact.mlmd_artifact.state == metadata_store_pb2.Artifact.UNKNOWN:
    return False

  # From here on the artifact was resolved from MLMD. One that carries no
  # version property at all counts as old.
  if not artifact.has_custom_property(ARTIFACT_TFX_VERSION_CUSTOM_PROPERTY_KEY):
    return True

  recorded_version = artifact.get_string_custom_property(
      ARTIFACT_TFX_VERSION_CUSTOM_PROPERTY_KEY)
  return version.parse(recorded_version) < version.parse(artifact_version)