Exemple #1
0
  def testDetermineReadyTensorsAndTableInitializers(
      self, create_graph_fn, feeds, replaced_tensors_ready, should_be_ready,
      num_ready_table_initializers):
    """Checks tensor readiness and the ready table initializer count.

    Builds the graph via `create_graph_fn`, wraps the default graph in an
    `InitializableGraphAnalyzer`, and then asserts both the number of ready
    table initializers and the `ready_to_run` answer for every entry of
    `should_be_ready`.

    Args:
      create_graph_fn: A callable that adds ops to a graph and returns a dict
          mapping tensor names to `Tensor`s or `SparseTensor`s.
      feeds: A list of keys of the dict returned by create_graph_fn naming the
          tensors that are fed in the main run (but not in the table
          initialization run).
      replaced_tensors_ready: A dict mapping keys of the dict returned by
          create_graph_fn to bools saying whether that tensor is ready to be
          replaced in this phase.
      should_be_ready: A dict mapping keys of the dict returned by
          create_graph_fn to bools saying whether that tensor can be
          calculated in this phase.
      num_ready_table_initializers: The expected number of table initializers
          that are ready to run in the table initialization run of this phase.
    """
    name_to_tensor = create_graph_fn()

    # Re-key the readiness map by the actual tensor objects.
    ready_by_tensor = {}
    for key, is_ready in replaced_tensors_ready.items():
      ready_by_tensor[name_to_tensor[key]] = is_ready

    default_graph = tf.compat.v1.get_default_graph()
    fed_tensors = {key: name_to_tensor[key] for key in feeds}
    analyzer = graph_tools.InitializableGraphAnalyzer(
        default_graph, fed_tensors, ready_by_tensor)

    self.assertEqual(len(analyzer.ready_table_initializers),
                     num_ready_table_initializers)

    for key, expected_ready in should_be_ready.items():
      self.assertEqual(analyzer.ready_to_run(name_to_tensor[key]),
                       expected_ready)
Exemple #2
0
  def testInitializableGraphAnalyzerReadyToRunRaises(
      self, create_graph_fn, feeds, replaced_tensors_ready, fetch,
      error_msg_regex):
    """Checks that `ready_to_run` raises a ValueError for the given fetch.

    Args:
      create_graph_fn: A callable that adds ops to a graph and returns a dict
          mapping tensor names to `Tensor`s or `SparseTensor`s.
      feeds: A list of keys of the dict returned by create_graph_fn naming the
          tensors that are fed in the main run (but not in the table
          initialization run).
      replaced_tensors_ready: A dict mapping keys of the dict returned by
          create_graph_fn to bools saying whether that tensor is ready to be
          replaced in this phase.
      fetch: The tensor to fetch.  Should be a key of the dict returned by
          create_graph_fn.
      error_msg_regex: A regex that the raised error message must match.
    """
    name_to_tensor = create_graph_fn()

    # Re-key the readiness map by the actual tensor objects.
    ready_by_tensor = {}
    for key, is_ready in replaced_tensors_ready.items():
      ready_by_tensor[name_to_tensor[key]] = is_ready

    default_graph = tf.compat.v1.get_default_graph()
    fed_tensors = {key: name_to_tensor[key] for key in feeds}
    analyzer = graph_tools.InitializableGraphAnalyzer(
        default_graph, fed_tensors, ready_by_tensor)

    with self.assertRaisesRegexp(ValueError, error_msg_regex):
      analyzer.ready_to_run(name_to_tensor[fetch])
    def testInitializableGraphAnalyzerConstructorRaises(
            self, create_graph_fn, feeds, replaced_tensors_ready,
            error_msg_regex):
        """Checks that constructing the analyzer raises a ValueError.

        The graph is built inside a fresh `tf.compat.v1.Graph`, and the
        readiness information is handed to the analyzer as a list of
        `(tensor, ready)` pairs.

        Args:
          create_graph_fn: A callable that adds ops to a graph and returns a
              dict mapping tensor names to `Tensor`s or `SparseTensor`s.
          feeds: A list of keys of the dict returned by create_graph_fn naming
              the tensors that are fed in the main run (but not in the table
              initialization run).
          replaced_tensors_ready: A dict mapping keys of the dict returned by
              create_graph_fn to bools saying whether that tensor is ready to
              be replaced in this phase.
          error_msg_regex: A regex that the raised error message must match.
        """
        with tf.compat.v1.Graph().as_default() as graph:
            name_to_tensor = create_graph_fn()

        ready_pairs = []
        for key, is_ready in replaced_tensors_ready.items():
            ready_pairs.append((name_to_tensor[key], is_ready))

        with self.assertRaisesRegexp(ValueError, error_msg_regex):
            graph_tools.InitializableGraphAnalyzer(
                graph, {key: name_to_tensor[key] for key in feeds},
                ready_pairs)
    def testGetUniquePath(self,
                          create_graph_fn,
                          feeds,
                          replaced_tensors_ready,
                          expected_calls_dict,
                          skip_test_check_fn=None):
        """Checks the callback invocations made by `get_unique_path`.

        For every key of `expected_calls_dict` a fresh mock path callback is
        installed on a new `InitializableGraphAnalyzer`, `get_unique_path` is
        run for the corresponding tensor, and the recorded calls are compared
        with the expected ones. On mismatch, the matchers that were actually
        observed are logged before the assertion error is re-raised.

        Args:
          create_graph_fn: A callable that adds ops to a graph and returns a
              dict mapping tensor names to tensors.
          feeds: An iterable of keys of that dict naming the fed tensors.
          replaced_tensors_ready: A dict mapping keys of that dict to bools; it
              is converted into a list of `(tensor, ready)` pairs.
          expected_calls_dict: A dict mapping tensor names to the sequence of
              calls the path callback is expected to receive.
          skip_test_check_fn: Optional callable; when truthy it is invoked with
              a message before the test body runs (presumably to skip the
              test; confirm against the callers).
        """
        # TODO(b/138934800): Remove this once TF 1.15 has the same results in all
        # environments.
        if skip_test_check_fn:
            skip_test_check_fn('This test is not currently supported.')

        with tf.compat.v1.Graph().as_default() as graph:
            name_to_tensor = create_graph_fn()

        ready_pairs = []
        for key, is_ready in replaced_tensors_ready.items():
            ready_pairs.append((name_to_tensor[key], is_ready))

        for name in expected_calls_dict:
            expected_calls = expected_calls_dict[name]

            # Collects one matcher string per callback call; only used to
            # build the debugging message on failure.
            observed_matchers = []

            def describe_path_fn(x, parents=None):
                parents_str = ''
                if parents is not None:
                    parents_str = ', parents={}'.format(
                        [_value_to_matcher(parent) for parent in parents])
                observed_matchers.append('({}{}),'.format(  # pylint: disable=cell-var-from-loop
                    _value_to_matcher(x, True), parents_str))

                if isinstance(x, tf.Operation):
                    return x.node_def.name
                if isinstance(x, tf.Tensor):
                    self.assertLessEqual(len(parents), 1)
                    return x.name
                if isinstance(x, (six.text_type, str, bytes)):
                    return x
                raise ValueError('Unexpected type: {}'.format(x))

            path_cb_mock = mock.MagicMock(side_effect=describe_path_fn)

            analyzer = graph_tools.InitializableGraphAnalyzer(
                graph, {key: name_to_tensor[key] for key in feeds},
                ready_pairs, path_cb_mock)
            analyzer.get_unique_path(name_to_tensor[name])

            try:
                path_cb_mock.assert_has_calls(expected_calls)
                self.assertEqual(
                    path_cb_mock.call_count, len(expected_calls),
                    'Number of expected calls != number of actual calls for {}: {}'
                    .format(name, path_cb_mock.call_args_list))
            except AssertionError:
                matcher_dump = '\n'.join(observed_matchers)
                tf.compat.v1.logging.error(
                    'The following is a list of matchers for {}:\n{}'.format(
                        name, matcher_dump))
                raise
 def _transform_raw_features_internal(self,
                                      raw_features,
                                      drop_unused_features=False):
   """Applies the saved transform to raw features, also returning assets.

   Args:
     raw_features: Passed unchanged to
       `saved_transform_io.partially_apply_saved_transform_internal` and used
       as the feeds of the graph analyzer when filtering.
     drop_unused_features: If True, only transformed features that the graph
       analyzer reports as ready to run are kept.

   Returns:
     A `(transformed_features, assets_map)` pair.
   """
   unbound_inputs, outputs, assets_map = (
       saved_transform_io.partially_apply_saved_transform_internal(
           self.transform_savedmodel_dir, raw_features))
   if not drop_unused_features:
     return outputs, assets_map

   # Mark every still-unbound raw input as not ready, then keep only the
   # outputs that can be computed regardless.
   analyzer = graph_tools.InitializableGraphAnalyzer(
       tf.compat.v1.get_default_graph(), raw_features,
       [(tensor, False) for tensor in six.itervalues(unbound_inputs)])
   kept = {}
   for key, tensor in six.iteritems(outputs):
     if analyzer.ready_to_run(tensor):
       kept[key] = tensor
   return kept, assets_map
Exemple #6
0
 def _transform_raw_features_compat_v1(self, raw_features,
                                       drop_unused_features):
     """Transforms a dict of raw feature tensors using the saved transform.

     Args:
       raw_features: Passed unchanged to
         `saved_transform_io.partially_apply_saved_transform_internal` and
         used as the feeds of the graph analyzer when filtering.
       drop_unused_features: If True, only transformed features that the
         graph analyzer reports as ready to run are returned.

     Returns:
       A dict of transformed features, filtered when `drop_unused_features`
       is True.
     """
     unbound_inputs, outputs = (
         saved_transform_io.partially_apply_saved_transform_internal(
             self.transform_savedmodel_dir, raw_features))
     if not drop_unused_features:
         return outputs

     # Mark every still-unbound raw input as not ready, then keep only the
     # outputs that can be computed regardless.
     analyzer = graph_tools.InitializableGraphAnalyzer(
         tf.compat.v1.get_default_graph(), raw_features,
         [(tensor, False) for tensor in six.itervalues(unbound_inputs)])
     kept = {}
     for key, tensor in six.iteritems(outputs):
         if analyzer.ready_to_run(tensor):
             kept[key] = tensor
     return kept
Exemple #7
0
    def transform_raw_features(self, raw_features, drop_unused_features=False):
        """Transforms a dict of raw feature tensors with tf.Transform.

        Given a dictionary of `Tensor`s or `SparseTensor`s holding the raw
        features, applies the transformation defined by tf.Transform.

        All transformed features defined by tf.Transform are returned by
        default. Set `drop_unused_features` to True to get back only the
        features that are transformed from the supplied `raw_features`.

        Args:
          raw_features: A dict whose keys are feature names and values are
            `Tensor`s or `SparseTensor`s.
          drop_unused_features: If True, the result is filtered so that only
            features transformed from `raw_features` are included. A feature
            derived from several raw features (e.g. a feature cross) is
            included only if all of its base raw features are present in
            `raw_features`.

        Returns:
          A dict whose keys are feature names and values are `Tensor`s or
          `SparseTensor`s representing transformed features.
        """
        unbound_inputs, outputs = (
            saved_transform_io.partially_apply_saved_transform_internal(
                self.transform_savedmodel_dir, raw_features))
        # TODO(b/124051570): Consider making drop_unused_features default to true.
        if not drop_unused_features:
            return outputs

        # Mark every still-unbound raw input as not ready, then keep only the
        # outputs that can be computed regardless.
        not_ready = {
            tensor: False for tensor in six.itervalues(unbound_inputs)
        }
        analyzer = graph_tools.InitializableGraphAnalyzer(
            tf.compat.v1.get_default_graph(), raw_features, not_ready)
        kept = {}
        for key, tensor in six.iteritems(outputs):
            if analyzer.ready_to_run(tensor):
                kept[key] = tensor
        return kept
def build(graph,
          input_signature,
          output_signature,
          dataset_keys=None,
          cache_dict=None):
    """Builds the analysis node graph and returns its optimized final node.

    The graph is assumed to contain some `Analyzer`s which must be executed by
    doing a full pass over the dataset, and passing the inputs for that
    analyzer into some implementation, then taking the results and replacing
    the `Analyzer`s outputs with constants in the graph containing these
    results.

    The work is organized in phases.  Each phase covers the `Analyzer`s which
    are ready to run in that phase, together with the table initializers that
    are ready to run in that phase.

    An `Analyzer` or op is ready to run when all its dependencies in the graph
    have been computed.  Thus if the graph is constructed by

      def preprocessing_fn(inputs):
        x = inputs['x']
        scaled_0 = x - tft.min(x)
        scaled_0_1 = scaled_0 / tft.max(scaled_0)

    then the first phase will contain the analyzer corresponding to the call
    to `min`, because `x` is an input and so is ready to compute in the first
    phase, while the second phase will contain the analyzer corresponding to
    the call to `max` since `scaled_0` depends on the result of the call to
    `tft.min` which is computed in the first phase.

    More generally, we define a level for each op and each `Analyzer` by
    walking the graph, assigning to each operation the max level of its
    inputs, to each `Tensor` the level of its operation, unless it's the
    output of an `Analyzer` in which case we assign the level of its
    `Analyzer` plus one.

    Side effects: the `analyzer_nodes.TENSOR_REPLACEMENTS` collection of
    `graph` is read and then cleared, and the module-level `_ANALYSIS_GRAPH`
    is set to the returned optimized saved-model node.

    Args:
      graph: A `tf.Graph`.
      input_signature: A dict whose keys are strings and values are `Tensor`s
        or `SparseTensor`s.
      output_signature: A dict whose keys are strings and values are `Tensor`s
        or `SparseTensor`s.
      dataset_keys: (Optional) A set of strings which are dataset keys, they
        uniquely identify these datasets across analysis runs.
      cache_dict: (Optional): A cache dictionary.

    Returns:
      A 3-tuple of:
        * the optimized saved-model future node, as produced by the cache
          optimization followed by the combiner packing optimization,
        * the output cache value nodes (described by the original
          documentation as a dictionary of output cache `ValueNode`s),
        * the detached side-effect leafs returned by
          `_perform_cache_optimization`.

    Raises:
      ValueError: if the graph cannot be analyzed.
    """
    # Take ownership of the pending tensor replacements recorded on the graph.
    tensor_sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
    graph.clear_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
    phase = 0
    tensor_bindings = []
    # Maps each (hashable) sink tensor to whether a binding for it has already
    # been created.
    sink_tensors_ready = {
        tf_utils.hashable_tensor_or_op(tensor_sink.tensor): False
        for tensor_sink in tensor_sinks
    }
    translate_visitor = _TranslateVisitor()
    translate_traverser = nodes.Traverser(translate_visitor)

    # Accumulates the intermediate output signatures of all phases.
    analyzers_input_signature = {}
    # Stays None when there are no sinks; in that case
    # analyzers_input_signature is empty too, so it is never dereferenced
    # below.
    graph_analyzer = None

    extracted_input_node = nodes.apply_operation(
        beam_nodes.ExtractInputForSavedModel,
        dataset_key=analyzer_cache._make_flattened_dataset_key(),  # pylint: disable=protected-access
        label='ExtractInputForSavedModel[FlattenedDataset]')

    # NOTE(review): this loop only terminates if every pass marks at least one
    # more sink as ready (or something raises) - confirm that the visitors
    # guarantee this.
    while not all(sink_tensors_ready.values()):
        infix = 'Phase{}'.format(phase)
        # Determine which table init ops are ready to run in this phase
        # Determine which keys of pending_tensor_replacements are ready to run
        # in this phase, based in whether their dependencies are ready.
        graph_analyzer = graph_tools.InitializableGraphAnalyzer(
            graph, input_signature, list(sink_tensors_ready.items()),
            graph_tools.describe_path_as_analyzer_cache_hash)
        ready_traverser = nodes.Traverser(_ReadyVisitor(graph_analyzer))

        # Now create and apply a SavedModel with all tensors in tensor_bindings
        # bound, which outputs all the tensors in the required tensor tuples.
        # The signature dict starts out empty; the same object is later handed
        # to the translate visitor and merged into analyzers_input_signature
        # (presumably the visitor fills it in while translating - confirm).
        intermediate_output_signature = collections.OrderedDict()
        saved_model_future = nodes.apply_operation(
            beam_nodes.CreateSavedModel,
            *tensor_bindings,
            table_initializers=tuple(graph_analyzer.ready_table_initializers),
            output_signature=intermediate_output_signature,
            label='CreateSavedModelForAnalyzerInputs[{}]'.format(infix))

        extracted_values_dict = nodes.apply_operation(
            beam_nodes.ApplySavedModel,
            saved_model_future,
            extracted_input_node,
            phase=phase,
            label='ApplySavedModel[{}]'.format(infix))

        # Point the shared translate visitor at this phase's state.
        translate_visitor.phase = phase
        translate_visitor.intermediate_output_signature = (
            intermediate_output_signature)
        translate_visitor.extracted_values_dict = extracted_values_dict
        for tensor, value_node, is_asset_filepath in tensor_sinks:
            hashable_tensor = tf_utils.hashable_tensor_or_op(tensor)
            # Don't compute a binding/sink/replacement that's already been computed
            if sink_tensors_ready[hashable_tensor]:
                continue

            # Skip sinks whose value node is not ready in this phase; they are
            # retried in a later pass.
            if not ready_traverser.visit_value_node(value_node):
                continue

            translated_value_node = translate_traverser.visit_value_node(
                value_node)

            name = _tensor_name(tensor)
            tensor_bindings.append(
                nodes.apply_operation(
                    beam_nodes.CreateTensorBinding,
                    translated_value_node,
                    tensor_name=str(tensor.name),
                    dtype_enum=tensor.dtype.as_datatype_enum,
                    is_asset_filepath=is_asset_filepath,
                    label=analyzer_nodes.sanitize_label(
                        'CreateTensorBinding[{}]'.format(name))))
            sink_tensors_ready[hashable_tensor] = True

        analyzers_input_signature.update(intermediate_output_signature)
        phase += 1

    # We need to make sure that the representation of this output_signature is
    # deterministic.
    output_signature = collections.OrderedDict(
        sorted(output_signature.items(), key=lambda t: t[0]))

    # TODO(KesterTong): check all table initializers are ready, check all output
    # tensors are ready.
    # The final SavedModel binds every tensor binding collected above and runs
    # all table initializers registered on the graph.
    saved_model_future = nodes.apply_operation(
        beam_nodes.CreateSavedModel,
        *tensor_bindings,
        table_initializers=tuple(
            graph.get_collection(tf.compat.v1.GraphKeys.TABLE_INITIALIZERS)),
        output_signature=output_signature,
        label='CreateSavedModel')

    # Uses the analyzer from the last phase to compute a unique path for every
    # analyzer input tensor.
    tensor_keys_to_paths = {
        tensor_key:
        graph_analyzer.get_unique_path(analyzers_input_signature[tensor_key])
        for tensor_key in analyzers_input_signature
    }
    (optimized_saved_model_future, output_cache_value_nodes,
     detached_sideeffect_leafs) = _perform_cache_optimization(
         saved_model_future, dataset_keys, tensor_keys_to_paths, cache_dict,
         phase)

    (optimized_saved_model_future, output_cache_value_nodes) = (
        combiner_packing_util.perform_combiner_packing_optimization(
            optimized_saved_model_future, output_cache_value_nodes, phase))

    # Record the final graph in a module-level global.
    global _ANALYSIS_GRAPH
    _ANALYSIS_GRAPH = optimized_saved_model_future
    return (optimized_saved_model_future, output_cache_value_nodes,
            detached_sideeffect_leafs)
Exemple #9
0
def build(graph, input_signature, output_signature):
    """Builds the analysis node graph ending in a `CreateSavedModel` node.

    The graph is assumed to contain some `Analyzer`s which must be executed by
    doing a full pass over the dataset, feeding the analyzer inputs into some
    implementation, and then replacing the `Analyzer` outputs in the graph with
    constants holding the results.

    The work is organized in phases.  An `Analyzer` or op is ready to run when
    all of its dependencies in the graph have been computed.  For a graph
    constructed by

      def preprocessing_fn(inputs):
        x = inputs['x']
        scaled_0 = x - tft.min(x)
        scaled_0_1 = scaled_0 / tft.max(scaled_0)

    the first phase contains the analyzer for the call to `min`, because `x`
    is an input and therefore ready immediately, while the second phase
    contains the analyzer for the call to `max`, because `scaled_0` depends on
    the result of `tft.min`, which is computed in the first phase.

    More generally, each op and each `Analyzer` gets a level by walking the
    graph: an operation gets the max level of its inputs and a `Tensor` gets
    the level of its operation, unless it is the output of an `Analyzer`, in
    which case it gets the level of that `Analyzer` plus one.

    Side effect: the `analyzer_nodes.TENSOR_REPLACEMENTS` collection of
    `graph` is read and then cleared.

    Args:
      graph: A `tf.Graph`.
      input_signature: A dict whose keys are strings and values are `Tensor`s
          or `SparseTensor`s.
      output_signature: A dict whose keys are strings and values are `Tensor`s
          or `SparseTensor`s.

    Returns:
      The result of applying `beam_nodes.CreateSavedModel` to all tensor
      bindings created across the phases, with `output_signature` sorted by
      key.

    Raises:
      ValueError: if the graph cannot be analyzed.
    """
    tensor_sinks = graph.get_collection(analyzer_nodes.TENSOR_REPLACEMENTS)
    graph.clear_collection(analyzer_nodes.TENSOR_REPLACEMENTS)

    phase = 0
    bindings = []
    # Tracks, per sink tensor, whether its binding has been created yet.
    is_sink_ready = {sink.tensor: False for sink in tensor_sinks}
    translator = _TranslateVisitor()
    translate_traverser = nodes.Traverser(translator)

    while not all(is_sink_ready.values()):
        # Work out which table initializers and which pending tensor
        # replacements can run in this phase, given what is already ready.
        analyzer = graph_tools.InitializableGraphAnalyzer(
            graph, input_signature.values(), is_sink_ready)
        ready_traverser = nodes.Traverser(_ReadyVisitor(analyzer))

        # Create and apply a SavedModel in which every binding collected so far
        # is bound; it outputs the tensors required by this phase's analyzers.
        phase_signature = collections.OrderedDict()
        phase_saved_model = nodes.apply_operation(
            beam_nodes.CreateSavedModel,
            *bindings,
            table_initializers=tuple(analyzer.ready_table_initializers),
            output_signature=phase_signature,
            label='CreateSavedModelForAnalyzerInputs[{}]'.format(phase))
        phase_extracted_values = nodes.apply_operation(
            beam_nodes.ApplySavedModel,
            phase_saved_model,
            phase=phase,
            label='ApplySavedModel[{}]'.format(phase))

        translator.phase = phase
        translator.intermediate_output_signature = phase_signature
        translator.extracted_values_dict = phase_extracted_values

        for tensor, value_node, is_asset_filepath in tensor_sinks:
            # Skip sinks that are already bound, and sinks whose value node is
            # not ready yet (those are retried in a later phase).
            if (is_sink_ready[tensor] or
                    not ready_traverser.visit_value_node(value_node)):
                continue

            translated = translate_traverser.visit_value_node(value_node)
            sink_name = _tensor_name(tensor)
            binding = nodes.apply_operation(
                beam_nodes.CreateTensorBinding,
                translated,
                tensor=str(tensor.name),
                is_asset_filepath=is_asset_filepath,
                label='CreateTensorBinding[{}]'.format(sink_name))
            bindings.append(binding)
            is_sink_ready[tensor] = True

        phase += 1

    # Sort by key so the representation of the output signature is
    # deterministic.
    ordered_signature = collections.OrderedDict(
        sorted(output_signature.items(), key=lambda item: item[0]))

    return nodes.apply_operation(
        beam_nodes.CreateSavedModel,
        *bindings,
        table_initializers=tuple(
            graph.get_collection(tf.GraphKeys.TABLE_INITIALIZERS)),
        output_signature=ordered_signature,
        label='CreateSavedModel')