def _visit_apply_savedmodel_operation(self, operation_def, upstream_views):
        if any(v.fine_grained_view for v in upstream_views):
            raise ValueError(
                'Was not expecting a fine_grained_view input for ApplySavedModel'
            )
        (saved_model_path_upstream_view, input_upstream_view) = upstream_views

        fine_grained_view = collections.OrderedDict()
        for (dataset_idx, dataset_key) in enumerate(self._sorted_dataset_keys):
            # We use an index for the label in order to make beam labels more stable.
            infix = 'AnalysisIndex{}'.format(dataset_idx)
            input_node = nodes.apply_operation(
                beam_nodes.ExtractInputForSavedModel,
                dataset_key=dataset_key,
                label='ExtractInputForSavedModel[{}]'.format(infix))
            (fine_grained_view[dataset_key], ) = (nodes.OperationNode(
                operation_def._replace(
                    label='{}[{}]'.format(operation_def.label, infix)),
                (saved_model_path_upstream_view.flattened_view,
                 input_node)).outputs)

        (flattened_view, ) = nodes.OperationNode(
            operation_def, (saved_model_path_upstream_view.flattened_view,
                            input_upstream_view.flattened_view)).outputs

        return (_OptimizationView(prefer_fine_grained_view=False,
                                  flattened_view=flattened_view,
                                  fine_grained_view=fine_grained_view,
                                  hashed_path=b'APPLY_SAVEDMODEL'), )
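The loop above applies the same operation once per dataset and suffixes each label with a stable per-dataset index rather than the dataset key itself. A minimal standalone sketch of that labeling scheme (the base label and dataset keys below are hypothetical):

def per_dataset_labels(base_label, sorted_dataset_keys):
    """Maps each dataset key to a copy of the label suffixed with its index."""
    return {
        dataset_key: '{}[AnalysisIndex{}]'.format(base_label, dataset_idx)
        for dataset_idx, dataset_key in enumerate(sorted_dataset_keys)
    }


# per_dataset_labels('ApplySavedModel[Phase0]', ['day1', 'day2']) returns
# {'day1': 'ApplySavedModel[Phase0][AnalysisIndex0]',
#  'day2': 'ApplySavedModel[Phase0][AnalysisIndex1]'}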
    def _apply_operation_on_fine_grained_view(self, operation_def,
                                              fine_grained_views,
                                              next_hashed_path):
        """Applies a shardable operation on a fine grained view.

    This also updates `cache_output_nodes` when necessary.

    Args:
      operation_def: A shardable `OperationDef`.
      fine_grained_views: A tuple of `_OptimizationView.fine_grained_view`s.
      next_hashed_path: The hashed path for the currently processed
        operation_def.

    Returns:
      The resulting list of `_OptimizationView.fine_grained_view`s.
    """
        result_fine_grained_view = collections.OrderedDict()

        cache_entry_key = analyzer_cache.make_cache_entry_key(
            tf.compat.as_bytes(operation_def.label) + b'-' + next_hashed_path)

        for (dataset_idx, dataset_key) in enumerate(self._sorted_dataset_keys):
            # We use an index for the label in order to make beam labels more stable.
            infix = 'AnalysisIndex{}'.format(dataset_idx)
            if (operation_def.cache_coder and self._cache_dict.get(
                    dataset_key, {}).get(cache_entry_key) is not None):
                self._dataset_has_cache_misses[dataset_key] |= False
                decode_cache = analyzer_nodes.DecodeCache(
                    dataset_key,
                    cache_entry_key,
                    coder=operation_def.cache_coder,
                    label='DecodeCache[{}][{}]'.format(operation_def.label,
                                                       infix))
                (op_output, ) = nodes.OperationNode(decode_cache,
                                                    tuple()).outputs
            else:
                value_nodes = tuple(v[dataset_key] for v in fine_grained_views)
                (op_output, ) = nodes.OperationNode(
                    operation_def._replace(
                        label='{}[{}]'.format(operation_def.label, infix)),
                    value_nodes).outputs
                if operation_def.cache_coder:
                    self._dataset_has_cache_misses[dataset_key] = True
                    encode_cache = nodes.apply_operation(
                        analyzer_nodes.EncodeCache,
                        op_output,
                        coder=operation_def.cache_coder,
                        label='EncodeCache[{}][{}]'.format(
                            operation_def.label, infix))
                    self.cache_output_nodes[(dataset_key,
                                             cache_entry_key)] = encode_cache
            result_fine_grained_view[dataset_key] = op_output

        return result_fine_grained_view
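For reference, a plain-Python sketch of the per-dataset branch above (not the tensorflow_transform API; `cache`, `compute`, and the toy keys are hypothetical): on a cache hit the stored value is reused, otherwise the value is computed and marked as a miss so it can be encoded.

import collections


def apply_per_dataset(sorted_dataset_keys, cache, cache_entry_key, compute):
    """Returns per-dataset results, reusing cached values where present."""
    results = collections.OrderedDict()
    has_cache_miss = {}
    for dataset_key in sorted_dataset_keys:
        cached = cache.get(dataset_key, {}).get(cache_entry_key)
        if cached is not None:
            # Cache hit: reuse the stored value instead of recomputing.
            results[dataset_key] = cached
            has_cache_miss[dataset_key] = False
        else:
            # Cache miss: compute for this dataset and remember to encode it.
            results[dataset_key] = compute(dataset_key)
            has_cache_miss[dataset_key] = True
    return results, has_cache_miss


# apply_per_dataset(['day1', 'day2'], {'day1': {b'k': 41}}, b'k', len)
# -> (OrderedDict([('day1', 41), ('day2', 4)]), {'day1': False, 'day2': True})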
Example #3
    def _apply_operation_on_fine_grained_view(self, operation_def,
                                              fine_grained_view):
        """Applies a shardable operation on a fine grained view.

    This also updates `cache_output_nodes` when necessary.

    Args:
      operation_def: A shardable `OperationDef`.
      fine_grained_view: A `_OptimizationView.fine_grained_view`.

    Returns:
      The resulting list of `_OptimizationView.fine_grained_view`s.
    """
        result_fine_grained_view = collections.OrderedDict()

        # TODO(b/37788560): Use a better cache key than label. A good alternative is
        # to reuse graph_tools logic to compose names that include properties and
        # fingerprint it.
        cache_entry_key = analyzer_cache.make_cache_entry_key(
            operation_def.label)
        for dataset_key in self._dataset_keys:

            # TODO(b/37788560): Add instrumentation.

            if self._cache_dict.get(dataset_key,
                                    {}).get(cache_entry_key) is not None:
                (op_output, ) = nodes.OperationNode(
                    analyzer_nodes.DecodeCache(
                        dataset_key,
                        cache_entry_key,
                        coder=operation_def.cache_coder), tuple()).outputs
            else:
                value_node = fine_grained_view[dataset_key]
                (op_output, ) = nodes.OperationNode(
                    operation_def._replace(label='{}[{}]'.format(
                        operation_def.label, dataset_key)),
                    (value_node, )).outputs
                if operation_def.cache_coder:
                    encoded_cache = nodes.apply_operation(
                        analyzer_nodes.EncodeCache,
                        op_output,
                        coder=operation_def.cache_coder,
                        label='EncodeCache[{}][{}]'.format(
                            operation_def.label, dataset_key))
                    self.cache_output_nodes[(dataset_key,
                                             cache_entry_key)] = encoded_cache
            result_fine_grained_view[dataset_key] = op_output

        return result_fine_grained_view
  def _apply_operation_on_fine_grained_view(self, operation_def,
                                            fine_grained_view,
                                            next_hashed_path):
    """Applies a shardable operation on a fine grained view.

    This also updates `cache_output_nodes` when necessary.

    Args:
      operation_def: A shardable `OperationDef`.
      fine_grained_view: A `_OptimizationView.fine_grained_view`.
      next_hashed_path: The hashed path for the currently processed
        operation_def.

    Returns:
      The resulting list of `_OptimizationView.fine_grained_view`s.
    """
    result_fine_grained_view = collections.OrderedDict()

    cache_entry_key = analyzer_cache.make_cache_entry_key(
        tf.compat.as_bytes(operation_def.label) + b'-' + next_hashed_path)

    for dataset_key in self._dataset_keys:

      if (operation_def.cache_coder and self._cache_dict.get(
          dataset_key, {}).get(cache_entry_key) is not None):
        (op_output,) = nodes.OperationNode(
            analyzer_nodes.DecodeCache(
                dataset_key,
                cache_entry_key,
                operation_def.label,
                coder=operation_def.cache_coder), tuple()).outputs
      else:
        value_node = fine_grained_view[dataset_key]
        (op_output,) = nodes.OperationNode(
            operation_def._replace(
                label='{}[{}]'.format(operation_def.label, dataset_key)),
            (value_node,)).outputs
        if operation_def.cache_coder:
          encoded_cache = nodes.apply_operation(
              analyzer_nodes.EncodeCache,
              op_output,
              coder=operation_def.cache_coder,
              label='EncodeCache[{}][{}]'.format(operation_def.label,
                                                 dataset_key))
          self.cache_output_nodes[(dataset_key,
                                   cache_entry_key)] = encoded_cache
      result_fine_grained_view[dataset_key] = op_output

    return result_fine_grained_view
 def visit(self, operation_def, input_values):
     # If we see a combine node which can be packed, create the packed combine
     # node and cache it as we will use the same packed node for all the combines
     # in the group.
     if operation_def.label in self._combine_to_grand_parent:
         return self._get_packed_combine(operation_def, input_values)
     return nodes.OperationNode(operation_def, input_values).outputs
    def _visit_partitionable_operation(self, operation_def, upstream_views):

        # This is a hint for whether or not the `fine_grained_view` should be used
        # downstream.  It should be set to true if either the upstream view has
        # caching operations that haven't been flattened yet, or the current
        # operation is cacheable.
        all_fine_grained_views_available = all(v.fine_grained_view
                                               for v in upstream_views)
        prefer_fine_grained_view = (any(v.prefer_fine_grained_view
                                        for v in upstream_views)
                                    or all_fine_grained_views_available
                                    and operation_def.cache_coder is not None)

        next_hashed_path = self._make_next_hashed_path(
            [v.hashed_path for v in upstream_views], operation_def)
        if all_fine_grained_views_available:
            fine_grained_views = (self._apply_operation_on_fine_grained_view(
                operation_def,
                tuple(v.fine_grained_view for v in upstream_views),
                next_hashed_path), )
        else:
            fine_grained_views = (None, ) * operation_def.num_outputs

        flattened_views = nodes.OperationNode(
            operation_def,
            tuple(v.flattened_view for v in upstream_views)).outputs

        assert len(fine_grained_views) == len(flattened_views)
        return tuple(
            _OptimizationView(  # pylint: disable=g-complex-comprehension
                prefer_fine_grained_view=prefer_fine_grained_view,
                flattened_view=flat,
                fine_grained_view=fine,
                hashed_path=next_hashed_path)
            for flat, fine in zip(flattened_views, fine_grained_views))
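A standalone restatement (with a hypothetical `View` tuple, not the real `_OptimizationView`) of the `prefer_fine_grained_view` hint computed above, with the operator precedence made explicit: the `and` binds tighter than the `or`.

import collections

View = collections.namedtuple(
    'View', ['fine_grained_view', 'prefer_fine_grained_view'])


def prefer_fine_grained(upstream_views, is_cacheable):
    # True when every upstream view carries a fine-grained (per-dataset) view.
    all_available = all(v.fine_grained_view for v in upstream_views)
    # Reads as: any-upstream-prefers-it OR (all-available AND cacheable).
    return (any(v.prefer_fine_grained_view for v in upstream_views)
            or (all_available and is_cacheable))


# prefer_fine_grained([View({'day1': object()}, False)], is_cacheable=True) -> True
# prefer_fine_grained([View(None, False)], is_cacheable=True)               -> False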
Example #7
  def _remove_redundant_nodes(self, operation_def, input_values):
    # Input values to be used as input to CreateSavedModel.
    # Since some of the input values are generated from the redundant nodes,
    # those need to be reconstructed with the final packed merge node.
    reconstructed_input_values = []

    redundant_values, non_redundant_values = (
        self._get_redundant_and_non_redundant_input_values(input_values))

    # Keep track of the final packed merge combine node. For input nodes that
    # are descendants of the redundant nodes, we create new nodes generated
    # from the final packed merge combine node.
    (final_packed_merge_combine, final_packed_merge_combine_tensor_bindings) = (
        self._get_final_packed_combine_and_tensor_bindings(redundant_values))
    reconstructed_input_values.extend(
        final_packed_merge_combine_tensor_bindings)

    # Add the non-redundant nodes to the input values.
    reconstructed_input_values.extend(non_redundant_values)

    # Keep track of the info needed to reconstruct the descendants of the
    # redundant nodes.
    to_be_created_tensor_bindings = (
        self._get_to_be_created_tensor_bindings_info(redundant_values))

    reconstructed_input_values.extend(self._create_tensor_bindings(
        to_be_created_tensor_bindings, final_packed_merge_combine))
    assert len(input_values) == len(reconstructed_input_values)
    return nodes.OperationNode(
        operation_def, tuple(reconstructed_input_values)).outputs
Example #8
 def visit(self, operation_def, input_values):
   self.validate_operation_def(operation_def)
   if input_values and isinstance(operation_def, beam_nodes.CreateSavedModel):
     # This will only be called once since this is a single phase analysis
     # graph and in that case only the final CreateSavedModel node has inputs.
     return self._remove_redundant_nodes(operation_def, input_values)
   return nodes.OperationNode(operation_def, input_values).outputs
    def _visit_partitionable_operation(self, operation_def, upstream_views):
        # TODO(b/37788560) Possibly support partitionable operations with multiple
        # inputs.
        (upstream_view, ) = upstream_views

        # This is a hint for whether or not the `fine_grained_view` should be used
        # downstream.  It should be set to true if either the upstream view has
        # caching operations that haven't been flattened yet, or the current
        # operation is cacheable.
        prefer_fine_grained_view = (upstream_view.prefer_fine_grained_view
                                    or upstream_view.fine_grained_view
                                    and operation_def.cache_coder is not None)

        next_hashed_path = self._make_next_hashed_path(
            [v.hashed_path for v in upstream_views], operation_def)
        if upstream_view.fine_grained_view:
            fine_grained_views = (self._apply_operation_on_fine_grained_view(
                operation_def, upstream_view.fine_grained_view,
                next_hashed_path), )
        else:
            fine_grained_views = (None, ) * operation_def.num_outputs

        flattened_views = nodes.OperationNode(
            operation_def, (upstream_view.flattened_view, )).outputs

        assert len(fine_grained_views) == len(flattened_views)
        return tuple(
            _OptimizationView(  # pylint: disable=g-complex-comprehension
                prefer_fine_grained_view=prefer_fine_grained_view,
                flattened_view=flat,
                fine_grained_view=fine,
                hashed_path=next_hashed_path)
            for flat, fine in zip(flattened_views, fine_grained_views))
Example #10
    def _visit_apply_savedmodel_operation(self, operation_def, upstream_views):
        (upstream_view, ) = upstream_views
        if upstream_view.fine_grained_view:
            raise ValueError(
                'Was not expecting a fine_grained_view input for ApplySavedModel'
            )

        fine_grained_view = collections.OrderedDict()
        for key in self._dataset_keys:
            (fine_grained_view[key], ) = (nodes.OperationNode(
                operation_def._replace(dataset_key=key,
                                       label='{}[{}]'.format(
                                           operation_def.label, key)),
                (upstream_view.flattened_view, )).outputs)

        (flattened_view, ) = nodes.OperationNode(
            operation_def, (upstream_view.flattened_view, )).outputs

        return (_OptimizationView(prefer_fine_grained_view=False,
                                  flattened_view=flattened_view,
                                  fine_grained_view=fine_grained_view), )
 def visit(self, operation_def, input_values):
   if isinstance(operation_def, analyzer_nodes.TensorSource):
     tensors = operation_def.tensors
     label = operation_def.label
     # Add tensor to signature so it gets produced by the SavedModel.
     for tensor in tensors:
       self.intermediate_output_signature[_tensor_name(tensor)] = tensor
     keys = tuple(map(_tensor_name, tensors))
     output = nodes.apply_operation(
         beam_nodes.ExtractFromDict, self.extracted_values_dict,
         keys=keys, label=label)
     return (output,)
   else:
     return nodes.OperationNode(operation_def, input_values).outputs
Example #12
    def visit(self, operation_def, input_values):
        self._validate_operation_def(operation_def)

        # TODO(b/37788560): Possibly make this generic instead of special casing the
        # ApplySavedModel operation.
        if (isinstance(operation_def, beam_nodes.ApplySavedModel)
                and operation_def.phase == 0):
            return self._visit_apply_savedmodel_operation(
                operation_def, input_values)

        # When self._cache_dict is None this means that we shouldn't do any caching
        # for this pipeline, and so there's no need to create any fine grained
        # views.
        if self._cache_dict is not None and operation_def.is_partitionable:
            return self._visit_partitionable_operation(operation_def,
                                                       input_values)

        if input_values and any(
                v.fine_grained_view and v.prefer_fine_grained_view
                for v in input_values):
            # We can 'flatten' the cached outputs of the parent operation since this
            # operation doesn't support partitioning.
            disaggregated_input_values = []
            for view in input_values:
                disaggregated_input_values.extend(
                    view.fine_grained_view.values())

            # Check that all cached values have the same size.
            assert len({len(value)
                        for value in disaggregated_input_values}) == 1

            next_inputs = nodes.apply_multi_output_operation(
                beam_nodes.Flatten,
                *disaggregated_input_values,
                label='FlattenCache[{}]'.format(operation_def.label))
        else:
            # Parent operation output is not cacheable, therefore we can just use
            # a flattened view.
            next_inputs = tuple(v.flattened_view for v in input_values)

        flattened_view = nodes.OperationNode(operation_def,
                                             next_inputs).outputs

        return tuple(
            _OptimizationView(  # pylint: disable=g-complex-comprehension
                prefer_fine_grained_view=False,
                flattened_view=flat,
                fine_grained_view=None,
                hashed_path=None) for flat in flattened_view)
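When a downstream operation cannot be partitioned, the visitor above gathers every per-dataset value and flattens them into a single input. A plain-Python analogue of that gathering step (hypothetical data, not the beam_nodes.Flatten API):

def flatten_fine_grained_views(input_views):
    """Collects every per-dataset value from each upstream view into one list."""
    disaggregated = []
    for view in input_views:  # each view is a {dataset_key: value} mapping
        disaggregated.extend(view.values())
    # Every upstream view is expected to cover the same set of datasets.
    assert len({tuple(sorted(view)) for view in input_views}) <= 1
    return disaggregated


# flatten_fine_grained_views([{'day1': 1, 'day2': 2}, {'day1': 10, 'day2': 20}])
# -> [1, 2, 10, 20]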
Example #13
    def visit(self, operation_def, input_values):
        self._validate_operation_def(operation_def)

        if (isinstance(operation_def, beam_nodes.ApplySavedModel)
                and operation_def.phase == 0):
            return self._visit_apply_savedmodel_operation(
                operation_def, input_values)

        if self._cache_location and operation_def.is_partitionable:
            return self._visit_partitionable_operation(operation_def,
                                                       input_values)

        if input_values and any(
                v.fine_grained_view and v.prefer_fine_grained_view
                for v in input_values):
            # We can 'flatten' the cached outputs of the parent operation since this
            # operation doesn't support partitioning.
            disaggregated_input_values = []
            for view in input_values:
                disaggregated_input_values.extend(
                    view.fine_grained_view.values())

            # Check that all cached values have the same size.
            assert len({len(value)
                        for value in disaggregated_input_values}) == 1

            next_inputs = nodes.apply_multi_output_operation(
                beam_nodes.Flatten,
                *disaggregated_input_values,
                label='FlattenCache[{}]'.format(operation_def.label))
        else:
            # Parent operation output is not cacheable, therefore we can just use
            # a flattened view.
            next_inputs = tuple(v.flattened_view for v in input_values)

        flattened_view = nodes.OperationNode(operation_def,
                                             next_inputs).outputs

        return tuple(
            _OptimizationView(prefer_fine_grained_view=False,
                              flattened_view=flat,
                              fine_grained_view=None)
            for flat in flattened_view)
Example #14
    def _visit_partitionable_operation(self, operation_def, upstream_views):
        (upstream_view, ) = upstream_views
        prefer_fine_grained_view = (upstream_view.prefer_fine_grained_view
                                    or upstream_view.fine_grained_view
                                    and operation_def.cache_coder is not None)

        if upstream_view.fine_grained_view:
            value_nodes = collections.OrderedDict()
            for key in self._dataset_keys:

                if operation_def.cache_coder is not None:
                    cache_file_path = analyzer_cache.make_cache_file_path(
                        key, operation_def.label)
                    pattern = '{}-00000*.gz'.format(
                        os.path.join(self._cache_location.input_cache_dir,
                                     cache_file_path))
                    try:
                        if tf.gfile.Glob(pattern):
                            op_outputs = nodes.apply_multi_output_operation(
                                analyzer_nodes.ReadCache,
                                path=cache_file_path,
                                coder=operation_def.cache_coder,
                                label='ReadCache[{}][{}]'.format(
                                    operation_def.label, key))
                            value_nodes[key] = op_outputs
                            continue
                    except tf.errors.NotFoundError:
                        pass
                else:
                    cache_file_path = None

                values = upstream_view.fine_grained_view[key]
                op_outputs = nodes.OperationNode(
                    operation_def._replace(
                        label='{}[{}]'.format(operation_def.label, key)),
                    (values, )).outputs
                if cache_file_path is not None:
                    op_outputs = nodes.apply_multi_output_operation(
                        analyzer_nodes.WriteCache,
                        *op_outputs,
                        path=cache_file_path,
                        coder=operation_def.cache_coder,
                        label='WriteCache[{}][{}]'.format(
                            operation_def.label, key))
                value_nodes[key] = op_outputs

            # Build a separate dict per output; `[OrderedDict()] * n` would
            # alias the same dict n times.
            fine_grained_views = [
                collections.OrderedDict()
                for _ in range(operation_def.num_outputs)
            ]
            for key in self._dataset_keys:
                for idx in range(operation_def.num_outputs):
                    fine_grained_views[idx][key] = value_nodes[key][idx]
        else:
            fine_grained_views = (None, ) * operation_def.num_outputs

        flattened_views = nodes.OperationNode(
            operation_def, (upstream_view.flattened_view, )).outputs

        return tuple(
            _OptimizationView(
                prefer_fine_grained_view=prefer_fine_grained_view,
                flattened_view=flat,
                fine_grained_view=fine)
            for flat, fine in zip(flattened_views, fine_grained_views))
Example #15
  def _visit_partitionable_operation(self, operation_def, upstream_views):
    # TODO(b/37788560) Possibly support partitionable operations with multiple
    # inputs.
    (upstream_view,) = upstream_views
    prefer_fine_grained_view = (
        upstream_view.prefer_fine_grained_view or
        upstream_view.fine_grained_view and
        operation_def.cache_coder is not None)

    if upstream_view.fine_grained_view:
      value_nodes = collections.OrderedDict()
      for key in self._dataset_keys:

        if operation_def.cache_coder is not None:
          # TODO(b/37788560): Add instrumentation.
          # TODO(b/37788560): Use a better cache key than label. A good
          # alternative is to reuse graph_tools logic to compose names that
          # include properties and fingerprint it.
          cache_file_path = analyzer_cache.make_cache_file_path(
              key, operation_def.label)
          # TODO(b/37788560): Come up with a more abstract way to do this that
          # also ensures consistency.
          pattern = '{}-00000*.gz'.format(
              os.path.join(self._cache_location.input_cache_dir,
                           cache_file_path))
          try:
            if tf.gfile.Glob(pattern):
              op_outputs = nodes.apply_multi_output_operation(
                  analyzer_nodes.ReadCache,
                  path=cache_file_path,
                  coder=operation_def.cache_coder,
                  label='ReadCache[{}][{}]'.format(operation_def.label, key))
              value_nodes[key] = op_outputs
              continue
          except tf.errors.NotFoundError:
            pass
        else:
          cache_file_path = None

        values = upstream_view.fine_grained_view[key]
        op_outputs = nodes.OperationNode(
            operation_def._replace(
                label='{}[{}]'.format(operation_def.label, key)),
            (values,)).outputs
        if cache_file_path is not None:
          op_outputs = nodes.apply_multi_output_operation(
              analyzer_nodes.WriteCache,
              *op_outputs,
              path=cache_file_path,
              coder=operation_def.cache_coder,
              label='WriteCache[{}][{}]'.format(operation_def.label, key))
        value_nodes[key] = op_outputs

      # Build a separate dict per output; `[OrderedDict()] * n` would alias
      # the same dict n times.
      fine_grained_views = [
          collections.OrderedDict() for _ in range(operation_def.num_outputs)
      ]
      for key in self._dataset_keys:
        for idx in range(operation_def.num_outputs):
          fine_grained_views[idx][key] = value_nodes[key][idx]
    else:
      fine_grained_views = (None,) * operation_def.num_outputs

    flattened_views = nodes.OperationNode(
        operation_def, (upstream_view.flattened_view,)).outputs

    return tuple(
        _OptimizationView(
            prefer_fine_grained_view=prefer_fine_grained_view,
            flattened_view=flat,
            fine_grained_view=fine)
        for flat, fine in zip(flattened_views, fine_grained_views))
Example #16
 def testOperationNodeWithBadInputs(self):
     with self.assertRaisesRegexp(TypeError, 'inputs must be a tuple, got'):
         nodes.OperationNode(_Concat(label='Concat'), 'not a tuple')
Example #17
 def _maybe_create_node(op_def, inputs):
   if op_def.label in labels_to_new_nodes:
     return labels_to_new_nodes[op_def.label]
   new_node = nodes.OperationNode(op_def, inputs).outputs
   labels_to_new_nodes[op_def.label] = new_node
   return new_node
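A self-contained analogue of `_maybe_create_node` above (names hypothetical): a node is built at most once per label and reused on later visits, which is what de-duplicates repeated operation definitions.

def make_node_cache(build_node):
    """Returns a helper that builds a node for each label at most once."""
    labels_to_new_nodes = {}

    def maybe_create_node(label, inputs):
        if label in labels_to_new_nodes:
            return labels_to_new_nodes[label]
        new_node = build_node(label, inputs)
        labels_to_new_nodes[label] = new_node
        return new_node

    return maybe_create_node


# maybe_create = make_node_cache(lambda label, inputs: (label, inputs))
# maybe_create('CacheableSum', ()) is maybe_create('CacheableSum', ())  -> True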
Example #18
 def testOperationNodeWithBadInput(self):
     a = nodes.apply_operation(_Constant, value='a', label='Constant[a]')
     with self.assertRaisesRegexp(
             TypeError, 'Inputs to Operation must be a ValueNode, got'):
         nodes.OperationNode(_Concat(label='Concat'),
                             (a, 'not a value_node'))
Example #19
 def testOperationNodeWithBadOperationDef(self):
     with self.assertRaisesRegexp(
             TypeError, 'operation_def must be an OperationDef, got'):
         nodes.OperationNode('not a operation_def', ())
 def visit(self, operation_def, input_values):
     if isinstance(operation_def, beam_nodes.ExtractInputForSavedModel):
         self._required_dataset_keys.add(operation_def.dataset_key)
     return nodes.OperationNode(operation_def, input_values).outputs
 def visit(self, operation_def, input_values):
     self._maybe_add_packable_combine(operation_def, input_values)
     return nodes.OperationNode(operation_def, input_values).outputs
Example #22
 def visit(self, operation_def, input_values):
   self.validate_operation_def(operation_def)
   # We look for the ExtractOutputs node of packable combines
   if operation_def.label in self._packable_combine_extract_outputs:
     return self._add_flatten_placeholder(operation_def, input_values)
   return nodes.OperationNode(operation_def, input_values).outputs
 def visit(self, operation_def, input_values):
     if isinstance(operation_def, analyzer_nodes.TensorSource):
         for tensor in operation_def.tensors:
             self.sourced_tensors.append(tensor)
     return nodes.OperationNode(operation_def, input_values).outputs
Example #24
 def testValueNodeWithTooHighValueIndex(self):
     parent = nodes.OperationNode(_Constant('a'), ())
     with self.assertRaisesWithLiteralMatch(
             ValueError,
             'value_index was 2 but parent_operation had 1 outputs'):
         nodes.ValueNode(parent, 2)