Beispiel #1
0
def _ragged_as_leaf_node(
        ragged_tensor: tf.RaggedTensor, is_repeated: bool,
        reference_ragged_tensor: tf.RaggedTensor,
        options: calculate_options.Options) -> prensor.LeafNodeTensor:
    """Wraps a ragged tensor in a `prensor.LeafNodeTensor`.

    The leaf uses `ragged_tensor.value_rowids()` as its parent index and
    `ragged_tensor.values` as its values.

    Args:
      ragged_tensor: the ragged tensor to convert.
      is_repeated: forwarded to the leaf node. When False and
        `options.ragged_checks` is set, a runtime assertion requires the row
        ids to be strictly increasing.
      reference_ragged_tensor: ragged tensor whose outermost dimension
        `ragged_tensor` is compared against.
      options: calculation options; only `ragged_checks` is read here.

    Returns:
      The leaf node. If any runtime assertions were created, the parent index
      is built under a control dependency on them.

    Raises:
      ValueError: if both outermost dimensions are statically known and
        differ.
    """
    static_rows = tf.compat.dimension_at_index(ragged_tensor.shape, 0).value
    reference_static_rows = tf.compat.dimension_at_index(
        reference_ragged_tensor.shape, 0).value

    runtime_checks = []
    if static_rows is None or reference_static_rows is None:
        # At least one row count is not known statically, so the comparison
        # can only be done by an assertion op (and only when requested).
        if options.ragged_checks:
            runtime_checks.append(
                tf.assert_equal(ragged_tensor.nrows(),
                                reference_ragged_tensor.nrows()))
    elif static_rows != reference_static_rows:
        raise ValueError("Returned ragged tensor is not the right size.")

    if not is_repeated:
        row_ids = ragged_tensor.value_rowids()
        if options.ragged_checks:
            # Differences of consecutive row ids must all be positive.
            runtime_checks.append(
                tf.compat.v1.assert_positive(row_ids[1:] - row_ids[:-1]))

    def _build_leaf():
        return prensor.LeafNodeTensor(ragged_tensor.value_rowids(),
                                      ragged_tensor.values, is_repeated)

    if not runtime_checks:
        return _build_leaf()
    with tf.control_dependencies(runtime_checks):
        return _build_leaf()
Beispiel #2
0
    def testMultipleColumnsTwoRowGroupsAndEqualBatchSize_OutputsPrensor(self):
        """Checks the first batch read for three columns against a prensor."""

        def _int64(data):
            return tf.constant(data, dtype=tf.int64)

        dataset = parquet.ParquetDataset(
            filenames=self._rowgroup_test_filenames,
            value_paths=[
                "DocId", "Name.Language.Code", "Name.Language.Country"
            ],
            batch_size=2)

        expected_nodes = {
            path.Path([]):
                prensor.RootNodeTensor(_int64(2)),
            path.Path(["DocId"]):
                prensor.LeafNodeTensor(_int64([0, 1]), _int64([10, 20]),
                                       True),
            path.Path(["Name"]):
                prensor.ChildNodeTensor(_int64([0, 0, 0, 1]), True),
            path.Path(["Name", "Language"]):
                prensor.ChildNodeTensor(_int64([0, 0, 2]), True),
            path.Path(["Name", "Language", "Code"]):
                prensor.LeafNodeTensor(
                    _int64([0, 1, 2]),
                    tf.constant([b"en-us", b"en", b"en-gb"]), True),
            path.Path(["Name", "Language", "Country"]):
                prensor.LeafNodeTensor(_int64([0, 2]),
                                       tf.constant([b"us", b"gb"]), True),
        }
        expected_prensor = prensor.create_prensor_from_descendant_nodes(
            expected_nodes)

        # Every batch is pulled from the dataset, but only the first one is
        # compared against the expectation.
        for batch_number, batch_prensor in enumerate(dataset):
            if batch_number != 0:
                continue
            self._assertPrensorEqual(batch_prensor, expected_prensor)
def _ragged_as_leaf_node(ragged_tensor, is_repeated, reference_ragged_tensor,
                         options):
    """Creates a leaf node from a ragged tensor.

    The leaf uses `ragged_tensor.value_rowids()` as its parent index and
    `ragged_tensor.values` as its values.

    Args:
      ragged_tensor: the ragged tensor to convert.
      is_repeated: forwarded to the leaf node. When False and
        `options.ragged_checks` is set, a runtime assertion requires the row
        ids to be strictly increasing.
      reference_ragged_tensor: ragged tensor whose outermost dimension
        `ragged_tensor` is compared against.
      options: calculation options; only `ragged_checks` is read here.

    Returns:
      A `prensor.LeafNodeTensor`. If any runtime assertions were created, the
      parent index is built under a control dependency on them.

    Raises:
      ValueError: if both outermost dimensions are statically known and
        differ.
    """
    assertions = []
    # `shape[0]` is a plain int/None (no `.value`) when TF2 shape behavior is
    # enabled; `tf.compat.dimension_at_index` returns a Dimension either way.
    size_dim = tf.compat.dimension_at_index(ragged_tensor.shape, 0).value
    reference_size_dim = tf.compat.dimension_at_index(
        reference_ragged_tensor.shape, 0).value
    if size_dim is not None and reference_size_dim is not None:
        if size_dim != reference_size_dim:
            raise ValueError("Returned ragged tensor is not the right size.")
    elif options.ragged_checks:
        # Static sizes are unavailable, so defer the comparison to run time.
        assertions.append(
            tf.assert_equal(ragged_tensor.nrows(),
                            reference_ragged_tensor.nrows()))

    if not is_repeated and options.ragged_checks:
        # Differences of consecutive row ids must all be positive.
        rowids = ragged_tensor.value_rowids()
        assertions.append(
            tf.compat.v1.assert_positive(rowids[1:] - rowids[:-1]))

    if assertions:
        with tf.control_dependencies(assertions):
            parent_index = ragged_tensor.value_rowids()
            return prensor.LeafNodeTensor(parent_index, ragged_tensor.values,
                                          is_repeated)
    parent_index = ragged_tensor.value_rowids()
    return prensor.LeafNodeTensor(parent_index, ragged_tensor.values,
                                  is_repeated)
Beispiel #4
0
    def calculate(
            self,
            sources: Sequence[prensor.NodeTensor],
            destinations: Sequence[expression.Expression],
            options: calculate_options.Options,
            side_info: Optional[prensor.Prensor] = None) -> prensor.NodeTensor:
        """Builds the broadcast node from the origin and its new parent.

        Args:
          sources: exactly two node tensors, the origin followed by the
            parent.
          destinations: not used by this method.
          options: not used by this method.
          side_info: not used by this method.

        Returns:
          A `LeafNodeTensor` when the origin is a leaf, otherwise a
          `ChildNodeTensor`.
        """
        origin_node, parent_node = sources

        # A RootNodeTensor should never be recalculated.
        assert not isinstance(origin_node,
                              prensor.RootNodeTensor), origin_node

        # The parent has to be a ChildNodeTensor: a leaf cannot hold a
        # submessage, and nothing can be broadcast into a root.
        assert isinstance(parent_node, prensor.ChildNodeTensor), parent_node

        # The parent's `index_to_value` records which child nodes were
        # duplicated, and therefore which origin values must be duplicated.
        joined = struct2tensor_ops.equi_join_any_indices(
            parent_node.index_to_value, origin_node.parent_index)
        new_parent_index, origin_positions = joined

        if not isinstance(origin_node, prensor.LeafNodeTensor):
            return prensor.ChildNodeTensor(new_parent_index,
                                           self.is_repeated, origin_positions)
        gathered_values = tf.gather(origin_node.values, origin_positions)
        return prensor.LeafNodeTensor(new_parent_index, gathered_values,
                                      self.is_repeated)
Beispiel #5
0
    def calculate(self, sources, destinations, options):
        """Counts, per parent element, the origin elements pointing at it.

        Args:
          sources: exactly two node tensors, the origin (leaf or child)
            followed by its parent (child or root).
          destinations: not used by this method.
          options: not used by this method.

        Returns:
          A non-repeated `LeafNodeTensor` whose parent index is
          `range(parent size)` and whose values are the per-parent counts.

        Raises:
          ValueError: if either source has an unsupported node type.
        """
        origin_value, origin_parent_value = sources

        allowed_origin_types = (prensor.LeafNodeTensor,
                                prensor.ChildNodeTensor)
        if not isinstance(origin_value, allowed_origin_types):
            raise ValueError(
                "origin_value must be a LeafNodeTensor or a ChildNodeTensor, "
                "but was a " + str(type(origin_value)))

        allowed_parent_types = (prensor.ChildNodeTensor,
                                prensor.RootNodeTensor)
        if not isinstance(origin_parent_value, allowed_parent_types):
            raise ValueError("origin_parent_value must be a ChildNodeTensor "
                             "or a RootNodeTensor, but was a " +
                             str(type(origin_parent_value)))

        child_to_parent = origin_value.parent_index
        parent_count = origin_parent_value.size

        # One `1` per origin element, scattered to that element's parent
        # position; this increments the parent's count once per element.
        # Not the fastest way to compute it.
        ones = tf.ones(tf.shape(child_to_parent), dtype=tf.int64)
        scatter_positions = tf.expand_dims(child_to_parent, 1)
        counts = tf.scatter_nd(scatter_positions, ones,
                               tf.reshape(parent_count, [1]))

        # The result has one entry per parent: parent index 0, 1, ..., n-1.
        return prensor.LeafNodeTensor(
            tf.range(parent_count, dtype=tf.int64), counts, False)
Beispiel #6
0
 def calculate_from_parsed_field(
         self,
         parsed_field: struct2tensor_ops._ParsedField,  # pylint: disable=protected-access
         destinations: Sequence[expression.Expression],
         options: calculate_options.Options) -> prensor.NodeTensor:
     """Builds a leaf node from a parsed field.

     Args:
       parsed_field: supplies `index` (used as the parent index) and `value`
         (used as the values).
       destinations: not used by this method.
       options: not used by this method.

     Returns:
       A `LeafNodeTensor` carrying this expression's `is_repeated`.
     """
     parent_index = parsed_field.index
     values = parsed_field.value
     return prensor.LeafNodeTensor(parent_index, values, self.is_repeated)
def _to_leaf_prensor_helper(rt: tf.RaggedTensor,
                            default_field_name: path.Step) -> prensor.Prensor:
    """Recursively turns a fully partitioned ragged tensor into a prensor.

    The ragged tensor is assumed to be fully partitioned, i.e. its innermost
    values are a vector rather than a 2D tensor.

    Args:
      rt: a fully partitioned ragged tensor (see
        _fully_partitioned_ragged_tensor).
      default_field_name: a path.Step for unnamed dimensions.

    Returns:
      a prensor, with a leaf as the root node.
    """
    outer_partition = rt._row_partition  # pylint: disable=protected-access

    if rt.ragged_rank != 1:
        # Convert the inner ragged tensor first, then attach it beneath the
        # outermost partition.
        inner_prensor = _to_leaf_prensor_helper(rt.values, default_field_name)
        return _one_child_prensor(outer_partition, inner_prensor,
                                  default_field_name)

    # Base case: a single partition directly over the flat values.
    flat_values = rt.values
    leaf_node = prensor.LeafNodeTensor(outer_partition.value_rowids(),
                                       flat_values, True)
    return prensor.create_prensor_from_root_and_children(leaf_node, {})
Beispiel #8
0
 def calculate(self, sources, destinations, options):
     """Builds a leaf holding the positional index of the single source.

     Args:
       sources: exactly one node tensor (a leaf or a child node).
       destinations: not used by this method.
       options: not used by this method.

     Returns:
       A `LeafNodeTensor` that reuses the source's parent index and whose
       values come from `prensor_util.get_positional_index`.

     Raises:
       ValueError: if the source is neither a leaf nor a child node.
     """
     (origin,) = sources
     supported_types = (prensor.LeafNodeTensor, prensor.ChildNodeTensor)
     if not isinstance(origin, supported_types):
         raise ValueError("Cannot calculate the positional index of the root")
     parent_index = origin.parent_index
     positions = prensor_util.get_positional_index(origin)
     return prensor.LeafNodeTensor(parent_index, positions, self.is_repeated)
def _as_leaf_node_no_checks(sparse_tensor, is_repeated):
    """Converts a SparseTensor into a LeafNodeTensor without validation.

    Args:
      sparse_tensor: supplies `indices` and `values`.
      is_repeated: when True the parent index is the first row of the
        transposed indices; otherwise the indices are flattened to 1-D.

    Returns:
      A `prensor.LeafNodeTensor` with the derived parent index.
    """
    parent_index = (tf.transpose(sparse_tensor.indices)[0]
                    if is_repeated else tf.reshape(sparse_tensor.indices,
                                                   [-1]))
    leaf_values = sparse_tensor.values
    return prensor.LeafNodeTensor(parent_index, leaf_values, is_repeated)
Beispiel #10
0
 def calculate(self, sources, destinations, options):
     """Applies `self._operation` to the values of all source leaves.

     Args:
       sources: node tensors; each is passed through `_leaf_node_or_error`.
       destinations: not used by this method.
       options: not used by this method.

     Returns:
       A `LeafNodeTensor` that reuses the first source's parent index and
       holds the operation's output as values.
     """
     leaves = [_leaf_node_or_error(source) for source in sources]
     leaf_values = [leaf.values for leaf in leaves]
     # TODO(martinz): Check that:
     # source_values have equal parent_index.
     # output_value has the same size as the input.
     shared_parent_index = leaves[0].parent_index
     mapped_values = self._operation(*leaf_values)
     return prensor.LeafNodeTensor(shared_parent_index, mapped_values,
                                   self._is_repeated)
Beispiel #11
0
    def testPromoteAndProjectExpression(self):
        """Checks promote + project results computed from a parquet file."""

        def _int64(data):
            return tf.constant(data, dtype=tf.int64)

        parquet_files = [
            "struct2tensor/testdata/parquet_testdata/dremel_example.parquet"
        ]
        rows_per_batch = 2
        root_exp = parquet.create_expression_from_parquet_file(parquet_files)

        # Promote Name.Language.Code one level up as Name.new_code.
        code_path = path.Path(["Name", "Language", "Code"])
        promoted_exp = promote.promote(root_exp, code_path, "new_code")
        new_code_projection = project.project(
            promoted_exp, [path.Path(["Name", "new_code"])])
        docid_projection = project.project(root_exp, [path.Path(["DocId"])])

        batches = parquet.calculate_parquet_values(
            [new_code_projection, docid_projection], root_exp, parquet_files,
            rows_per_batch)

        expected_new_code = prensor.create_prensor_from_descendant_nodes({
            path.Path([]):
                prensor.RootNodeTensor(_int64(2)),
            path.Path(["Name"]):
                prensor.ChildNodeTensor(_int64([0, 0, 0, 1]), True),
            path.Path(["Name", "new_code"]):
                prensor.LeafNodeTensor(
                    _int64([0, 0, 2]),
                    tf.constant([b"en-us", b"en", b"en-gb"]), True),
        })

        expected_docid = prensor.create_prensor_from_descendant_nodes({
            path.Path([]):
                prensor.RootNodeTensor(_int64(2)),
            path.Path(["DocId"]):
                prensor.LeafNodeTensor(_int64([0, 1]), _int64([10, 20]),
                                       False),
        })

        # Each batch holds the two projections in the order requested above.
        for batch in batches:
            actual_new_code = batch[0]
            actual_docid = batch[1]

            self._assertPrensorEqual(actual_new_code, expected_new_code)
            self._assertPrensorEqual(actual_docid, expected_docid)
Beispiel #12
0
    def testPlaceholderExpression(self):
        """Promotes user.friends on a placeholder and checks the fed result."""
        input_prensor = prensor_test_util.create_nested_prensor()
        expected_prensor = prensor.create_prensor_from_descendant_nodes({
            path.Path([]):
                prensor.RootNodeTensor(tf.constant(3, dtype=tf.int64)),
            path.Path(["new_friends"]):
                prensor.LeafNodeTensor(
                    tf.constant([0, 1, 1, 1, 2], dtype=tf.int64),
                    tf.constant(["a", "b", "c", "d", "e"], dtype=tf.string),
                    True),
        })

        # Schema pieces are named separately to keep the nesting readable.
        doc_schema = {
            "is_repeated": True,
            "children": {
                "bar": {
                    "is_repeated": True,
                    "dtype": tf.string
                },
                "keep_me": {
                    "is_repeated": False,
                    "dtype": tf.bool
                },
            },
        }
        user_schema = {
            "is_repeated": True,
            "children": {
                "friends": {
                    "is_repeated": True,
                    "dtype": tf.string
                },
            },
        }
        root_schema = mpp.create_schema(is_repeated=True,
                                        children={
                                            "doc": doc_schema,
                                            "user": user_schema,
                                        })

        placeholder_exp = placeholder.create_expression_from_schema(
            root_schema)
        promoted_exp = promote.promote(placeholder_exp,
                                       path.Path(["user", "friends"]),
                                       "new_friends")
        new_friends_path = path.Path(["new_friends"])
        projected_exp = project.project(promoted_exp, [new_friends_path])
        new_friends_exp = projected_exp.get_descendant(
            path.Path(["new_friends"]))

        results = calculate.calculate_values(
            [new_friends_exp], feed_dict={placeholder_exp: input_prensor})

        actual_node = results[0]
        expected_node = expected_prensor.get_descendant(
            path.Path(["new_friends"])).node

        self.assertAllEqual(actual_node.is_repeated,
                            expected_node.is_repeated)
        self.assertAllEqual(actual_node.values, expected_node.values)
        self.assertAllEqual(actual_node.parent_index,
                            expected_node.parent_index)
Beispiel #13
0
 def calculate(self, sources, destinations, options, side_info=None):
     """Re-parents a leaf onto its parent's parent.

     Args:
       sources: exactly two node tensors, the origin leaf followed by its
         parent child node.
       destinations: not used by this method.
       options: not used by this method.
       side_info: not used by this method.

     Returns:
       A `LeafNodeTensor` with the origin's values and a parent index
       gathered from the parent's own parent index.

     Raises:
       ValueError: if the origin is not a leaf or the parent is not a child
         node.
     """
     origin_value, origin_parent_value = sources
     if not isinstance(origin_value, prensor.LeafNodeTensor):
         raise ValueError("origin_value must be a leaf")
     if not isinstance(origin_parent_value, prensor.ChildNodeTensor):
         raise ValueError("origin_parent_value must be a child node")
     # Look up, for each origin element, the grandparent its parent points to.
     grandparent_index = tf.gather(origin_parent_value.parent_index,
                                   origin_value.parent_index)
     return prensor.LeafNodeTensor(grandparent_index, origin_value.values,
                                   self.is_repeated)
Beispiel #14
0
    def calculate(self, sources, destinations, options):
        """Subtracts each element's parent size from its positional index.

        Args:
          sources: exactly two leaf node tensors, the positional index
            followed by the size.
          destinations: not used by this method.
          options: not used by this method.

        Returns:
          A `LeafNodeTensor` that reuses the positional index's parent index
          and holds `positional index - size of its parent` as values.

        Raises:
          ValueError: if either source is not a `LeafNodeTensor`.
        """
        positional_index, size_value = sources
        if not isinstance(positional_index, prensor.LeafNodeTensor):
            raise ValueError("positional_index must be a LeafNodeTensor")
        if not isinstance(size_value, prensor.LeafNodeTensor):
            raise ValueError("size_value must be a LeafNodeTensor")

        parent_index = positional_index.parent_index
        # Pick, for every element, the size recorded for its parent.
        parent_sizes = tf.gather(size_value.values, parent_index)
        shifted_positions = positional_index.values - parent_sizes
        return prensor.LeafNodeTensor(positional_index.parent_index,
                                      shifted_positions, self.is_repeated)
Beispiel #15
0
 def calculate(self, sources, destinations, options, side_info=None):
     """Builds a non-repeated leaf from helpers applied to the root node.

     Args:
       sources: exactly one node tensor, expected to be a `RootNodeTensor`.
       destinations: not used by this method.
       options: not used by this method.
       side_info: not used by this method.

     Returns:
       A `LeafNodeTensor` whose parent index comes from
       `_get_proto_index_parent_index` and whose values come from
       `_get_input_proto_index`.

     Raises:
       ValueError: if the source is not a `RootNodeTensor`.
     """
     (root_node,) = sources
     # This check guarantees more than computability: it also ensures that
     # no "improper" reroots were done.
     if not isinstance(root_node, prensor.RootNodeTensor):
         raise ValueError(
             "Illegal operation: expected a true root node: got {}".format(
                 str(root_node)))
     parent_index = _get_proto_index_parent_index(root_node)
     proto_index = _get_input_proto_index(root_node)
     return prensor.LeafNodeTensor(parent_index,
                                   proto_index,
                                   is_repeated=False)
def create_repeated_leaf_node(parent_index, values):
    """Builds a repeated leaf node from plain Python lists.

    Args:
      parent_index: a list of integers that is converted to a 1-D int64
        tensor.
      values: a list of whatever type that the field represents.

    Returns:
      A `prensor.LeafNodeTensor` (is_repeated=True) holding the two
      constants.
    """
    parent_index_tensor = tf.constant(parent_index, dtype=tf.int64)
    values_tensor = tf.constant(values)
    return prensor.LeafNodeTensor(parent_index_tensor, values_tensor, True)
Beispiel #17
0
 def calculate(
         self,
         sources: Sequence[prensor.NodeTensor],
         destinations: Sequence[expression.Expression],
         options: calculate_options.Options,
         side_info: Optional[prensor.Prensor] = None) -> prensor.NodeTensor:
     """Builds a leaf holding the positional index of the single source.

     Args:
       sources: exactly one node tensor (a leaf or a child node).
       destinations: not used by this method.
       options: not used by this method.
       side_info: not used by this method.

     Returns:
       A `LeafNodeTensor` that reuses the source's parent index and whose
       values come from `prensor_util.get_positional_index`.

     Raises:
       ValueError: if the source is neither a leaf nor a child node.
     """
     (origin,) = sources
     has_parent_index = isinstance(
         origin, (prensor.LeafNodeTensor, prensor.ChildNodeTensor))
     if not has_parent_index:
         raise ValueError("Cannot calculate the positional index of the root")
     parent_index = origin.parent_index
     positions = prensor_util.get_positional_index(origin)
     return prensor.LeafNodeTensor(parent_index, positions, self.is_repeated)
Beispiel #18
0
def create_optional_leaf_node(parent_index: Sequence[int],
                              values: Sequence[Any]) -> prensor.LeafNodeTensor:
  """Builds an optional (non-repeated) leaf node from plain Python lists.

  Args:
    parent_index: a list of integers that is converted to a 1-D int64 tensor.
    values: a list of whatever type that the field represents.

  Returns:
    A `prensor.LeafNodeTensor` (is_repeated=False) holding the two constants.
  """
  parent_index_tensor = tf.constant(parent_index, dtype=tf.int64)
  values_tensor = tf.constant(values)
  return prensor.LeafNodeTensor(parent_index_tensor, values_tensor, False)
Beispiel #19
0
 def calculate(
         self,
         sources: Sequence[prensor.NodeTensor],
         destinations: Sequence[expression.Expression],
         options: calculate_options.Options,
         side_info: Optional[prensor.Prensor] = None) -> prensor.NodeTensor:
     """Applies `self._operation` to the values of all source leaves.

     Args:
       sources: node tensors; each is passed through `_leaf_node_or_error`.
       destinations: not used by this method.
       options: not used by this method.
       side_info: not used by this method.

     Returns:
       A `LeafNodeTensor` that reuses the first source's parent index and
       holds the operation's output as values.
     """
     leaves = [_leaf_node_or_error(source) for source in sources]
     leaf_values = [leaf.values for leaf in leaves]
     # TODO(martinz): Check that:
     # source_values have equal parent_index.
     # output_value has the same size as the input.
     first_parent_index = leaves[0].parent_index
     operation_output = self._operation(*leaf_values)
     return prensor.LeafNodeTensor(first_parent_index, operation_output,
                                   self._is_repeated)
Beispiel #20
0
def _filter_by_parent_indices_to_keep(
    node_value,
    parent_indices_to_keep):
  """Keeps only the elements of a node whose parent is being kept.

  Args:
    node_value: a child or leaf node tensor to filter.
    parent_indices_to_keep: joined against `node_value.parent_index` via
      `struct2tensor_ops.equi_join_indices`.

  Returns:
    A `_FilterChildNodeTensor` for a child node, or a `LeafNodeTensor` with
    gathered values for a leaf.

  Raises:
    ValueError: if `node_value` is neither a child nor a leaf node.
  """
  join_result = struct2tensor_ops.equi_join_indices(parent_indices_to_keep,
                                                    node_value.parent_index)
  kept_parent_index, kept_self_indices = join_result

  if isinstance(node_value, prensor.ChildNodeTensor):
    return _FilterChildNodeTensor(kept_parent_index, node_value.is_repeated,
                                  kept_self_indices)
  if not isinstance(node_value, prensor.LeafNodeTensor):
    raise ValueError("Unknown NodeValue type")
  kept_values = tf.gather(node_value.values, kept_self_indices)
  return prensor.LeafNodeTensor(kept_parent_index, kept_values,
                                node_value.is_repeated)
Beispiel #21
0
 def calculate(
         self,
         sources: Sequence[prensor.NodeTensor],
         destinations: Sequence[expression.Expression],
         options: calculate_options.Options,
         side_info: Optional[prensor.Prensor] = None) -> prensor.NodeTensor:
     """Re-parents a leaf onto its parent's parent.

     Args:
       sources: exactly two node tensors, the origin leaf followed by its
         parent child node.
       destinations: not used by this method.
       options: not used by this method.
       side_info: not used by this method.

     Returns:
       A `LeafNodeTensor` with the origin's values and a parent index
       gathered from the parent's own parent index.

     Raises:
       ValueError: if the origin is not a leaf or the parent is not a child
         node.
     """
     origin_value, origin_parent_value = sources
     if not isinstance(origin_value, prensor.LeafNodeTensor):
         raise ValueError("origin_value must be a leaf")
     if not isinstance(origin_parent_value, prensor.ChildNodeTensor):
         raise ValueError("origin_parent_value must be a child node")
     parent_to_grandparent = origin_parent_value.parent_index
     origin_to_parent = origin_value.parent_index
     # Compose the two index maps: origin -> parent -> grandparent.
     origin_to_grandparent = tf.gather(parent_to_grandparent,
                                       origin_to_parent)
     return prensor.LeafNodeTensor(origin_to_grandparent,
                                   origin_value.values, self.is_repeated)
Beispiel #22
0
  def calculate(
      self,
      sources: Sequence[prensor.NodeTensor],
      destinations: Sequence[expression.Expression],
      options: calculate_options.Options,
      side_info: Optional[prensor.Prensor] = None) -> prensor.NodeTensor:
    """Subtracts each element's parent size from its positional index.

    Args:
      sources: exactly two leaf node tensors, the positional index followed
        by the size.
      destinations: not used by this method.
      options: not used by this method.
      side_info: not used by this method.

    Returns:
      A `LeafNodeTensor` that reuses the positional index's parent index and
      holds `positional index - size of its parent` as values.

    Raises:
      ValueError: if either source is not a `LeafNodeTensor`.
    """
    positional_index, size_value = sources
    if not isinstance(positional_index, prensor.LeafNodeTensor):
      raise ValueError("positional_index must be a LeafNodeTensor")
    if not isinstance(size_value, prensor.LeafNodeTensor):
      raise ValueError("size_value must be a LeafNodeTensor")

    # Pick, for every element, the size recorded for its parent.
    parent_sizes = tf.gather(size_value.values, positional_index.parent_index)
    offset_from_size = positional_index.values - parent_sizes
    return prensor.LeafNodeTensor(positional_index.parent_index,
                                  offset_from_size, self.is_repeated)
Beispiel #23
0
def _filter_by_self_indices_to_keep(node_value,
                                    self_indices_to_keep
                                   ):
  """Restricts a node to the given indices of its own elements.

  Args:
    node_value: a root, child or leaf node tensor.
    self_indices_to_keep: indices into the node's own elements.

  Returns:
    A `_FilterRootNodeTensor`, a `_FilterChildNodeTensor`, or a
    `LeafNodeTensor`, depending on the type of `node_value`.

  Raises:
    ValueError: if `node_value` is none of the three known node types.
  """
  if isinstance(node_value, prensor.RootNodeTensor):
    # A root has no parent index; its new size is the number of kept indices.
    kept_count = tf.size(self_indices_to_keep)
    return _FilterRootNodeTensor(kept_count, self_indices_to_keep)

  if isinstance(node_value, prensor.ChildNodeTensor):
    kept_parent_index = tf.gather(node_value.parent_index,
                                  self_indices_to_keep)
    return _FilterChildNodeTensor(kept_parent_index, node_value.is_repeated,
                                  self_indices_to_keep)

  if not isinstance(node_value, prensor.LeafNodeTensor):
    raise ValueError("Unknown NodeValue type")

  kept_parent_index = tf.gather(node_value.parent_index, self_indices_to_keep)
  kept_values = tf.gather(node_value.values, self_indices_to_keep)
  return prensor.LeafNodeTensor(kept_parent_index, kept_values,
                                node_value.is_repeated)
Beispiel #24
0
def get_mock_leaf(is_repeated,
                  my_type,
                  name=None,
                  source_expressions=None,
                  calculate_is_identity=False):
    """Builds a `MockExpression` standing in for a leaf expression.

    Args:
      is_repeated: the is_repeated of the expression and of its empty output.
      my_type: the type of the expression and dtype of its empty output.
      name: a name of the expression.
      source_expressions: source expressions; the first one supplies the
        output when `calculate_is_identity` is true.
      calculate_is_identity: true iff this should say it is the identity.

    Returns:
      A `MockExpression`.
    """
    if not calculate_is_identity:
        # Not an identity: the mock outputs an empty leaf of the given type.
        empty_parent_index = tf.constant([], dtype=tf.int64)
        empty_values = tf.constant([], dtype=my_type)
        calculate_output = prensor.LeafNodeTensor(empty_parent_index,
                                                  empty_values, is_repeated)
    else:
        calculate_output = source_expressions[0].calculate_output

    return MockExpression(is_repeated,
                          my_type,
                          name=name,
                          source_expressions=source_expressions,
                          calculate_output=calculate_output,
                          calculate_is_identity=calculate_is_identity)
Beispiel #25
0
 def calculate(self, sources, destinations, options):
     """Broadcasts a leaf's values onto the elements of a sibling node.

     Args:
       sources: exactly two node tensors, the origin leaf followed by the
         sibling child node.
       destinations: not used by this method.
       options: not used by this method.

     Returns:
       A `LeafNodeTensor` whose parent index points at sibling elements and
       whose values are gathered from the origin.

     Raises:
       ValueError: if the origin is not a leaf or the sibling is not a child
         node.
     """
     origin_value, sibling_value = sources
     if not isinstance(origin_value, prensor.LeafNodeTensor):
         raise ValueError("origin not a LeafNodeTensor")
     if not isinstance(sibling_value, prensor.ChildNodeTensor):
         raise ValueError("sibling value is not a ChildNodeTensor")

     # Join semantics: for each sibling element i, if exactly n origin
     # elements j satisfy
     #   sibling_value.parent_index[i] == origin_value.parent_index[j]
     # then exactly n outputs k are produced with
     #   new_parent_index[k] = i
     #   new_values[k] = origin_value.values[j]
     # and ordering is preserved.
     join_result = struct2tensor_ops.equi_join_indices(
         sibling_value.parent_index, origin_value.parent_index)
     new_parent_index, origin_positions = join_result

     broadcast_values = tf.gather(origin_value.values, origin_positions)
     return prensor.LeafNodeTensor(new_parent_index, broadcast_values,
                                   self.is_repeated)
def get_mock_broken_leaf(
    declared_is_repeated,
    declared_type,
    actual_is_repeated,
    actual_type,
    name = None,
    source_expressions = None,
    calculate_is_identity = False):
  """Builds a leaf `MockExpression` whose output may contradict its claims.

  When declared_is_repeated != actual_is_repeated, or
  declared_type != actual_type, the expression will not typecheck
  when _ExpressionNode.calculate() is called.

  Args:
    declared_is_repeated: the is_repeated of the expression.
    declared_type: the type of the expression.
    actual_is_repeated: the is_repeated of the NodeTensor.
    actual_type: the type of the NodeTensor.
    name: a name of the expression.
    source_expressions: the result of get_source expressions()
    calculate_is_identity: true iff this should say it is the identity.

  Returns:
    An expression.

  """
  if not calculate_is_identity:
    # The output is an empty leaf built from the *actual* type/repetition.
    empty_parent_index = tf.constant([], dtype=tf.int64)
    empty_values = tf.constant([], dtype=actual_type)
    calculate_output = prensor.LeafNodeTensor(empty_parent_index,
                                              empty_values,
                                              actual_is_repeated)
  else:
    calculate_output = source_expressions[0].calculate_output

  return MockExpression(
      declared_is_repeated,
      declared_type,
      name=name,
      source_expressions=source_expressions,
      calculate_output=calculate_output,
      calculate_is_identity=calculate_is_identity)
Beispiel #27
0
 def calculate_from_parsed_field(self, parsed_field, destinations):
     """Builds a leaf node from a parsed field.

     Args:
       parsed_field: supplies `index` (used as the parent index) and `value`
         (used as the values).
       destinations: not used by this method.

     Returns:
       A `LeafNodeTensor` carrying this expression's `is_repeated`.
     """
     parent_index = parsed_field.index
     field_values = parsed_field.value
     return prensor.LeafNodeTensor(parent_index, field_values,
                                   self.is_repeated)
Beispiel #28
0
 def calculate_from_parsed_field(
         self, parsed_field: struct2tensor_ops._ParsedField,
         destinations: Sequence[expression.Expression]
 ) -> prensor.NodeTensor:
     """Builds a leaf node from a parsed field.

     Args:
       parsed_field: supplies `index` (used as the parent index) and `value`
         (used as the values).
       destinations: not used by this method.

     Returns:
       A `LeafNodeTensor` carrying this expression's `is_repeated`.
     """
     parent_index = parsed_field.index
     field_values = parsed_field.value
     return prensor.LeafNodeTensor(parent_index, field_values,
                                   self.is_repeated)