Beispiel #1
0
def structured_tensor_to_prensor(
    st: structured_tensor.StructuredTensor,
    default_field_name: path.Step = "data") -> prensor.Prensor:
  """Builds a Prensor that is logically equivalent to a StructuredTensor.

  Certain rank information of `st` must be known.

  Three cases are distinguished:
    * `st` has at least one row partition: the result of
      `_structured_tensor_to_child_prensor` is attached to the root under
      `default_field_name`.
    * `st` has no row partitions and `st.rank == 1`: the mapping returned by
      `_structured_tensor_prensor_map` becomes the children of the root.
    * otherwise `st` is treated as a scalar: `_expand_dims(st, 0)` is applied
      and the conversion is retried on the result.

  Args:
    st: the structured tensor to convert.
    default_field_name: the name to use when there is an unnamed dimension.

  Returns:
    a logically equivalent Prensor.

  Raises:
    ValueError: if there is an issue with the structured tensor.
  """
  partition_count = len(st.row_partitions)

  if partition_count == 0 and st.rank != 1:
    # st is a scalar StructuredTensor.
    expanded = _expand_dims(st, 0)
    return structured_tensor_to_prensor(expanded, default_field_name)

  if partition_count >= 1:
    children = {
        default_field_name:
            _structured_tensor_to_child_prensor(st, default_field_name)
    }
    root = prensor.RootNodeTensor(st.nrows())
  else:
    root = prensor.RootNodeTensor(st.nrows())
    children = _structured_tensor_prensor_map(st, default_field_name)

  return prensor.create_prensor_from_root_and_children(root, children)
Beispiel #2
0
    def testMultipleColumnsTwoRowGroupsAndEqualBatchSize_OutputsPrensor(self):
        """Checks the first batch read for three value paths.

        A ParquetDataset is built over `self._rowgroup_test_filenames` with a
        batch size of 2. Every batch is iterated, but only the first one is
        compared against the expected prensor.
        """
        requested_paths = [
            "DocId", "Name.Language.Code", "Name.Language.Country"
        ]
        pq_ds = parquet.ParquetDataset(
            filenames=self._rowgroup_test_filenames,
            value_paths=requested_paths,
            batch_size=2)

        # Expected nodes, created in the same order as they appear in the
        # prensor tree below.
        root_node = prensor.RootNodeTensor(tf.constant(2, dtype=tf.int64))
        doc_id_node = prensor.LeafNodeTensor(
            tf.constant([0, 1], dtype=tf.int64),
            tf.constant([10, 20], dtype=tf.int64), True)
        name_node = prensor.ChildNodeTensor(
            tf.constant([0, 0, 0, 1], dtype=tf.int64), True)
        language_node = prensor.ChildNodeTensor(
            tf.constant([0, 0, 2], dtype=tf.int64), True)
        code_node = prensor.LeafNodeTensor(
            tf.constant([0, 1, 2], dtype=tf.int64),
            tf.constant([b"en-us", b"en", b"en-gb"]), True)
        country_node = prensor.LeafNodeTensor(
            tf.constant([0, 2], dtype=tf.int64),
            tf.constant([b"us", b"gb"]), True)

        expected_prensor = prensor.create_prensor_from_descendant_nodes({
            path.Path([]): root_node,
            path.Path(["DocId"]): doc_id_node,
            path.Path(["Name"]): name_node,
            path.Path(["Name", "Language"]): language_node,
            path.Path(["Name", "Language", "Code"]): code_node,
            path.Path(["Name", "Language", "Country"]): country_node,
        })

        for batch_index, actual_prensor in enumerate(pq_ds):
            if batch_index != 0:
                # Later batches are consumed but intentionally not compared.
                continue
            self._assertPrensorEqual(actual_prensor, expected_prensor)
def _as_root_node_tensor(node_tensor):
    """Returns a RootNodeTensor corresponding to `node_tensor`.

    Args:
        node_tensor: a `prensor.RootNodeTensor` or `prensor.ChildNodeTensor`.

    Returns:
        `node_tensor` itself when it is already a RootNodeTensor; otherwise a
        new RootNodeTensor built from the child node's `size`.

    Raises:
        ValueError: if `node_tensor` is neither a root nor a child node tensor.
    """
    if isinstance(node_tensor, prensor.RootNodeTensor):
        # Already a root: hand it back untouched.
        return node_tensor

    if not isinstance(node_tensor, prensor.ChildNodeTensor):
        found_type = type(node_tensor)
        raise ValueError(
            "Must be child or root node tensor (found {})".format(found_type))

    return prensor.RootNodeTensor(node_tensor.size)
Beispiel #4
0
    def testPromoteAndProjectExpression(self):
        """Checks promote + project expressions evaluated over a parquet file.

        `Name.Language.Code` is promoted to `Name.new_code` and projected,
        alongside a separate projection of `DocId` from the unmodified
        expression. Every element yielded by `calculate_parquet_values` is
        compared against the same pair of expected prensors.
        """
        filenames = [
            "struct2tensor/testdata/parquet_testdata/dremel_example.parquet"
        ]
        batch_size = 2

        exp = parquet.create_expression_from_parquet_file(filenames)
        code_path = path.Path(["Name", "Language", "Code"])
        promoted_exp = promote.promote(exp, code_path, "new_code")
        new_code_project_exp = project.project(
            promoted_exp, [path.Path(["Name", "new_code"])])
        docid_project_exp = project.project(exp, [path.Path(["DocId"])])

        requested_exps = [new_code_project_exp, docid_project_exp]
        pqds = parquet.calculate_parquet_values(requested_exps, exp,
                                                filenames, batch_size)

        # Expected result of the promoted-and-projected expression.
        new_code_root = prensor.RootNodeTensor(tf.constant(2, dtype=tf.int64))
        new_code_name = prensor.ChildNodeTensor(
            tf.constant([0, 0, 0, 1], dtype=tf.int64), True)
        new_code_leaf = prensor.LeafNodeTensor(
            tf.constant([0, 0, 2], dtype=tf.int64),
            tf.constant([b"en-us", b"en", b"en-gb"]), True)
        new_code_expected = prensor.create_prensor_from_descendant_nodes({
            path.Path([]): new_code_root,
            path.Path(["Name"]): new_code_name,
            path.Path(["Name", "new_code"]): new_code_leaf,
        })

        # Expected result of the DocId projection.
        docid_root = prensor.RootNodeTensor(tf.constant(2, dtype=tf.int64))
        docid_leaf = prensor.LeafNodeTensor(
            tf.constant([0, 1], dtype=tf.int64),
            tf.constant([10, 20], dtype=tf.int64), False)
        docid_expected = prensor.create_prensor_from_descendant_nodes({
            path.Path([]): docid_root,
            path.Path(["DocId"]): docid_leaf,
        })

        for batch in pqds:
            # Results come back in the order the expressions were requested.
            actual_new_code, actual_docid = batch[0], batch[1]

            self._assertPrensorEqual(actual_new_code, new_code_expected)
            self._assertPrensorEqual(actual_docid, docid_expected)
Beispiel #5
0
    def testPlaceholderExpression(self):
        """Checks promote + project on a placeholder expression fed a prensor.

        A placeholder expression is created from a schema, `user.friends` is
        promoted to `new_friends` and projected, and the resulting node is
        evaluated with the nested test prensor supplied through `feed_dict`.
        The node's `is_repeated`, `values` and `parent_index` are compared
        with those of the expected `new_friends` leaf.
        """
        pren = prensor_test_util.create_nested_prensor()

        expected_root = prensor.RootNodeTensor(tf.constant(3, dtype=tf.int64))
        expected_leaf = prensor.LeafNodeTensor(
            tf.constant([0, 1, 1, 1, 2], dtype=tf.int64),
            tf.constant(["a", "b", "c", "d", "e"], dtype=tf.string), True)
        expected_pren = prensor.create_prensor_from_descendant_nodes({
            path.Path([]): expected_root,
            path.Path(["new_friends"]): expected_leaf,
        })

        # Schema pieces, assembled bottom-up.
        doc_schema = {
            "is_repeated": True,
            "children": {
                "bar": {
                    "is_repeated": True,
                    "dtype": tf.string
                },
                "keep_me": {
                    "is_repeated": False,
                    "dtype": tf.bool
                }
            }
        }
        user_schema = {
            "is_repeated": True,
            "children": {
                "friends": {
                    "is_repeated": True,
                    "dtype": tf.string
                }
            }
        }
        root_schema = mpp.create_schema(
            is_repeated=True,
            children={
                "doc": doc_schema,
                "user": user_schema
            })

        new_friends_path = path.Path(["new_friends"])

        exp = placeholder.create_expression_from_schema(root_schema)
        promote_exp = promote.promote(exp, path.Path(["user", "friends"]),
                                      "new_friends")
        project_exp = project.project(promote_exp, [new_friends_path])
        new_friends_exp = project_exp.get_descendant(new_friends_path)

        result = calculate.calculate_values([new_friends_exp],
                                            feed_dict={exp: pren})

        res_node = result[0]
        exp_node = expected_pren.get_descendant(new_friends_path).node

        self.assertAllEqual(res_node.is_repeated, exp_node.is_repeated)
        self.assertAllEqual(res_node.values, exp_node.values)
        self.assertAllEqual(res_node.parent_index, exp_node.parent_index)
Beispiel #6
0
    def testCreateExpressionFromSchema(self):
        """Checks a placeholder expression built from a schema with no children.

        The expression is evaluated with a root-only prensor supplied through
        `feed_dict`; the resulting node's `is_repeated` and `size` must match
        those of the fed prensor's root node.
        """
        childless_schema = mpp.create_schema(is_repeated=True, children={})
        exp = placeholder.create_expression_from_schema(childless_schema)

        root_path = path.Path([])
        root_node = prensor.RootNodeTensor(tf.constant(1, dtype=tf.int64))
        pren = prensor.create_prensor_from_descendant_nodes(
            {root_path: root_node})

        result = calculate.calculate_values([exp], feed_dict={exp: pren})
        res_node = result[0]
        exp_node = pren.get_descendant(root_path).node

        self.assertAllEqual(res_node.is_repeated, exp_node.is_repeated)
        self.assertAllEqual(res_node.size, exp_node.size)
Beispiel #7
0
def create_root_node(size: int) -> prensor.RootNodeTensor:
  """Creates a RootNodeTensor whose size is `size` as an int64 constant.

  Args:
    size: the value wrapped in a `tf.constant` of dtype `tf.int64`.

  Returns:
    a `prensor.RootNodeTensor` constructed from that constant.
  """
  size_tensor = tf.constant(size, dtype=tf.int64)
  return prensor.RootNodeTensor(size_tensor)