Ejemplo n.º 1
0
 def test_dash_in_name(self):
     """Dashes in step names survive a create_path/str round trip."""
     # Covers a plain dashed step, then a dash in the first and in the last
     # component of a parenthesized step.
     round_trip_inputs = (
         "foo-bar.baz",
         "(foo-bar.bak).baz",
         "(foo.bar-bak).baz",
     )
     for text in round_trip_inputs:
         parsed = create_path(text)
         self.assertEqual(str(parsed), text)
Ejemplo n.º 2
0
 def promote_and_broadcast(
         self, path_dictionary: Mapping[path.Step, CoercableToPath],
         dest_path_parent: CoercableToPath) -> "Expression":
     """Delegates to promote_and_broadcast.promote_and_broadcast.

     Args:
       path_dictionary: mapping whose values are coerced with
         path.create_path; the keys are passed through unchanged.
       dest_path_parent: coerced with path.create_path before delegating.

     Returns:
       The result of promote_and_broadcast.promote_and_broadcast.
     """
     origin_paths = {}
     for new_name, origin in path_dictionary.items():
         origin_paths[new_name] = path.create_path(origin)
     destination_parent = path.create_path(dest_path_parent)
     return promote_and_broadcast.promote_and_broadcast(
         self, origin_paths, destination_parent)
Ejemplo n.º 3
0
 def test_is_ancestor(self):
     """Exercises is_ancestor on a prefix, the path itself, and non-prefixes."""
     parent = create_path("foo.bar")
     child = create_path("foo.bar.baz")
     unrelated = create_path("fuzz")
     # Each row is (receiver, argument, expected result of is_ancestor).
     checks = (
         (parent, child, True),
         (parent, parent, True),
         (unrelated, child, False),
         (child, parent, False),
     )
     for receiver, argument, expected in checks:
         self.assertEqual(receiver.is_ancestor(argument), expected)
Ejemplo n.º 4
0
    def test_add(self):
        """Checks `+` with a path on the left and a path or string on the right."""
        # Path + path.
        path_plus_path = create_path("foo.bar") + create_path("baz.bax")
        self.assertEqual(path_plus_path, create_path("foo.bar.baz.bax"))

        # Path + plain string.
        path_plus_str = create_path("foo.bar") + "baz.bax"
        self.assertEqual(path_plus_str, create_path("foo.bar.baz.bax"))
Ejemplo n.º 5
0
 def test_cmp(self):
     """Checks the ordering and equality comparisons between paths."""
     # Paths built directly from an int step list and from a string.
     int_one = Path([1])
     str_foo = Path("foo")
     self.assertGreater(int_one, str_foo)
     self.assertLess(str_foo, int_one)
     self.assertGreater(int_one, Path([0]))

     # Paths parsed from dotted strings.
     foo = create_path("foo")
     foo_bar = create_path("foo.bar")
     foo_baz = create_path("foo.baz")
     self.assertGreater(foo_baz, foo)
     self.assertGreater(foo_baz, foo_bar)
     self.assertLess(foo, foo_bar)
     self.assertLess(foo_bar, foo_baz)
     # Equality is checked against a separately constructed path so that it
     # cannot pass on object identity alone.
     self.assertEqual(foo_baz, create_path("foo.baz"))
Ejemplo n.º 6
0
 def test_valid_map_indexing_step(self):
     """Checks field_list for paths whose first step has a bracketed key."""
     # Each row is (path text, expected field_list); the keys cover a plain
     # key, an empty key, and keys containing '.', parentheses and '['.
     cases = (
         ("my_map[some_key].some_value", ["my_map[some_key]", "some_value"]),
         ("my_map[].some_value", ["my_map[]", "some_value"]),
         ("my_map[key.1].some_value", ["my_map[key.1]", "some_value"]),
         ("my_map[(key)].some_value", ["my_map[(key)]", "some_value"]),
         ("my_map[[].some_value", ["my_map[[]", "some_value"]),
     )
     for text, expected_fields in cases:
         self.assertSequenceEqual(expected_fields,
                                  create_path(text).field_list)
Ejemplo n.º 7
0
  def _create_prensor_spec(self) -> prensor._PrensorTypeSpec:  # pylint: disable=protected-access
    """Builds the root prensor type spec from the configured value paths.

    The schema is read from the first entry of self._filenames. As a side
    effect, self._value_paths and self._value_dtypes are rebound to tuples
    ordered by the number of fields in each path.

    Returns:
      a root _PrensorTypeSpec.
    """
    # pylint: disable=protected-access
    first_file = pq.ParquetFile(self._filenames[0])
    arrow_schema = first_file.metadata.schema.to_arrow_schema()

    # Keep parsed paths, raw paths and dtypes aligned while ordering them by
    # the number of fields in the path.
    parsed_paths = [path.create_path(raw) for raw in self._value_paths]
    by_depth = sorted(
        zip(parsed_paths, self._value_paths, self._value_dtypes),
        key=lambda entry: len(entry[0].field_list))
    parsed_paths, self._value_paths, self._value_dtypes = zip(*by_depth)

    # Group (path index, remaining fields) pairs under the first step of each
    # path, in first-seen order, so paths sharing a parent end up together.
    children_by_step = collections.OrderedDict()
    for index, parsed in enumerate(parsed_paths):
      first_step = parsed.field_list[0]
      remainder = parsed.field_list[1:]
      children_by_step.setdefault(first_step, []).append((index, remainder))

    child_specs = [
        self._create_children_spec(arrow_schema.field(step), children)
        for step, children in children_by_step.items()
    ]
    return prensor._PrensorTypeSpec(
        None, prensor._PrensorTypeSpec._NodeType.ROOT, tf.int64, child_specs)
Ejemplo n.º 8
0
 def test_get_sparse_tensor(self):
     """Checks prensor._get_sparse_tensor for "foo" on the simple prensor."""
     simple = prensor_test_util.create_simple_prensor()
     foo_sparse = prensor._get_sparse_tensor(simple, path.create_path("foo"))
     # Compare each component of the sparse tensor separately.
     self.assertAllEqual(foo_sparse.indices, [[0], [1], [2]])
     self.assertAllEqual(foo_sparse.dense_shape, [3])
     self.assertAllEqual(foo_sparse.values, [9, 8, 7])
Ejemplo n.º 9
0
 def test_prensor_to_ragged_tensor(self):
     """Checks get_ragged_tensor for "doc.bar" under every tested option."""
     for options in _OPTIONS_TO_TEST:
         nested = prensor_test_util.create_nested_prensor()
         doc_bar = path.create_path("doc.bar")
         actual = nested.get_ragged_tensor(doc_bar, options)
         expected = [[[b"a"]], [[b"b", b"c"], [b"d"]], []]
         self.assertAllEqual(actual, expected)
Ejemplo n.º 10
0
    def test_filter_by_child_create_nested_prensor_2(self):
        """Tests filter_by_child.

        In particular, it checks for the case where parent_index != self
        index.
        """
        source = create_expression.create_expression_from_prensor(
            _create_nested_prensor_2())
        filtered = filter_expression.filter_by_child(
            source, path.create_path("doc"), "keep_me", "new_doc")
        [result] = calculate.calculate_prensors([filtered])

        def node_at(*steps):
            # Node of the descendant reached by following `steps` from result.
            return result.get_descendant_or_error(path.Path(list(steps))).node

        self.assertAllEqual(node_at("new_doc").parent_index, [1])
        self.assertAllEqual(node_at("new_doc", "keep_me").parent_index, [0])
        self.assertAllEqual(node_at("new_doc", "keep_me").values, [True])
        self.assertAllEqual(node_at("new_doc", "bar").parent_index, [0, 0])
        self.assertAllEqual(node_at("new_doc", "bar").values, [b"b", b"c"])
Ejemplo n.º 11
0
  def map_ragged_tensors(self, parent_path,
                         source_fields,
                         operator,
                         is_repeated, dtype,
                         new_field_name):
    """Maps a set of primitive fields of a message to a new field.

    Thin wrapper over map_prensor.map_ragged_tensor: parent_path is coerced
    with path.create_path and every name in source_fields is turned into a
    single-step path.Path.

    In contrast to map_field_values, the result may be shaped differently
    from its inputs: two optional fields can be combined into a repeated
    field, or the last dimension of a repeated field can be reduced into an
    optional field. The operator's result must have the rank implied by
    is_repeated (2D when true, 1D when false), and its first dimension must
    equal the first dimension of the input tensor.

    NOTE(review): this contract has been documented in terms of sparse
    tensors, but the call goes to map_ragged_tensor, so the operator
    presumably consumes and produces ragged tensors -- confirm.

    Args:
      parent_path: the parent of the input and output fields.
      source_fields: the nonempty list of names of the source fields.
      operator: an operator that takes len(source_fields) tensors and returns
        a tensor of the appropriate shape.
      is_repeated: whether the output is repeated.
      dtype: the dtype of the result.
      new_field_name: the name of the resulting field.

    Returns:
      A new query.
    """
    parent = path.create_path(parent_path)
    source_paths = [path.Path([name]) for name in source_fields]
    return map_prensor.map_ragged_tensor(
        self, parent, source_paths, operator, is_repeated, dtype,
        new_field_name)
Ejemplo n.º 12
0
  def testDeepStructuredTensor(self):
    """Converts a partitioned structured tensor and checks three nodes."""
    ragged = tf.RaggedTensor.from_value_rowids(
        tf.constant([[1, 2], [3, 4], [5, 6]]), [0, 0, 1])

    flat_struct = _make_structured_tensor([2], {"r": ragged})
    # Wrap the struct in one more outer dimension before converting.
    outer_partition = RowPartition.from_row_splits([0, 1, 2])
    deep_struct = flat_struct.partition_outer_dimension(outer_partition)

    p = structured_tensor_to_prensor.structured_tensor_to_prensor(deep_struct)

    def node_at(text):
      # Node of the descendant of the converted prensor at the dotted path.
      return p.get_descendant(path.create_path(text)).node

    leaf = node_at("data.r.data")
    self.assertAllEqual(leaf.parent_index, [0, 0, 1, 1, 2, 2])
    self.assertAllEqual(leaf.values, [1, 2, 3, 4, 5, 6])
    self.assertAllEqual(node_at("data").parent_index, [0, 1])
    self.assertAllEqual(node_at("data.r").parent_index, [0, 0, 1])
Ejemplo n.º 13
0
 def test_filter_by_child(self):
     """Tests filter_by_child on the big test prensor."""
     source = create_expression.create_expression_from_prensor(
         prensor_test_util.create_big_prensor())
     filtered = filter_expression.filter_by_child(
         source, path.create_path("doc"), "keep_me", "new_doc")
     [result] = calculate.calculate_prensors([filtered])

     def node_at(*steps):
         # Node of the descendant reached by following `steps` from result.
         return result.get_descendant_or_error(path.Path(list(steps))).node

     self.assertAllEqual(node_at("new_doc").parent_index, [1])
     self.assertAllEqual(node_at("new_doc", "keep_me").parent_index, [0])
     self.assertAllEqual(node_at("new_doc", "keep_me").values, [True])
     self.assertAllEqual(node_at("new_doc", "bar").parent_index, [0, 0])
     self.assertAllEqual(node_at("new_doc", "bar").values, [b"b", b"c"])
 def test_filter_by_child_create_nested_prensor(self):
     """Tests filter_by_child, materializing the result inside a session."""
     with self.session(use_gpu=False) as sess:
         source = create_expression.create_expression_from_prensor(
             _create_nested_prensor())
         filtered = filter_expression.filter_by_child(
             source, path.create_path("doc"), "keep_me", "new_doc")
         calculated = calculate.calculate_prensors([filtered])[0]
         result = prensor_value.materialize(calculated, sess)

         def node_at(*steps):
             # Node of the descendant reached by following `steps`.
             return result.get_descendant_or_error(
                 path.Path(list(steps))).node

         self.assertAllEqual(node_at("new_doc").parent_index, [1])
         self.assertAllEqual(
             node_at("new_doc", "keep_me").parent_index, [0])
         self.assertAllEqual(node_at("new_doc", "keep_me").values, [True])
         self.assertAllEqual(
             node_at("new_doc", "bar").parent_index, [0, 0])
         self.assertAllEqual(
             node_at("new_doc", "bar").values, [b"b", b"c"])
Ejemplo n.º 15
0
 def test_prensor_to_sparse_tensor(self):
     """Checks get_sparse_tensor for "foo" under every tested option."""
     for options in _OPTIONS_TO_TEST:
         simple = prensor_test_util.create_simple_prensor()
         foo_path = path.create_path("foo")
         foo_sparse = simple.get_sparse_tensor(foo_path, options=options)
         # Compare each component of the sparse tensor separately.
         self.assertAllEqual(foo_sparse.indices, [[0], [1], [2]])
         self.assertAllEqual(foo_sparse.dense_shape, [3])
         self.assertAllEqual(foo_sparse.values, [9, 8, 7])
Ejemplo n.º 16
0
 def test_get_ragged_tensor(self):
     """Tests get_ragged_tensor on a deep field."""
     for options in _OPTIONS_TO_TEST:
         nested = prensor_test_util.create_nested_prensor()
         doc_bar = path.create_path("doc.bar")
         actual = prensor._get_ragged_tensor(nested, doc_bar, options)
         expected = [[[b"a"]], [[b"b", b"c"], [b"d"]], []]
         self.assertAllEqual(actual, expected)
Ejemplo n.º 17
0
 def test_prefix(self):
     """Checks prefix() for zero, positive and negative arguments."""
     original = create_path("foo.bar.baz")
     # Each row is (argument to prefix, expected str() of the result).
     expectations = (
         (0, ""),
         (1, "foo"),
         (2, "foo.bar"),
         (3, "foo.bar.baz"),
         (-1, "foo.bar"),
         (-2, "foo"),
     )
     for length, expected_text in expectations:
         self.assertEqual(str(original.prefix(length)), expected_text)
Ejemplo n.º 18
0
  def testStructuredTensorCreation(self):
    """Converts a structured tensor and checks the "r.data" node."""
    ragged = tf.RaggedTensor.from_value_rowids(
        tf.constant([[1, 2], [3, 4], [5, 6]]), [0, 0, 1])

    struct = _make_structured_tensor([2], {"r": ragged})
    converted = structured_tensor_to_prensor.structured_tensor_to_prensor(
        struct)
    leaf = converted.get_descendant(path.create_path("r.data")).node
    self.assertAllEqual(leaf.parent_index, [0, 0, 1, 1, 2, 2])
    self.assertAllEqual(leaf.values, [1, 2, 3, 4, 5, 6])
Ejemplo n.º 19
0
    def test_filter_by_sibling(self):
        r"""Tests filter_by_sibling.

        Beginning with the struct:
             -----*----------------------------------------------------
            /                       \                                  \
         root0                    root1-----------------------      root2 (empty)
          /   \                   /    \               \      \
          |  keep_my_sib0:False  |  keep_my_sib1:True   | keep_my_sib2:False
        doc0-----               doc1---------------    doc2--------
         |       \                \           \    \               \
        bar:"a"  keep_me:False    bar:"b" bar:"c" keep_me:True      bar:"d"

        Filter doc with keep_my_sib:

        End with the struct (suppressing original doc):
             -----*----------------------------------------------------
            /                       \                                  \
        root0                    root1------------------        root2 (empty)
            \                   /    \                  \
            keep_my_sib0:False  |  keep_my_sib1:True   keep_my_sib2:False
                               new_doc0-----------
                                 \           \    \
                                 bar:"b" bar:"c" keep_me:True

        """
        source = create_expression.create_expression_from_prensor(
            _create_nested_prensor())
        filtered = filter_expression.filter_by_sibling(
            source, path.create_path("doc"), "keep_my_sib", "new_doc")
        [result] = calculate.calculate_prensors([filtered])

        def node_at(*steps):
            # Node of the descendant reached by following `steps` from result.
            return result.get_descendant_or_error(path.Path(list(steps))).node

        self.assertAllEqual(node_at("new_doc").parent_index, [1])
        self.assertAllEqual(node_at("new_doc", "keep_me").parent_index, [0])
        self.assertAllEqual(node_at("new_doc", "keep_me").values, [True])
        self.assertAllEqual(node_at("new_doc", "bar").parent_index, [0, 0])
        self.assertAllEqual(node_at("new_doc", "bar").values, [b"b", b"c"])
Ejemplo n.º 20
0
    def map_field_values(self, source_path, operator, dtype, new_field_name):
        """Map a primitive field to create a new primitive field.

        Coerces source_path with path.create_path and delegates to
        map_values.map_values.

        Note: the dtype argument is added since the v1 API.

        Args:
          source_path: the origin path.
          operator: an element-wise operator that takes a 1-dimensional
            vector.
          dtype: the type of the output.
          new_field_name: the name of a new sibling of source_path.

        Returns:
          the resulting root expression.
        """
        origin = path.create_path(source_path)
        return map_values.map_values(
            self, origin, operator, dtype, new_field_name)
Ejemplo n.º 21
0
    def slice(self, source_path, new_field_name, begin=None, end=None):
        """Creates a slice copy of source_path at new_field_path.

        Coerces source_path with path.create_path and delegates to
        slice_expression.slice_expression.

        Note that if begin or end is negative, it is considered relative to
        the size of the array. e.g., slice(...,begin=-1) will get the last
        element of every array.

        Args:
          source_path: the source of the slice.
          new_field_name: the new field that is generated.
          begin: the beginning of the slice (inclusive).
          end: the end of the slice (exclusive).

        Returns:
          An Expression object representing the result of the operation.
        """
        origin = path.create_path(source_path)
        return slice_expression.slice_expression(
            self, origin, new_field_name, begin, end)
Ejemplo n.º 22
0
 def testField(self,
               shape,
               fields,
               path_to_check=None,
               parent_indices=None,
               root_size=None,
               values=None):
   """Converts a structured tensor to a prensor and checks what was requested.

   Args:
     shape: forwarded to _make_structured_tensor.
     fields: forwarded to _make_structured_tensor.
     path_to_check: if not None, the path of a descendant that must exist.
     parent_indices: if not None, expected parent_index of that descendant.
     root_size: if not None, expected size of the root node.
     values: if not None, expected values of that descendant.
   """
   struct = _make_structured_tensor(shape, fields)
   converted = structured_tensor_to_prensor.structured_tensor_to_prensor(struct)
   if root_size is not None:
     self.assertAllEqual(root_size, converted.node.size)

   # Without a path there is no descendant to inspect.
   if path_to_check is None:
     return

   target = converted.get_descendant(path.create_path(path_to_check))
   self.assertIsNotNone(target)
   target_node = target.node
   if parent_indices is not None:
     self.assertAllEqual(target_node.parent_index, parent_indices)
   if values is not None:
     self.assertAllEqual(target_node.values, values)
Ejemplo n.º 23
0
 def create_has_field(self, source_path,
                      new_field_name):
   """Creates a field that is the presence of the source path.

   Coerces source_path with path.create_path and delegates to size.has.
   """
   origin = path.create_path(source_path)
   return size.has(self, origin, new_field_name)
Ejemplo n.º 24
0
 def reroot(self, new_root):
   """Returns a new list of protocol buffers available at new_root.

   Coerces new_root with path.create_path and delegates to reroot.reroot.
   """
   root_path = path.create_path(new_root)
   return reroot.reroot(self, root_path)
Ejemplo n.º 25
0
 def create_size_field(self, source_path,
                       new_field_name):
   """Creates a field that is the size of the source path.

   Coerces source_path with path.create_path and delegates to size.size.
   """
   origin = path.create_path(source_path)
   return size.size(self, origin, new_field_name)
Ejemplo n.º 26
0
 def promote_and_broadcast(
     self, path_dictionary,
     dest_path_parent):
   """Delegates to promote_and_broadcast.promote_and_broadcast.

   Every value of path_dictionary and dest_path_parent itself are coerced
   with path.create_path first; the dictionary keys are passed unchanged.
   """
   origin_paths = {}
   for new_name, origin in path_dictionary.items():
     origin_paths[new_name] = path.create_path(origin)
   destination_parent = path.create_path(dest_path_parent)
   return promote_and_broadcast.promote_and_broadcast(
       self, origin_paths, destination_parent)
Ejemplo n.º 27
0
 def project(self, path_list):
   """Constrains the paths to those listed.

   Each entry of path_list is coerced with path.create_path before the call
   is delegated to project.project.
   """
   kept_paths = []
   for entry in path_list:
     kept_paths.append(path.create_path(entry))
   return project.project(self, kept_paths)
Ejemplo n.º 28
0
 def broadcast(self, source_path, sibling_field,
               new_field_name):
   """Broadcasts the existing field at source_path to the sibling_field.

   Coerces source_path with path.create_path and delegates to
   broadcast_module.broadcast.
   """
   origin = path.create_path(source_path)
   return broadcast_module.broadcast(
       self, origin, sibling_field, new_field_name)
Ejemplo n.º 29
0
 def promote(self, source_path, new_field_name):
   """Promotes source_path to be a field new_field_name in its grandparent.

   Coerces source_path with path.create_path and delegates to
   promote.promote.
   """
   origin = path.create_path(source_path)
   return promote.promote(self, origin, new_field_name)
Ejemplo n.º 30
0
def create_transformed_field(
        expr: expression.Expression, source_path: path.CoercableToPath,
        dest_field: StrStep,
        transform_fn: TransformFn) -> expression.Expression:
    """Create an expression that transforms serialized proto tensors.

    The transform_fn argument should take the form:

    def transform_fn(parent_indices, values):
      ...
      return (transformed_parent_indices, transformed_values)

    Given:
    - parent_indices: an int64 vector of non-decreasing parent message
      indices.
    - values: a string vector of serialized protos having the same shape as
      `parent_indices`.
    `transform_fn` must return new parent indices and serialized values
    encoding the same proto message as the passed in `values`.  These two
    vectors must have the same size, but it need not be the same as the input
    arguments.

    Args:
      expr: a source expression containing `source_path`.
      source_path: the path to the field to transform.
      dest_field: the name of the newly created field. This field will be a
        sibling of the field identified by `source_path`.
      transform_fn: a callable that accepts parent_indices and serialized
        proto values and returns a possibly modified parent_indices and
        values.

    Returns:
      An expression.

    Raises:
      ValueError: if the source path is not a proto message field.
    """
    source_path = path.create_path(source_path)
    source_expr = expr.get_descendant_or_error(source_path)
    if not isinstance(source_expr, _ProtoChildExpression):
        message = (
            "Expected _ProtoChildExpression for field {}, but found {}."
        ).format(str(source_path), source_expr)
        raise ValueError(message)

    if not isinstance(source_expr, _TransformProtoChildExpression):
        # Untransformed source: the caller's function is applied as is.
        final_transform = transform_fn
    else:
        # The source expression of a _TransformProtoChildExpression must
        # always be the original _ProtoChildExpression, before any
        # transformation, so that the fields needed for parsing can be
        # propagated. Two transforms applied in sequence would therefore
        # share one source and each operate on it directly, rather than the
        # second operating on the output of the first.
        # As a workaround, the user supplied function is wrapped so that it
        # first runs the source's own transform function.
        # The cost is that the initial transform may be applied redundantly
        # when other expressions are derived directly from it.
        def final_transform(parent_indices: tf.Tensor,
                            values: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
            upstream_indices, upstream_values = source_expr.transform_fn(
                parent_indices, values)
            return transform_fn(upstream_indices, upstream_values)

    transformed_expr = _TransformProtoChildExpression(
        parent=source_expr._parent,  # pylint: disable=protected-access
        desc=source_expr._desc,  # pylint: disable=protected-access
        is_repeated=source_expr.is_repeated,
        name_as_field=source_expr.name_as_field,
        transform_fn=final_transform)
    # The new field is attached next to the source, under the same parent.
    sibling_path = source_path.get_parent().get_child(dest_field)
    return expression_add.add_paths(expr, {sibling_path: transformed_expr})