Exemple #1
0
def _promote_impl(
        root: expression.Expression, p: path.Path,
        new_field_name: path.Step) -> Tuple[expression.Expression, path.Path]:
    """Promotes the node at `p` to be a named child of its grandparent.

    Args:
      root: The root expression.
      p: The path to promote. It may point at a leaf or at a child node.
      new_field_name: The name of the promoted field.

    Returns:
      A pair: the expression returned by `expression_add.add_paths` and the
      path of the promoted field (a child of the grandparent of `p`).

    Raises:
      ValueError: if `p` has fewer than two steps.
    """
    if len(p) < 2:
        raise ValueError("Cannot do a promotion beyond the root: {}".format(
            str(p)))

    origin_parent_path = p.get_parent()
    origin_grandparent_path = origin_parent_path.get_parent()
    origin_expr = root.get_descendant_or_error(p)
    promoted_path = origin_grandparent_path.get_child(new_field_name)

    # Leaf and non-leaf origins are handled by different expression classes.
    factory = (PromoteExpression
               if origin_expr.is_leaf else PromoteChildExpression)
    promoted_expr = factory(origin_expr,
                            root.get_descendant_or_error(origin_parent_path))
    new_root = expression_add.add_paths(root, {promoted_path: promoted_expr})
    return new_root, promoted_path
Exemple #2
0
def get_index_from_end(
        t: expression.Expression, source_path: path.Path,
        new_field_name: path.Step) -> Tuple[expression.Expression, path.Path]:
    """Adds a sibling of `source_path` holding the index counted from the end.

    Given an array ["a", "b", "c"], with indices [0, 1, 2], the result of
    this is [-3, -2, -1].

    Args:
      t: original expression.
      source_path: path in the expression to get the index of.
      new_field_name: the name of the new field.

    Returns:
      The new expression and the new path as a pair.
    """
    destination = source_path.get_parent().get_child(new_field_name)

    # Build the positional index and the size as anonymous helper fields on a
    # scratch expression.
    scratch, index_path = get_positional_index(t, source_path,
                                               path.get_anonymous_field())
    scratch, size_path = size.size_anonymous(scratch, source_path)

    from_end_expr = _PositionalIndexFromEndExpression(
        scratch.get_descendant_or_error(index_path),
        scratch.get_descendant_or_error(size_path))
    scratch = expression_add.add_paths(scratch, {destination: from_end_expr})

    # Add only the destination field back onto the original expression, which
    # removes the intermediate anonymous nodes from the result.
    return expression_add.add_to(t, {destination: scratch}), destination
def _promote_and_broadcast_name(root, origin, dest_path_parent, field_name):
    """Promotes and broadcasts `origin` anonymously, then adds it by name.

    Args:
      root: the root expression.
      origin: forwarded to `promote_and_broadcast_anonymous` as the origin.
      dest_path_parent: the path under which the named field is added.
      field_name: the name of the new field under `dest_path_parent`.

    Returns:
      The expression returned by `expression_add.add_paths`. Unlike the
      anonymous helper, no path is returned.
    """
    broadcast_root, anonymous_path = promote_and_broadcast_anonymous(
        root, origin, dest_path_parent)
    named_path = dest_path_parent.get_child(field_name)
    anonymous_expr = broadcast_root.get_descendant_or_error(anonymous_path)
    return expression_add.add_paths(broadcast_root,
                                    {named_path: anonymous_expr})
Exemple #4
0
def _broadcast_impl(root, origin, sibling, new_field_name):
    """Adds a `_BroadcastExpression` of `origin` as a child of its sibling.

    Args:
      root: the root expression.
      origin: the path of the field to broadcast.
      sibling: the step naming a sibling of `origin` (same parent).
      new_field_name: the name of the new field created under the sibling.

    Returns:
      A pair of the new root expression and the path of the new field.
    """
    sibling_path = origin.get_parent().get_child(sibling)
    origin_expr = root.get_descendant_or_error(origin)
    sibling_expr = root.get_descendant_or_error(sibling_path)
    broadcast_path = sibling_path.get_child(new_field_name)
    broadcast_expr = _BroadcastExpression(origin_expr, sibling_expr)
    new_root = expression_add.add_paths(root, {broadcast_path: broadcast_expr})
    return new_root, broadcast_path
Exemple #5
0
def filter_by_sibling(expr, p,
                      sibling_field_name,
                      new_field_name):
  """Filters the field at `p` using one of its siblings as a mask.

  This is similar to boolean_mask. The path being filtered and the sibling
  must have identical shapes (e.g., each parent object must have an equal
  number of source and sibling children).

  Args:
    expr: the root expression.
    p: a path to the source to be filtered.
    sibling_field_name: the sibling to use as a mask.
    new_field_name: a new sibling to create.

  Returns:
    a new root.
  """
  source_expr = expr.get_descendant_or_error(p)
  shared_parent = p.get_parent()
  mask_path = shared_parent.get_child(sibling_field_name)
  mask_expr = expr.get_descendant_or_error(mask_path)
  filtered_expr = _FilterBySiblingExpression(source_expr, mask_expr)
  filtered_path = shared_parent.get_child(new_field_name)
  return expression_add.add_paths(expr, {filtered_path: filtered_expr})
def map_many_values(root, parent_path, source_fields, operation, dtype,
                    new_field_name):
    """Maps several sibling fields into one new sibling field.

    All source fields must have the same shape, and the output must have
    that shape as well.

    Args:
      root: original root.
      parent_path: parent path of all sources and of the new field.
      source_fields: source fields of the operation. Must have the same shape.
      operation: operation from source_fields to the new field.
      dtype: type of the new field.
      new_field_name: name of the new field.

    Returns:
      The new expression and the new path as a pair.
    """
    destination = parent_path.get_child(new_field_name)
    source_exprs = [
        root.get_descendant_or_error(parent_path.get_child(field_name))
        for field_name in source_fields
    ]
    mapped_expr = _MapValuesExpression(source_exprs, operation, dtype)
    new_root = expression_add.add_paths(root, {destination: mapped_expr})
    return new_root, destination
Exemple #7
0
 def test_calculate_root_indirect(self):
   """Checks the root size when the root went through an empty add_paths."""
   for calc_options in options_to_test:
     base_expr = create_expression.create_expression_from_prensor(
         prensor_test_util.create_simple_prensor())
     # Adding no paths still produces a new root that must be calculable.
     wrapped_expr = expression_add.add_paths(base_expr, {})
     computed = calculate.calculate_values([wrapped_expr],
                                           options=calc_options)
     [root_node] = computed
     self.assertAllEqual(root_node.size, 3)
def map_prensor_to_prensor(root_expr, source, paths_needed, prensor_op,
                           output_schema):
    r"""Applies a prensor op at `source` and merges its output fields there.

    For example, suppose you have an op my_op, that takes a prensor of the
    form:

      event
       / \
     foo   bar

    and produces a prensor of the form my_output_schema:

       event
        / \
     foo2 bar2

    If you give it an expression original with the schema:

     session
        |
      event
      /  \
    foo   bar

    result = map_prensor_to_prensor(
      original,
      path.Path(["session","event"]),
      my_paths_needed,
      my_op,
      my_output_schema)

    Result will have the schema:

     session
        |
      event--------
      /  \    \    \
    foo   bar foo2 bar2

    Args:
      root_expr: the root expression.
      source: the path where the prensor op is applied.
      paths_needed: the paths needed for the op; the expression at `source` is
        projected onto them before the op is applied.
      prensor_op: the prensor op.
      output_schema: the output schema of the op.

    Returns:
      A new expression where the prensor is merged.
    """
    source_expr = root_expr.get_descendant_or_error(source)
    projected_source = source_expr.project(paths_needed)
    op_expr = _PrensorOpExpression(projected_source, prensor_op,
                                   output_schema)

    # Every field the op exposes becomes a new child of `source`.
    merged_paths = {}
    for field_name in op_expr.known_field_names():
        destination = source.get_child(field_name)
        merged_paths[destination] = op_expr.get_child_or_error(field_name)
    return expression_add.add_paths(root_expr, merged_paths)
Exemple #9
0
 def test_add_to_already_existing_path(self):
     """add_to must raise ValueError when the target path is already in root.

     The setup builds friends_2 and friends_3 from user.friends via add_paths,
     then tries to add_to `root` at user.friends, a path that `root` already
     contains.
     """
     root = create_expression.create_expression_from_prensor(
         prensor_test_util.create_nested_prensor())
     root_1 = expression_add.add_paths(
         root, {
             path.Path(["user", "friends_2"]):
             root.get_descendant_or_error(path.Path(["user", "friends"
                                                     ]))
         })
     root_2 = expression_add.add_paths(
         root_1, {
             path.Path(["user", "friends_3"]):
             root_1.get_descendant_or_error(
                 path.Path(["user", "friends_2"]))
         })
     # Only the call under test sits inside assertRaises. If the setup above
     # raised ValueError, the test must fail rather than pass by accident.
     with self.assertRaises(ValueError):
         expression_add.add_to(root,
                               {path.Path(["user", "friends"]): root_2})
def _broadcast_impl(
        root: expression.Expression, origin: path.Path, sibling: path.Step,
        new_field_name: path.Step) -> Tuple[expression.Expression, path.Path]:
    """Adds a `_BroadcastExpression` of `origin` as a child of its sibling.

    Args:
      root: the root expression.
      origin: the path of the field to broadcast.
      sibling: the step naming a sibling of `origin` (same parent).
      new_field_name: the name of the new field created under the sibling.

    Returns:
      A pair of the new root expression and the path of the new field.
    """
    sibling_path = origin.get_parent().get_child(sibling)
    source_expr = root.get_descendant_or_error(origin)
    target_expr = root.get_descendant_or_error(sibling_path)
    result_path = sibling_path.get_child(new_field_name)
    result_expr = _BroadcastExpression(source_expr, target_expr)
    return expression_add.add_paths(root,
                                    {result_path: result_expr}), result_path
def _map_prensor_impl(root, root_path, paths_needed, operation, is_repeated,
                      dtype, new_field_name):
    """Adds a field under `root_path` computed by `operation` on a projection.

    Args:
      root: the root expression.
      root_path: the path of the expression the operation is applied to; the
        new field becomes its child.
      paths_needed: the paths the expression at `root_path` is projected onto.
      operation: passed to `_MapPrensorExpression` with the projection.
      is_repeated: passed through to `_MapPrensorExpression`.
      dtype: passed through to `_MapPrensorExpression`.
      new_field_name: the name of the new field.

    Returns:
      A pair of the new root expression and the path of the new field.
    """
    anchor_expr = root.get_descendant_or_error(root_path)
    projected_expr = project.project(anchor_expr, paths_needed)
    mapped_expr = _MapPrensorExpression(projected_expr, operation,
                                        is_repeated, dtype)
    destination = root_path.get_child(new_field_name)
    new_root = expression_add.add_paths(root, {destination: mapped_expr})
    return new_root, destination
Exemple #12
0
 def test_create_query_modify_and_calculate_event_value(self):
   """Calculates a copied child of a proto expression (tests dependencies)."""
   for calc_options in options_to_test:
     session_expr = (
         proto_test_util._get_expression_from_session_empty_user_info())
     copy_path = path.Path(["event_copy"])
     with_copy = expression_add.add_paths(
         session_expr, {copy_path: session_expr.get_child_or_error("event")})
     copied_event = with_copy.get_child_or_error("event_copy")
     [event_node] = calculate.calculate_values([copied_event],
                                               options=calc_options)
     self.assertAllEqual(event_node.parent_index, [0, 0, 0, 1, 1])
Exemple #13
0
def _size_impl(root, source_path, new_field_name):
    if not source_path:
        raise ValueError("Cannot get the size of the root.")
    if root.get_descendant(source_path) is None:
        raise ValueError("Path not found: {}".format(str(source_path)))
    parent_path = source_path.get_parent()
    new_path = parent_path.get_child(new_field_name)
    return expression_add.add_paths(
        root, {
            new_path:
            SizeExpression(root.get_descendant_or_error(source_path),
                           root.get_descendant_or_error(parent_path))
        }), new_path
Exemple #14
0
def _promote_impl(root, p, new_field_name):
    if len(p) < 2:
        raise ValueError("Cannot do a promotion beyond the root: {}".format(
            str(p)))
    parent_path = p.get_parent()
    grandparent_path = parent_path.get_parent()
    new_path = grandparent_path.get_child(new_field_name)
    return expression_add.add_paths(
        root, {
            new_path:
            PromoteExpression(root.get_descendant_or_error(p),
                              root.get_descendant_or_error(parent_path))
        }), new_path
Exemple #15
0
def _map_prensor_impl(
        root: expression.Expression, root_path: path.Path,
        paths_needed: Sequence[path.Path],
        operation: Callable[[prensor.Prensor, calculate_options.Options],
                            prensor.LeafNodeTensor], is_repeated: bool,
        dtype: tf.DType,
        new_field_name: path.Step) -> Tuple[expression.Expression, path.Path]:
    """Adds a field under `root_path` computed by `operation` on a projection.

    Args:
      root: the root expression.
      root_path: the path of the expression the operation is applied to; the
        new field becomes its child.
      paths_needed: the paths the expression at `root_path` is projected onto.
      operation: passed to `_MapPrensorExpression` with the projection.
      is_repeated: passed through to `_MapPrensorExpression`.
      dtype: passed through to `_MapPrensorExpression`.
      new_field_name: the name of the new field.

    Returns:
      A pair of the new root expression and the path of the new field.
    """
    anchor_expr = root.get_descendant_or_error(root_path)
    projected_expr = project.project(anchor_expr, paths_needed)
    mapped_expr = _MapPrensorExpression(projected_expr, operation,
                                        is_repeated, dtype)
    destination = root_path.get_child(new_field_name)
    new_root = expression_add.add_paths(root, {destination: mapped_expr})
    return new_root, destination
Exemple #16
0
    def test_add_to(self):
        """add_to grafts a field derived through two add_paths steps."""
        friends = path.Path(["user", "friends"])
        friends_2 = path.Path(["user", "friends_2"])
        friends_3 = path.Path(["user", "friends_3"])

        base = create_expression.create_expression_from_prensor(
            prensor_test_util.create_nested_prensor())
        # friends -> friends_2 -> friends_3, each step on a new root.
        with_second = expression_add.add_paths(
            base, {friends_2: base.get_descendant_or_error(friends)})
        with_third = expression_add.add_paths(
            with_second,
            {friends_3: with_second.get_descendant_or_error(friends_2)})
        grafted = expression_add.add_to(base, {friends_3: with_third})

        added_field = grafted.get_descendant_or_error(
            path.Path(["user", "friends_3"]))
        self.assertIsNotNone(added_field)
        self.assertTrue(added_field.is_repeated)
        self.assertEqual(added_field.type, tf.string)
        computed_leaf = expression_test_util.calculate_value_slowly(
            added_field)
        self.assertEqual(computed_leaf.values.dtype, tf.string)
Exemple #17
0
 def test_add_paths(self):
     """add_paths exposes user.friends under a second name, friends_copy."""
     copy_path = path.Path(["user", "friends_copy"])
     base = create_expression.create_expression_from_prensor(
         prensor_test_util.create_nested_prensor())
     friends_expr = base.get_descendant_or_error(
         path.Path(["user", "friends"]))
     with_copy = expression_add.add_paths(base, {copy_path: friends_expr})

     copied_field = with_copy.get_descendant_or_error(
         path.Path(["user", "friends_copy"]))
     self.assertIsNotNone(copied_field)
     self.assertTrue(copied_field.is_repeated)
     self.assertEqual(copied_field.type, tf.string)
     self.assertTrue(copied_field.is_leaf)
     computed_leaf = expression_test_util.calculate_value_slowly(copied_field)
     self.assertEqual(computed_leaf.values.dtype, tf.string)
     self.assertEqual(copied_field.known_field_names(), frozenset())
Exemple #18
0
def _broadcast_impl(
        root: expression.Expression, origin: path.Path, sibling: path.Step,
        new_field_name: path.Step) -> Tuple[expression.Expression, path.Path]:
    """Broadcasts `origin` to `sibling`, adding it as a child of the sibling.

    Args:
      root: the root expression.
      origin: the path of the field to broadcast; may be a leaf or not.
      sibling: the step naming a sibling of `origin` (same parent).
      new_field_name: the name of the new field created under the sibling.

    Returns:
      A pair of the new root expression and the path of the new field.
    """
    sibling_path = origin.get_parent().get_child(sibling)
    source_expr = root.get_descendant_or_error(origin)

    # Leaf and non-leaf origins are handled by different expression classes.
    if source_expr.is_leaf:
        factory = _BroadcastExpression
    else:
        factory = _BroadcastChildExpression

    broadcast_expr = factory(source_expr,
                             root.get_descendant_or_error(sibling_path))
    broadcast_path = sibling_path.get_child(new_field_name)
    return expression_add.add_paths(
        root, {broadcast_path: broadcast_expr}), broadcast_path
Exemple #19
0
def get_positional_index(expr, source_path, new_field_name):
    """Adds a sibling of `source_path` holding each element's positional index.

    Given a field with parent_index [0,1,1,2,3,4,4], this returns:
    parent_index [0,1,1,2,3,4,4] and value [0,0,1,0,0,0,1]

    Args:
      expr: original expression.
      source_path: path in the expression to get the index of.
      new_field_name: the name of the new field.

    Returns:
      The new expression and the new path as a pair.
    """
    source_expr = expr.get_descendant_or_error(source_path)
    index_path = source_path.get_parent().get_child(new_field_name)
    index_expr = _PositionalIndexExpression(source_expr)
    return expression_add.add_paths(expr, {index_path: index_expr}), index_path
Exemple #20
0
def filter_by_child(expr, p,
                    child_field_name,
                    new_field_name):
  """Filters the expression at `p` by an optional boolean child field.

  A parent is kept if the child field is present and True; otherwise the
  parent is dropped.

  Args:
    expr: the original expression.
    p: the path to filter.
    child_field_name: the boolean child field to use to filter.
    new_field_name: the new, filtered version of path; created as a sibling
      of `p`.

  Returns:
    The new root expression.
  """
  source_expr = expr.get_descendant_or_error(p)
  mask_expr = source_expr.get_child_or_error(child_field_name)
  filtered_expr = _FilterByChildExpression(source_expr, mask_expr)
  filtered_path = p.get_parent().get_child(new_field_name)
  return expression_add.add_paths(expr, {filtered_path: filtered_expr})
Exemple #21
0
def create_proto_index_field(root, new_field_name):
    """Adds an `_InputProtoIndexExpression` of `root` as a top-level field.

    Args:
      root: the root expression.
      new_field_name: the name of the new field, placed directly under root.

    Returns:
      The expression returned by `expression_add.add_paths`.
    """
    index_path = path.Path([new_field_name])
    index_expr = _InputProtoIndexExpression(root)
    return expression_add.add_paths(root, {index_path: index_expr})
Exemple #22
0
def create_transformed_field(
        expr: expression.Expression, source_path: path.CoercableToPath,
        dest_field: StrStep,
        transform_fn: TransformFn) -> expression.Expression:
    """Creates an expression that transforms serialized proto tensors.

    The transform_fn argument should take the form:

      def transform_fn(parent_indices, values):
        ...
        return (transformed_parent_indices, transformed_values)

    Given:
    - parent_indices: an int64 vector of non-decreasing parent message
      indices.
    - values: a string vector of serialized protos having the same shape as
      `parent_indices`.
    `transform_fn` must return new parent indices and serialized values
    encoding the same proto message as the passed in `values`. The two
    returned vectors must have the same size as each other, but that size
    need not match the size of the inputs.

    Note:
      If CalculateOptions.use_string_view is True (it is set at calculate
      time, so this expression cannot know it beforehand), the `values` passed
      to `transform_fn` are string views pointing all the way back to the
      original input tensor of serialized root protos. `transform_fn` must
      maintain such views and avoid creating new values that are either not
      string views into the root protos or self-owned strings. Downstream
      decoding ops still produce string views into their input (which are
      views into the root proto) and only hold a reference to the original
      root proto tensor to keep it alive, so the input tensor may get
      destroyed after the decoding op.

      In short, you can do element-wise transforms to `values`, but you can't
      mutate the contents of elements in `values` or create new elements.

      To lift this restriction, a decoding op must be told to hold a
      reference to the input tensors of all its upstream decoding ops.

    Args:
      expr: a source expression containing `source_path`.
      source_path: the path to the field to transform.
      dest_field: the name of the newly created field. This field will be a
        sibling of the field identified by `source_path`.
      transform_fn: a callable that accepts parent_indices and serialized
        proto values and returns possibly modified parent_indices and values.
        Note that when CalculateOptions.use_string_view is set, transform_fn
        should not have any stateful side-effecting uses of serialized proto
        inputs. Doing so could cause segfaults, as the backing string tensor
        lifetime is not guaranteed when the side-effecting operations run.

    Returns:
      An expression.

    Raises:
      ValueError: if the source path is not a proto message field.
    """
    source_path = path.create_path(source_path)
    source_expr = expr.get_descendant_or_error(source_path)
    if not isinstance(source_expr, _ProtoChildExpression):
        raise ValueError(
            "Expected _ProtoChildExpression for field {}, but found {}.".
            format(str(source_path), source_expr))

    final_transform = transform_fn
    if isinstance(source_expr, _TransformProtoChildExpression):
        # To propagate the fields needed for parsing, a
        # _TransformProtoChildExpression must always be sourced from the
        # original, untransformed _ProtoChildExpression. Two transforms
        # applied in sequence would therefore share that source and each act
        # on it directly, rather than the second acting on the first's output.
        # The workaround is to wrap the user's function so that it first runs
        # the source's own transform. The cost is that the earlier transform
        # may be applied redundantly when other expressions also derive
        # directly from it.
        def chained_transform(
                parent_indices: tf.Tensor,
                values: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
            upstream_indices, upstream_values = source_expr.transform_fn(
                parent_indices, values)
            return transform_fn(upstream_indices, upstream_values)

        final_transform = chained_transform

    transformed_expr = _TransformProtoChildExpression(
        parent=source_expr._parent,  # pylint: disable=protected-access
        desc=source_expr._desc,  # pylint: disable=protected-access
        is_repeated=source_expr.is_repeated,
        name_as_field=source_expr.name_as_field,
        transform_fn=final_transform,
        backing_str_tensor=source_expr._backing_str_tensor)  # pylint: disable=protected-access
    sibling_path = source_path.get_parent().get_child(dest_field)
    return expression_add.add_paths(expr, {sibling_path: transformed_expr})
Exemple #23
0
def create_transformed_field(
        expr: expression.Expression, source_path: path.CoercableToPath,
        dest_field: StrStep,
        transform_fn: TransformFn) -> expression.Expression:
    """Creates an expression that transforms serialized proto tensors.

    The transform_fn argument should take the form:

      def transform_fn(parent_indices, values):
        ...
        return (transformed_parent_indices, transformed_values)

    Given:
    - parent_indices: an int64 vector of non-decreasing parent message
      indices.
    - values: a string vector of serialized protos having the same shape as
      `parent_indices`.
    `transform_fn` must return new parent indices and serialized values
    encoding the same proto message as the passed in `values`. The two
    returned vectors must have the same size as each other, but that size
    need not match the size of the inputs.

    Args:
      expr: a source expression containing `source_path`.
      source_path: the path to the field to transform.
      dest_field: the name of the newly created field. This field will be a
        sibling of the field identified by `source_path`.
      transform_fn: a callable that accepts parent_indices and serialized
        proto values and returns possibly modified parent_indices and values.

    Returns:
      An expression.

    Raises:
      ValueError: if the source path is not a proto message field.
    """
    source_path = path.create_path(source_path)
    source_expr = expr.get_descendant_or_error(source_path)
    if not isinstance(source_expr, _ProtoChildExpression):
        raise ValueError(
            "Expected _ProtoChildExpression for field {}, but found {}.".
            format(str(source_path), source_expr))

    final_transform = transform_fn
    if isinstance(source_expr, _TransformProtoChildExpression):
        # To propagate the fields needed for parsing, a
        # _TransformProtoChildExpression must always be sourced from the
        # original, untransformed _ProtoChildExpression. Two transforms
        # applied in sequence would therefore share that source and each act
        # on it directly, rather than the second acting on the first's output.
        # The workaround is to wrap the user's function so that it first runs
        # the source's own transform. The cost is that the earlier transform
        # may be applied redundantly when other expressions also derive
        # directly from it.
        def chained_transform(
                parent_indices: tf.Tensor,
                values: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
            upstream_indices, upstream_values = source_expr.transform_fn(
                parent_indices, values)
            return transform_fn(upstream_indices, upstream_values)

        final_transform = chained_transform

    transformed_expr = _TransformProtoChildExpression(
        parent=source_expr._parent,  # pylint: disable=protected-access
        desc=source_expr._desc,  # pylint: disable=protected-access
        is_repeated=source_expr.is_repeated,
        name_as_field=source_expr.name_as_field,
        transform_fn=final_transform)
    sibling_path = source_path.get_parent().get_child(dest_field)
    return expression_add.add_paths(expr, {sibling_path: transformed_expr})