def test_map_sparse_tensor_is_repeated(self):
    """Checks map_sparse_tensor on `foorepeated` with is_repeated=True.

    For every entry of `options_to_test`, a `foorepeated_doubled` field is
    derived from the simple prensor by doubling `foorepeated`; the calculated
    parent index and values are then compared with the expected lists.
    """
    for options in options_to_test:
        simple_prensor = prensor_test_util.create_simple_prensor()
        expr = create_expression.create_expression_from_prensor(simple_prensor)
        new_root = map_prensor.map_sparse_tensor(
            expr,
            path.Path([]),
            [path.Path(["foorepeated"])],
            lambda x: x * 2,
            True,
            tf.int32,
            "foorepeated_doubled",
        )
        doubled_expr = new_root.get_descendant_or_error(
            path.Path(["foorepeated_doubled"]))
        leaf_node = expression_test_util.calculate_value_slowly(
            doubled_expr, options=options)
        self.assertAllEqual(leaf_node.parent_index, [0, 1, 1, 2])
        self.assertAllEqual(leaf_node.values, [18, 16, 14, 12])
def test_promote_and_broadcast_anonymous(self):
    """Exercises promote_and_broadcast_anonymous on the big prensor.

    Calls it with `user.friends` and `doc`, then checks the metadata
    (repeated, string-typed, leaf, no known child fields) and the calculated
    dtype of the field found at the returned path.
    """
    big_prensor = prensor_test_util.create_big_prensor()
    expr = create_expression.create_expression_from_prensor(big_prensor)
    new_root, new_path = promote_and_broadcast.promote_and_broadcast_anonymous(
        expr, path.Path(["user", "friends"]), path.Path(["doc"]))
    new_field = new_root.get_descendant_or_error(new_path)

    self.assertTrue(new_field.is_repeated)
    self.assertEqual(new_field.type, tf.string)
    self.assertTrue(new_field.is_leaf)
    # The field must compare equal to itself, but not to the original root.
    self.assertTrue(new_field.calculation_equal(new_field))
    self.assertFalse(new_field.calculation_equal(expr))

    leaf_node = expression_test_util.calculate_value_slowly(new_field)
    self.assertEqual(leaf_node.values.dtype, tf.string)
    self.assertEqual(new_field.known_field_names(), frozenset())
def _create_one_value_prensor():
    """Builds a three-element prensor where only the middle element has `foo`.

    Returns:
      a RootPrensor representing:
      {}
      {foo:8}
      {}
    """
    nodes = {}
    nodes[path.Path([])] = prensor_test_util.create_root_node(3)
    # A single value 8 whose parent index is 1, i.e. the second root element.
    nodes[path.Path(["foo"])] = prensor_test_util.create_optional_leaf_node(
        [1], [8])
    return prensor.create_prensor_from_descendant_nodes(nodes)
def test_map_ragged_tensor_repeated(self):
    """Checks map_ragged_tensor on `foorepeated` inside an explicit session.

    For every entry of `options_to_test`, `foorepeated` is doubled into
    `foorepeated_doubled`; the parent index and values are evaluated with
    `sess.run` and compared with the expected lists.
    """
    for options in options_to_test:
        with self.session(use_gpu=False) as sess:
            simple_prensor = prensor_test_util.create_simple_prensor()
            expr = create_expression.create_expression_from_prensor(
                simple_prensor)
            new_root = map_prensor.map_ragged_tensor(
                expr,
                path.Path([]),
                [path.Path(["foorepeated"])],
                lambda x: x * 2,
                False,
                tf.int32,
                "foorepeated_doubled",
            )
            doubled_expr = new_root.get_descendant_or_error(
                path.Path(["foorepeated_doubled"]))
            leaf_node = expression_test_util.calculate_value_slowly(
                doubled_expr, options=options)
            fetched = sess.run([leaf_node.parent_index, leaf_node.values])
            [parent_index, values] = fetched
            self.assertAllEqual(parent_index, [0, 1, 1, 2])
            self.assertAllEqual(values, [18, 16, 14, 12])
def _test_runner(options):
    """Builds and runs a `foo_concat` field made with map_sparse_tensor.

    The operator concatenates the input sparse tensor with itself along
    axis 0. The fetched results are discarded, so this helper only matters
    for whether evaluation succeeds.

    NOTE(review): `self` is not a parameter; presumably this helper is
    defined inside a test method and picks `self` up from that scope, and
    the caller expects evaluation to raise - confirm against the caller.

    Args:
      options: forwarded as `options=` to calculate_value_slowly.
    """
    with self.session(use_gpu=False) as sess:
        simple_prensor = prensor_test_util.create_simple_prensor()
        expr = create_expression.create_expression_from_prensor(simple_prensor)
        new_root = map_prensor.map_sparse_tensor(
            expr,
            path.Path([]),
            [path.Path(["foo"])],
            lambda x: tf.sparse_concat(0, [x, x]),
            False,
            tf.int32,
            "foo_concat",
        )
        concat_expr = new_root.get_descendant_or_error(
            path.Path(["foo_concat"]))
        leaf_node = expression_test_util.calculate_value_slowly(
            concat_expr, options=options)
        sess.run([leaf_node.parent_index, leaf_node.values])
def test_get_descendants(self):
    """Checks get_descendant_or_error against chained get_child_or_error.

    Looking up `user.friends` in one step must yield the very same object
    as walking `user` and then `friends` one child at a time.
    """
    nested_prensor = prensor_test_util.create_nested_prensor()
    expr = create_expression.create_expression_from_prensor(nested_prensor)
    via_descendant = expr.get_descendant_or_error(
        path.Path(["user", "friends"]))
    via_children = expr.get_child_or_error("user").get_child_or_error(
        "friends")
    self.assertIs(via_descendant, via_children)
def test_promote_substructure(self):
    """Tests promote.promote(...) applied to the submessage `event.doc`.

    Checks the metadata of the promoted `new_field` and of its `bar` and
    `keep_me` children, then the calculated node size and value dtypes.
    """
    deep_prensor = prensor_test_util.create_deep_prensor()
    expr = create_expression.create_expression_from_prensor(deep_prensor)
    new_root = promote.promote(expr, path.Path(["event", "doc"]), "new_field")

    # Metadata of the promoted submessage itself.
    promoted = new_root.get_child_or_error("new_field")
    self.assertIsNotNone(promoted)
    self.assertTrue(promoted.is_repeated)
    self.assertEqual(promoted.known_field_names(),
                     frozenset(["bar", "keep_me"]))

    # Metadata of the `bar` child.
    bar_expr = promoted.get_child_or_error("bar")
    self.assertIsNotNone(bar_expr)
    self.assertTrue(bar_expr.is_repeated)
    self.assertEqual(bar_expr.type, tf.string)
    self.assertTrue(bar_expr.is_leaf)

    # Metadata of the `keep_me` child.
    keep_me_expr = promoted.get_child_or_error("keep_me")
    self.assertIsNotNone(keep_me_expr)
    self.assertFalse(keep_me_expr.is_repeated)
    self.assertEqual(keep_me_expr.type, tf.bool)
    self.assertTrue(keep_me_expr.is_leaf)

    # Calculated values.
    promoted_node = expression_test_util.calculate_value_slowly(promoted)
    self.assertEqual(promoted_node.size, 3)
    self.assertTrue(promoted_node.is_repeated)

    bar_node = expression_test_util.calculate_value_slowly(bar_expr)
    self.assertEqual(bar_node.values.dtype, tf.string)

    keep_me_node = expression_test_util.calculate_value_slowly(keep_me_expr)
    self.assertEqual(keep_me_node.values.dtype, tf.bool)
def test_promote_optional_child_of_repeated(self):
    """Checks that promoting `doc.keep_me` yields a repeated expression."""
    nested_prensor = prensor_test_util.create_nested_prensor()
    expr = create_expression.create_expression_from_prensor(nested_prensor)
    new_root, promoted_path = promote.promote_anonymous(
        expr, path.Path(["doc", "keep_me"]))
    promoted_expr = new_root.get_descendant_or_error(promoted_path)
    self.assertTrue(promoted_expr.is_repeated)
def map_ragged_tensors(self, parent_path, source_fields, operator,
                       is_repeated, dtype, new_field_name):
    """Maps a set of primitive fields of a message to a new field.

    Unlike map_field_values, this operation allows you to some degree reshape
    the field. For instance, you can take two optional fields and create a
    repeated field, or perform a reduce_sum on the last dimension of a
    repeated field and create an optional field.

    The key constraint is that the operator must return a tensor of the
    correct dimension: 2D if is_repeated is true, or 1D if is_repeated is
    false. Moreover, the first dimension of the result must be equal to the
    first dimension of the input tensor.

    NOTE(review): the previous wording of this docstring called the operator
    inputs and output "sparse tensors". This method delegates to
    map_prensor.map_ragged_tensor, so presumably ragged tensors are meant -
    confirm against map_prensor.

    Args:
      parent_path: the parent of the input and output fields.
      source_fields: the nonempty list of names of the source fields.
      operator: an operator that takes len(source_fields) tensors and returns
        a tensor of the appropriate shape.
      is_repeated: whether the output is repeated.
      dtype: the dtype of the result.
      new_field_name: the name of the resulting field.

    Returns:
      A new query.
    """
    parent = path.create_path(parent_path)
    # Each source field name becomes its own single-step path.
    source_paths = [path.Path([field_name]) for field_name in source_fields]
    return map_prensor.map_ragged_tensor(
        self, parent, source_paths, operator, is_repeated, dtype,
        new_field_name)
def test_reroot_and_create_proto_index_deep(self):
    """Reroots the deep prensor at `event.doc` and adds a proto index field.

    Checks metadata and calculated values of both the `bar` leaf under the
    new root and the created `proto_index` field.
    """
    deep_prensor = prensor_test_util.create_deep_prensor()
    expr = create_expression.create_expression_from_prensor(deep_prensor)
    new_root = reroot.reroot(expr, path.Path(["event", "doc"]))
    indexed_root = reroot.create_proto_index_field(new_root, "proto_index")
    proto_index = indexed_root.get_child("proto_index")
    new_field = new_root.get_child("bar")

    leaf_node = expression_test_util.calculate_value_slowly(new_field)
    proto_index_node = expression_test_util.calculate_value_slowly(
        proto_index)

    # Metadata and dtype of the `bar` leaf.
    self.assertIsNotNone(new_field)
    self.assertTrue(new_field.is_repeated)
    self.assertEqual(new_field.type, tf.string)
    self.assertTrue(new_field.is_leaf)
    self.assertEqual(new_field.known_field_names(), frozenset())
    self.assertEqual(leaf_node.values.dtype, tf.string)

    # Metadata and dtype of the proto index field.
    self.assertIsNotNone(proto_index)
    self.assertFalse(proto_index.is_repeated)
    self.assertEqual(proto_index.type, tf.int64)
    self.assertTrue(proto_index.is_leaf)
    self.assertEqual(proto_index.known_field_names(), frozenset())
    self.assertEqual(proto_index_node.values.dtype, tf.int64)

    # Calculated contents.
    self.assertAllEqual([b"a", b"b", b"c", b"d"], leaf_node.values)
    self.assertAllEqual([0, 1, 1, 2], leaf_node.parent_index)
    self.assertAllEqual([0, 1, 1], proto_index_node.values)
    self.assertAllEqual([0, 1, 2], proto_index_node.parent_index)
def test_promote_with_schema_dense_fraction(self):
    """Test when min_fraction is not 1.

    The schema is edited so that `user` has exactly 3 values with
    min_fraction 1 and `user.friends` has min_fraction 0.9; after promoting
    `user.friends`, the new schema feature must report min_fraction 0.3.
    """
    schema = prensor_test_util.create_big_prensor_schema()
    top_level = {feat.name: feat for feat in schema.feature}

    user_feature = top_level["user"]
    user_feature.value_count.min = 3
    user_feature.value_count.max = 3
    user_feature.presence.min_fraction = 1

    user_children = {
        feat.name: feat for feat in user_feature.struct_domain.feature
    }
    friends_feature = user_children["friends"]
    friends_feature.presence.min_fraction = 0.9

    big_prensor = prensor_test_util.create_big_prensor()
    expr = create_expression.create_expression_from_prensor(
        big_prensor).apply_schema(schema)
    new_root, promoted_path = promote.promote_anonymous(
        expr, path.Path(["user", "friends"]))
    promoted_field = new_root.get_descendant_or_error(promoted_path)

    promoted_schema_feature = promoted_field.schema_feature
    self.assertIsNotNone(promoted_schema_feature)
    self.assertEqual(promoted_schema_feature.presence.min_fraction, 0.3)
def test_create_expression_from_proto_with_any(self):
    """Test an any field.

    Looks up the AllSimple type entry under `my_any` and checks its
    metadata, known field names, parent index dtype and source expressions.
    """
    expr = _get_expression_with_any()
    any_expr = expr.get_child_or_error("my_any")
    any_type_path = path.Path([
        "my_any",
        "(type.googleapis.com/struct2tensor.test.AllSimple)",
    ])
    simple_expr = expr.get_descendant_or_error(any_type_path)

    self.assertFalse(simple_expr.is_repeated)
    self.assertIsNone(simple_expr.type)
    self.assertFalse(simple_expr.is_leaf)
    self.assertFalse(simple_expr.calculation_is_identity())
    self.assertTrue(simple_expr.calculation_equal(simple_expr))
    self.assertFalse(simple_expr.calculation_equal(expr))

    child_node = expression_test_util.calculate_value_slowly(simple_expr)
    self.assertEqual(child_node.parent_index.dtype, tf.int64)

    expected_field_names = frozenset({
        "optional_string",
        "optional_uint64",
        "repeated_uint64",
        "repeated_int32",
        "repeated_string",
        "optional_int32",
        "optional_float",
        "repeated_int64",
        "optional_uint32",
        "repeated_float",
        "repeated_uint32",
        "optional_double",
        "optional_int64",
        "repeated_double",
    })
    self.assertEqual(simple_expr.known_field_names(), expected_field_names)

    # The only source of the type entry must be the `my_any` expression.
    sources = simple_expr.get_source_expressions()
    self.assertLen(sources, 1)
    self.assertIs(any_expr, sources[0])
def test_create_expression_from_proto_and_calculate_event_id_value(self):
    """Calculates `event.event_id` on a deep tree and checks its contents."""
    expr = proto_test_util._get_expression_from_session_empty_user_info()
    event_id_expr = expr.get_descendant_or_error(
        path.Path(["event", "event_id"]))
    event_id_value = expression_test_util.calculate_value_slowly(
        event_id_expr)
    self.assertAllEqual(event_id_value.parent_index, [0, 1, 2, 4])
    self.assertAllEqual(event_id_value.values, [b"A", b"B", b"C", b"D"])
def test_apply_empty_schema(self):
    """Test that applying an empty schema does not filter out paths."""
    big_prensor = prensor_test_util.create_big_prensor()
    expr = create_expression.create_expression_from_prensor(big_prensor)
    expr2 = expr.apply_schema(schema_pb2.Schema())

    # Each of these paths must still resolve after the empty schema is applied.
    for steps in (["foo"], ["foorepeated"], ["doc", "bar"]):
        descendant = expr2.get_descendant(path.Path(steps))
        self.assertIsNotNone(descendant)

    known_field_names = expr2.known_field_names()
    for field_name in ("doc", "foo", "foorepeated", "user"):
        self.assertIn(field_name, known_field_names)
def test_promote_and_calculate_substructure(self):
    """Tests promoting substructure on a tree with depth of 4.

    Promotes `event.doc.nested_child` and checks the calculated nodes of the
    promoted submessage and of its `bar` and `keep_me` children.
    """
    four_layer = prensor_test_util.create_four_layer_prensor()
    expr = create_expression.create_expression_from_prensor(four_layer)
    new_root, new_path = promote.promote_anonymous(
        expr, path.Path(["event", "doc", "nested_child"]))

    promoted_expr = new_root.get_descendant_or_error(new_path)
    bar_expr = new_root.get_descendant_or_error(new_path.get_child("bar"))
    keep_me_expr = new_root.get_descendant_or_error(
        new_path.get_child("keep_me"))

    # The promoted nested_child's parent index is changed.
    promoted_node = expression_test_util.calculate_value_slowly(promoted_expr)
    self.assertAllEqual(promoted_node.parent_index, [0, 1, 1, 1])
    self.assertTrue(promoted_node.is_repeated)

    # bar's parent index should be unchanged.
    bar_node = expression_test_util.calculate_value_slowly(bar_expr)
    self.assertAllEqual(bar_node.parent_index, [0, 1, 1, 2])
    self.assertAllEqual(bar_node.values, [b"a", b"b", b"c", b"d"])
    self.assertTrue(bar_node.is_repeated)

    # keep_me's parent index should be unchanged.
    keep_me_node = expression_test_util.calculate_value_slowly(keep_me_expr)
    self.assertAllEqual(keep_me_node.parent_index, [0, 1])
    self.assertAllEqual(keep_me_node.values, [False, True])
    self.assertFalse(keep_me_node.is_repeated)
def test_is_leaf(self):
    """Checks `is_leaf` for several paths of the nested prensor."""
    nested = prensor_test_util.create_nested_prensor()
    # (path steps, whether the node at that path is expected to be a leaf)
    cases = (
        (["doc", "bar"], True),
        (["doc"], False),
        ([], False),
        (["user", "friends"], True),
        (["doc", "keep_me"], True),
    )
    for steps, expect_leaf in cases:
        check = self.assertTrue if expect_leaf else self.assertFalse
        check(nested.get_descendant_or_error(path.Path(steps)).is_leaf)
def test_slice_and_project_mini(self):
    """Testing a part of query_test.test_slice_and_project.

    Originally, there was an error with query_test.test_slice_and_project,
    caused by filter_expression. This unit test was used to find and
    ultimately fix the error.
    """
    root = _create_slice_and_project_example()
    filtered_root = filter_expression.filter_by_sibling(
        root, path.Path(["event", "action"]), "action_mask", "taction")
    taction_expr = filtered_root.get_descendant_or_error(
        path.Path(["event", "taction"]))
    taction_node = expression_test_util.calculate_value_slowly(taction_expr)
    with self.session(use_gpu=False) as sess:
        parent_indices = sess.run(taction_node.parent_index)
        self.assertAllEqual(parent_indices, [0, 1, 2, 4, 4])
def test_skip_eager_map_ragged_tensor_repeated(self):
    """Checks map_ragged_tensor on `foorepeated`; does nothing in eager mode.

    For every entry of `options_to_test`, `foorepeated` is doubled into
    `foorepeated_doubled` and the calculated parent index and values are
    compared with the expected lists.
    """
    # This fails in eager, with an inconsistency in the ragged tensor.
    if tf.executing_eagerly():
        return

    for options in options_to_test:
        simple_prensor = prensor_test_util.create_simple_prensor()
        expr = create_expression.create_expression_from_prensor(simple_prensor)
        new_root = map_prensor.map_ragged_tensor(
            expr,
            path.Path([]),
            [path.Path(["foorepeated"])],
            lambda x: x * 2,
            False,
            tf.int32,
            "foorepeated_doubled",
        )
        doubled_expr = new_root.get_descendant_or_error(
            path.Path(["foorepeated_doubled"]))
        leaf_node = expression_test_util.calculate_value_slowly(
            doubled_expr, options=options)
        self.assertAllEqual(leaf_node.parent_index, [0, 1, 1, 2])
        self.assertAllEqual(leaf_node.values, [18, 16, 14, 12])
def create_simple_prensor() -> prensor.Prensor:
    """Builds a three-element prensor with `foo` and `foorepeated` leaves.

    Returns:
      a RootPrensor representing:
      {foo:9, foorepeated:[9]}
      {foo:8, foorepeated:[8,7]}
      {foo:7, foorepeated:[6]}
    """
    nodes = {}
    nodes[path.Path([])] = create_root_node(3)
    nodes[path.Path(["foo"])] = create_optional_leaf_node(
        [0, 1, 2], [9, 8, 7])
    # The second root element (index 1) owns two `foorepeated` values.
    nodes[path.Path(["foorepeated"])] = create_repeated_leaf_node(
        [0, 1, 1, 2], [9, 8, 7, 6])
    return prensor.create_prensor_from_descendant_nodes(nodes)
def test_map_values(self):
    """Doubles `foo` with map_values and checks the evaluated result."""
    with self.session(use_gpu=False) as sess:
        simple_prensor = prensor_test_util.create_simple_prensor()
        expr = create_expression.create_expression_from_prensor(simple_prensor)
        new_root = map_values.map_values(
            expr, path.Path(["foo"]), lambda x: x * 2, tf.int64,
            "foo_doubled")
        doubled_expr = new_root.get_descendant_or_error(
            path.Path(["foo_doubled"]))
        leaf_node = expression_test_util.calculate_value_slowly(doubled_expr)
        fetched = sess.run([leaf_node.parent_index, leaf_node.values])
        [parent_index, values] = fetched
        self.assertAllEqual(parent_index, [0, 1, 2])
        self.assertAllEqual(values, [18, 16, 14])
def test_add_paths(self):
    """Adds `user.friends` again as `user.friends_copy` and checks the copy.

    The added field must be a repeated string leaf with no known child
    fields, and its calculated values must have string dtype.
    """
    nested_prensor = prensor_test_util.create_nested_prensor()
    expr = create_expression.create_expression_from_prensor(nested_prensor)
    copy_path = path.Path(["user", "friends_copy"])
    friends_expr = expr.get_descendant_or_error(
        path.Path(["user", "friends"]))
    new_root = expression_add.add_paths(expr, {copy_path: friends_expr})

    new_field = new_root.get_descendant_or_error(
        path.Path(["user", "friends_copy"]))
    self.assertIsNotNone(new_field)
    self.assertTrue(new_field.is_repeated)
    self.assertEqual(new_field.type, tf.string)
    self.assertTrue(new_field.is_leaf)

    leaf_node = expression_test_util.calculate_value_slowly(new_field)
    self.assertEqual(leaf_node.values.dtype, tf.string)
    self.assertEqual(new_field.known_field_names(), frozenset())
def test_transformed_field_values_with_transformed_parent(
        self, use_string_view):
    """Applies create_transformed_field twice, the second under the first.

    `event` is transformed into `reversed_event` with `_reverse_values`,
    then `reversed_event.action` into `reversed_action`; the projected
    `doc_id` values are compared with the expected nested list.

    Args:
      use_string_view: passed to `_get_calculate_options`; when truthy,
        `_check_string_view` is also called at the end.
    """
    expr = proto_test_util._get_expression_from_session_empty_user_info()
    event_reversed = proto.create_transformed_field(
        expr, path.Path(["event"]), "reversed_event", _reverse_values)
    action_reversed = proto.create_transformed_field(
        event_reversed,
        path.Path(["reversed_event", "action"]),
        "reversed_action",
        _reverse_values,
    )
    projected = action_reversed.project(
        ["reversed_event.reversed_action.doc_id"])
    result = expression_test_util.calculate_list_map(
        projected,
        self,
        options=self._get_calculate_options(use_string_view),
    )
    expected_doc_ids = [
        [
            [[b"b"], [b"a"], []],
            [[b"c"]],
            [[b"f"], [b"e"]],
        ],
        [
            [[b"g"], [b"j"]],
            [[b"i"], [b"h"]],
        ],
    ]
    self.assertAllEqual(
        result["reversed_event.reversed_action.doc_id"], expected_doc_ids)
    if use_string_view:
        self._check_string_view()
def test_add_to_already_existing_path(self):
    """Checks that add_to raises ValueError for `user.friends`.

    Two derived roots are built with add_paths (`user.friends_2` copied from
    `user.friends`, then `user.friends_3` copied from `user.friends_2`).
    Calling add_to on the original root with `user.friends` mapped to the
    second derived root must raise ValueError - presumably because
    `user.friends` already exists in the original root.
    """
    root = create_expression.create_expression_from_prensor(
        prensor_test_util.create_nested_prensor())
    root_1 = expression_add.add_paths(
        root, {
            path.Path(["user", "friends_2"]):
                root.get_descendant_or_error(path.Path(["user", "friends"]))
        })
    root_2 = expression_add.add_paths(
        root_1, {
            path.Path(["user", "friends_3"]):
                root_1.get_descendant_or_error(
                    path.Path(["user", "friends_2"]))
        })
    # Only add_to is expected to raise. The setup stays outside the context
    # manager so that a ValueError from it fails the test instead of
    # silently satisfying the assertion.
    with self.assertRaises(ValueError):
        expression_add.add_to(root, {path.Path(["user", "friends"]): root_2})
def test_create_has_field(self):
    """Checks create_has_field("doc.keep_me", "result") on the big prensor.

    The calculated `doc.result` leaf must have parent index [0, 1, 2] and
    values [True, True, False].
    """
    big_prensor = prensor_test_util.create_big_prensor()
    expr = create_expression.create_expression_from_prensor(big_prensor)
    new_root = expr.create_has_field("doc.keep_me", "result")
    result_expr = new_root.get_descendant_or_error(
        path.Path(["doc", "result"]))
    leaf_node = expression_test_util.calculate_value_slowly(result_expr)
    self.assertAllEqual(leaf_node.parent_index, [0, 1, 2])
    self.assertAllEqual(leaf_node.values, [True, True, False])
def test_get_positional_index_calculate(self):
    """Calculates the positional index of `user.friends` and checks it."""
    nested_prensor = prensor_test_util.create_nested_prensor()
    expr = create_expression.create_expression_from_prensor(nested_prensor)
    friends_path = path.Path(["user", "friends"])
    anonymous_name = path.get_anonymous_field()
    new_root, index_path = index.get_positional_index(
        expr, friends_path, anonymous_name)
    index_expr = new_root.get_descendant_or_error(index_path)
    leaf_node = expression_test_util.calculate_value_slowly(index_expr)
    self.assertAllEqual(leaf_node.parent_index, [0, 1, 1, 2, 3])
    self.assertAllEqual(leaf_node.values, [0, 0, 1, 0, 0])
def test_size_anonymous(self):
    """Calculates size_anonymous of `doc.bar` and checks the result."""
    big_prensor = prensor_test_util.create_big_prensor()
    expr = create_expression.create_expression_from_prensor(big_prensor)
    new_root, size_path = size.size_anonymous(
        expr, path.Path(["doc", "bar"]))
    size_expr = new_root.get_descendant_or_error(size_path)
    leaf_node = expression_test_util.calculate_value_slowly(size_expr)
    self.assertAllEqual(leaf_node.parent_index, [0, 1, 2])
    self.assertAllEqual(leaf_node.values, [1, 2, 1])
def test_broadcast_substructure(self):
    """Tests broadcast of a submessage.

    The result of broadcasting `user` into `doc` looks like:
    {
      foo: 9,
      foorepeated: [9],
      doc: [{bar:["a"], keep_me:False, new_user: [{friends:["a"]}]}],
      user: [{friends:["a"]}]
    },
    {
      foo: 8,
      foorepeated: [8, 7],
      doc: [
        {
          bar: ["b","c"],
          keep_me: True,
          new_user: [{friends:["b", "c"]},{friends:["d"]}]
        },
        {
          bar: ["d"],
          new_user: [{friends:["b", "c"]},{friends:["d"]}]
        }
      ],
      user: [{friends:["b", "c"]},{friends:["d"]}],
    },
    {
      foo: 7,
      foorepeated: [6],
      user: [{friends:["e"]}]
    }
    """
    big_prensor = prensor_test_util.create_big_prensor()
    expr = create_expression.create_expression_from_prensor(big_prensor)
    new_root = broadcast.broadcast(
        expr, path.Path(["user"]), "doc", "new_user")

    # The broadcast submessage itself.
    new_user = new_root.get_child("doc").get_child("new_user")
    self.assertIsNotNone(new_user)
    self.assertTrue(new_user.is_repeated)
    self.assertIsNone(new_user.type)
    self.assertFalse(new_user.is_leaf)

    new_user_node = expression_test_util.calculate_value_slowly(new_user)
    self.assertAllEqual(new_user_node.parent_index, [0, 1, 1, 2, 2])
    self.assertAllEqual(new_user_node.index_to_value, [0, 1, 2, 1, 2])

    # The `friends` leaf underneath the broadcast submessage.
    new_friends = new_user.get_child("friends")
    self.assertIsNotNone(new_friends)
    self.assertTrue(new_friends.is_repeated)
    self.assertEqual(new_friends.type, tf.string)
    self.assertTrue(new_friends.is_leaf)

    new_friends_node = expression_test_util.calculate_value_slowly(
        new_friends)
    self.assertEqual(new_friends_node.values.dtype, tf.string)
    self.assertAllEqual(new_friends_node.values,
                        ["a", "b", "c", "d", "b", "c", "d"])
    self.assertAllEqual(new_friends_node.parent_index,
                        [0, 1, 1, 2, 3, 3, 4])
def get_promote_and_project_maps(features: List[Feature], is_context: bool):
    """Builds the promote map and the project map for `features`.

    Args:
      features: the features to build entries for; each one's `name` and
        `default_value` attributes are read.
      is_context: selects `get_context_feature_path` and a single-step
        destination path when true; otherwise `get_example_feature_path`
        and a destination path under 'examples'.

    Returns:
      A `(promote_map, project_map)` tuple. `promote_map` maps
      `get_step_name(feature.name)` to the feature's path; `project_map`
      maps `feature.name` to the destination path of its leaf.
    """
    if is_context:
        get_feature_path = get_context_feature_path
    else:
        get_feature_path = get_example_feature_path

    def _promote_destination(leaf_name):
        # Context leaves sit at the top level; example leaves under 'examples'.
        if is_context:
            return path.Path([leaf_name])
        return path.Path(['examples', leaf_name])

    promote_map = {}
    project_map = {}
    for feature in features:
        feature_path = get_feature_path(feature)
        promote_map[get_step_name(feature.name)] = feature_path

        # A feature with a default value uses the default-filled step name.
        if feature.default_value is None:
            leaf_name = get_step_name(feature.name)
        else:
            leaf_name = get_default_filled_step_name(feature.name)
        project_map[feature.name] = _promote_destination(leaf_name)
    return promote_map, project_map
def test_create_expression_from_proto_with_any(self):
    """Test an any field.

    Calculates the AllSimple type entry under `my_any` and checks that its
    parent index is [0, 2].
    """
    expr = _get_expression_with_any()
    any_type_path = path.Path([
        "my_any",
        "(type.googleapis.com/struct2tensor.test.AllSimple)",
    ])
    simple_expr = expr.get_descendant_or_error(any_type_path)
    simple_node = expression_test_util.calculate_value_slowly(simple_expr)
    parent_index = simple_node.parent_index
    self.assertAllEqual(parent_index, [0, 2])
def test_promote_and_calculate(self):
    """Tests promoting a leaf on a nested tree.

    Promotes `user.friends` and checks the parent index and values of the
    calculated leaf.
    """
    nested_prensor = prensor_test_util.create_nested_prensor()
    expr = create_expression.create_expression_from_prensor(nested_prensor)
    new_root, promoted_path = promote.promote_anonymous(
        expr, path.Path(["user", "friends"]))
    promoted_expr = new_root.get_descendant_or_error(promoted_path)
    leaf_node = expression_test_util.calculate_value_slowly(promoted_expr)
    self.assertAllEqual(leaf_node.parent_index, [0, 1, 1, 1, 2])
    self.assertAllEqual(leaf_node.values, [b"a", b"b", b"c", b"d", b"e"])