def test_promote_substructure(self):
    """Promote the `event.doc` submessage and inspect the promoted subtree.

    Checks the expression metadata (repeatedness, type, leaf-ness, known
    field names) of the promoted field and of its `bar` / `keep_me`
    children, then computes their values and checks size and dtypes.
    """
    root_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_deep_prensor())
    source_path = path.Path(["event", "doc"])
    promoted_root = promote.promote(root_expr, source_path, "new_field")

    # Metadata of the promoted submessage itself.
    promoted = promoted_root.get_child_or_error("new_field")
    self.assertIsNotNone(promoted)
    self.assertTrue(promoted.is_repeated)
    self.assertEqual(promoted.known_field_names(),
                     frozenset(["bar", "keep_me"]))

    # Metadata of the repeated string child.
    bar_child = promoted.get_child_or_error("bar")
    self.assertIsNotNone(bar_child)
    self.assertTrue(bar_child.is_repeated)
    self.assertEqual(bar_child.type, tf.string)
    self.assertTrue(bar_child.is_leaf)

    # Metadata of the optional bool child.
    keep_me_child = promoted.get_child_or_error("keep_me")
    self.assertIsNotNone(keep_me_child)
    self.assertFalse(keep_me_child.is_repeated)
    self.assertEqual(keep_me_child.type, tf.bool)
    self.assertTrue(keep_me_child.is_leaf)

    # Computed values for the promoted node and both leaves.
    promoted_value = expression_test_util.calculate_value_slowly(promoted)
    self.assertEqual(promoted_value.size, 3)
    self.assertTrue(promoted_value.is_repeated)

    bar_value = expression_test_util.calculate_value_slowly(bar_child)
    self.assertEqual(bar_value.values.dtype, tf.string)

    keep_me_value = expression_test_util.calculate_value_slowly(
        keep_me_child)
    self.assertEqual(keep_me_value.values.dtype, tf.bool)
def test_slice_end(self):
    """Slice `doc` with no begin and an end of 1 into `new_doc`.

    The sliced tree is materialized in a session, and the parent indices and
    values of `new_doc` and its children are compared to expected values.
    """
    with self.session(use_gpu=False) as sess:
        big_root = create_expression.create_expression_from_prensor(
            prensor_test_util.create_big_prensor())
        sliced_root = slice_expression.slice_expression(
            big_root, path.Path(["doc"]), "new_doc", None, 1)
        prensors = calculate.calculate_prensors([sliced_root])
        result = prensor_value.materialize(prensors[0], sess)

        def node_at(*steps):
            # Fetch the materialized node stored under the given path steps.
            return result.get_descendant_or_error(
                path.Path(list(steps))).node

        self.assertAllEqual(node_at("new_doc").parent_index, [0, 1])
        self.assertAllEqual(
            node_at("new_doc", "keep_me").parent_index, [0, 1])
        self.assertAllEqual(
            node_at("new_doc", "keep_me").values, [False, True])
        self.assertAllEqual(
            node_at("new_doc", "bar").parent_index, [0, 1, 1])
        self.assertAllEqual(
            node_at("new_doc", "bar").values, [b"a", b"b", b"c"])
def test_promote_with_schema_dense_fraction(self):
    """Promote `user.friends` when its presence.min_fraction is below 1.

    The schema is edited so that `user` has exactly 3 values and is always
    present, while `friends` has a min_fraction of 0.9. The promoted
    feature is expected to report a min_fraction of 0.3.
    """
    schema = prensor_test_util.create_big_prensor_schema()

    # Tighten the `user` feature: exactly three values, always present.
    top_level = {entry.name: entry for entry in schema.feature}
    user = top_level["user"]
    user.value_count.min = 3
    user.value_count.max = 3
    user.presence.min_fraction = 1

    # Make `friends` present in only 90% of its parents.
    user_children = {
        entry.name: entry for entry in user.struct_domain.feature
    }
    friends = user_children["friends"]
    friends.presence.min_fraction = 0.9

    root_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    with_schema = root_expr.apply_schema(schema)
    promoted_root, promoted_path = promote.promote_anonymous(
        with_schema, path.Path(["user", "friends"]))
    promoted = promoted_root.get_descendant_or_error(promoted_path)

    promoted_feature = promoted.schema_feature
    self.assertIsNotNone(promoted_feature)
    self.assertEqual(promoted_feature.presence.min_fraction, 0.3)
def test_promote_optional_child_of_repeated(self):
    """Promoting `doc.keep_me` on the nested prensor yields a repeated field."""
    nested = prensor_test_util.create_nested_prensor()
    root_expr = create_expression.create_expression_from_prensor(nested)
    source_path = path.Path(["doc", "keep_me"])
    promoted_root, promoted_path = promote.promote_anonymous(
        root_expr, source_path)
    promoted = promoted_root.get_descendant_or_error(promoted_path)
    self.assertTrue(promoted.is_repeated)
def test_calculate_tree_root_direct(self):
    """Compute a prensor tree that has no sources, for every option set.

    For each entry of `options_to_test`, the simple prensor is wrapped in
    an expression, calculated, and its root size is compared to 3.
    """
    for options in options_to_test:
        simple = prensor_test_util.create_simple_prensor()
        tree = create_expression.create_expression_from_prensor(simple)
        (calculated,) = calculate.calculate_prensors(
            [tree], options=options)
        self.assertAllEqual(calculated.node.size, 3)
def test_filter_by_child_create_nested_prensor_2(self):
    """Filter `doc` by its `keep_me` child on the second nested prensor.

    This variant covers the case where parent_index differs from the
    node's own index.
    """
    source_root = create_expression.create_expression_from_prensor(
        _create_nested_prensor_2())
    filtered_root = filter_expression.filter_by_child(
        source_root, path.create_path("doc"), "keep_me", "new_doc")
    (result,) = calculate.calculate_prensors([filtered_root])

    new_doc_path = path.Path(["new_doc"])
    self.assertAllEqual(
        result.get_descendant_or_error(new_doc_path).node.parent_index, [1])

    keep_me_path = path.Path(["new_doc", "keep_me"])
    self.assertAllEqual(
        result.get_descendant_or_error(keep_me_path).node.parent_index, [0])
    keep_me_path = path.Path(["new_doc", "keep_me"])
    self.assertAllEqual(
        result.get_descendant_or_error(keep_me_path).node.values, [True])

    bar_path = path.Path(["new_doc", "bar"])
    self.assertAllEqual(
        result.get_descendant_or_error(bar_path).node.parent_index, [0, 0])
    bar_path = path.Path(["new_doc", "bar"])
    self.assertAllEqual(
        result.get_descendant_or_error(bar_path).node.values, [b"b", b"c"])
def test_promote_and_calculate_substructure(self):
    """Promote `event.doc.nested_child` on the four-layer prensor.

    The promoted submessage and its `bar` / `keep_me` leaves are computed
    and their parent indices, values and repeatedness are checked.
    """
    four_layer = prensor_test_util.create_four_layer_prensor()
    root_expr = create_expression.create_expression_from_prensor(four_layer)
    promoted_root, promoted_path = promote.promote_anonymous(
        root_expr, path.Path(["event", "doc", "nested_child"]))

    promoted = promoted_root.get_descendant_or_error(promoted_path)
    bar_child = promoted_root.get_descendant_or_error(
        promoted_path.get_child("bar"))
    keep_me_child = promoted_root.get_descendant_or_error(
        promoted_path.get_child("keep_me"))

    # Promotion rewrites the parent index of the submessage itself.
    promoted_value = expression_test_util.calculate_value_slowly(promoted)
    self.assertAllEqual(promoted_value.parent_index, [0, 1, 1, 1])
    self.assertTrue(promoted_value.is_repeated)

    # The `bar` leaf keeps its parent index.
    bar_value = expression_test_util.calculate_value_slowly(bar_child)
    self.assertAllEqual(bar_value.parent_index, [0, 1, 1, 2])
    self.assertAllEqual(bar_value.values, [b"a", b"b", b"c", b"d"])
    self.assertTrue(bar_value.is_repeated)

    # The `keep_me` leaf keeps its parent index as well.
    keep_me_value = expression_test_util.calculate_value_slowly(
        keep_me_child)
    self.assertAllEqual(keep_me_value.parent_index, [0, 1])
    self.assertAllEqual(keep_me_value.values, [False, True])
    self.assertFalse(keep_me_value.is_repeated)
def test_get_schema_missing_features(self):
    """Check get_schema() when the applied schema omits some features."""
    # The big prensor carries four top-level features: foo, foorepeated,
    # doc and user.
    root_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())

    # Describe only three of those features in the schema.
    partial_schema = schema_pb2.Schema()

    foo = partial_schema.feature.add()
    foo.name = "foo"
    foo.type = schema_pb2.FeatureType.INT
    foo.value_count.min = 1
    foo.value_count.max = 1

    foorepeated = partial_schema.feature.add()
    foorepeated.name = "foorepeated"
    foorepeated.type = schema_pb2.FeatureType.INT
    foorepeated.value_count.min = 0
    foorepeated.value_count.max = 5

    doc = partial_schema.feature.add()
    doc.name = "doc"
    doc.type = schema_pb2.FeatureType.STRUCT
    keep_me = schema_pb2.Feature(
        name="keep_me", type=schema_pb2.FeatureType.INT)
    doc.struct_domain.feature.append(keep_me)

    # With default arguments the output schema lists every feature that is
    # present on the expression, not just the ones in the applied schema.
    with_schema = root_expr.apply_schema(partial_schema)
    full_output = with_schema.get_schema()
    self.assertNotEqual(partial_schema, full_output)
    self.assertLen(partial_schema.feature, 3)
    self.assertLen(full_output.feature, 4)

    # With create_schema_features=False only the features from the applied
    # schema propagate to the output schema.
    restricted_output = with_schema.get_schema(create_schema_features=False)
    self.assertLen(restricted_output.feature, 3)
def test_filter_by_child_create_nested_prensor(self):
    """Filter `doc` by its `keep_me` child and check the materialized tree."""
    with self.session(use_gpu=False) as sess:
        source_root = create_expression.create_expression_from_prensor(
            _create_nested_prensor())
        filtered_root = filter_expression.filter_by_child(
            source_root, path.create_path("doc"), "keep_me", "new_doc")
        prensors = calculate.calculate_prensors([filtered_root])
        result = prensor_value.materialize(prensors[0], sess)

        def node_at(*steps):
            # Fetch the materialized node stored under the given path steps.
            return result.get_descendant_or_error(
                path.Path(list(steps))).node

        self.assertAllEqual(node_at("new_doc").parent_index, [1])
        self.assertAllEqual(node_at("new_doc", "keep_me").parent_index, [0])
        self.assertAllEqual(node_at("new_doc", "keep_me").values, [True])
        self.assertAllEqual(node_at("new_doc", "bar").parent_index, [0, 0])
        self.assertAllEqual(node_at("new_doc", "bar").values, [b"b", b"c"])
def test_get_descendants(self):
    """get_descendant_or_error returns the same object as chained child lookups."""
    nested = prensor_test_util.create_nested_prensor()
    root_expr = create_expression.create_expression_from_prensor(nested)

    by_path = root_expr.get_descendant_or_error(
        path.Path(["user", "friends"]))
    user_expr = root_expr.get_child_or_error("user")
    by_steps = user_expr.get_child_or_error("friends")

    self.assertIs(by_path, by_steps)
def test_apply_schema(self):
    """Apply the big-prensor schema and check domains on selected fields."""
    root_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    schema = prensor_test_util.create_big_prensor_schema()
    with_schema = root_expr.apply_schema(schema)

    foo = with_schema.get_descendant(path.Path(["foo"]))
    self.assertIsNotNone(foo)
    foorepeated = with_schema.get_descendant(path.Path(["foorepeated"]))
    self.assertIsNotNone(foorepeated)
    doc_bar = with_schema.get_descendant(path.Path(["doc", "bar"]))
    self.assertIsNotNone(doc_bar)

    # A domain that is already set on the feature is kept.
    self.assertEqual(foo.schema_feature.int_domain.max, 10)

    # An int_domain declared at the schema level is filled in.
    self.assertEqual(foorepeated.schema_feature.int_domain.max, 10)

    # A string_domain declared at the schema level is filled in.
    self.assertEqual(doc_bar.schema_feature.string_domain.value[0], "a")

    # The remaining nested fields are still reachable.
    for steps in (["user", "friends"], ["doc", "keep_me"]):
        self.assertIsNotNone(with_schema.get_descendant(path.Path(steps)))
def test_promote_substructure_with_schema(self):
    """Promote `event.doc` with a schema applied and check schema features.

    The promoted submessage must keep the original struct_domain, and its
    `bar` / `keep_me` children must still carry their schema features.
    """
    root_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_deep_prensor())
    with_schema = root_expr.apply_schema(
        prensor_test_util.create_deep_prensor_schema())

    source_expr = with_schema.get_descendant_or_error(
        path.Path(["event", "doc"]))
    source_feature = source_expr.schema_feature

    promoted_root, promoted_path = promote.promote_anonymous(
        with_schema, path.Path(["event", "doc"]))
    promoted = promoted_root.get_descendant_or_error(promoted_path)
    promoted_feature = promoted.schema_feature
    self.assertIsNotNone(promoted_feature)

    # Promotion must leave the struct_domain of the feature untouched.
    self.assertProtoEquals(promoted_feature.struct_domain,
                           source_feature.struct_domain)

    bar_path = promoted_path.concat(path.Path(["bar"]))
    bar_feature = promoted_root.get_descendant_or_error(
        bar_path).schema_feature
    self.assertIsNotNone(bar_feature)
    self.assertEqual(bar_feature.string_domain.value[0], "a")

    keep_me_path = promoted_path.concat(path.Path(["keep_me"]))
    keep_me_feature = promoted_root.get_descendant_or_error(
        keep_me_path).schema_feature
    self.assertIsNotNone(keep_me_feature)
    self.assertEqual(keep_me_feature.presence.min_count, 1)
def test_reroot_and_create_proto_index(self):
    """Reroot the big prensor at `doc` and add a `proto_index` field.

    Checks the expression metadata of the `bar` leaf and of the new
    `proto_index` leaf, then the dtypes, values and parent indices of
    their computed nodes.
    """
    expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor()).reroot(
            "doc").create_proto_index("proto_index")
    proto_index = expr.get_child("proto_index")
    new_field = expr.get_child("bar")

    # Guard both lookups before the expressions are used. These checks
    # previously ran only after calculate_value_slowly() had consumed the
    # results, so a missing child (get_child() presumably returns None in
    # that case) would have failed inside the calculation, not here.
    self.assertIsNotNone(new_field)
    self.assertIsNotNone(proto_index)

    leaf_node = expression_test_util.calculate_value_slowly(new_field)
    proto_index_node = expression_test_util.calculate_value_slowly(
        proto_index)

    # Metadata and dtype of the `bar` leaf.
    self.assertTrue(new_field.is_repeated)
    self.assertEqual(new_field.type, tf.string)
    self.assertTrue(new_field.is_leaf)
    self.assertEqual(new_field.known_field_names(), frozenset())
    self.assertEqual(leaf_node.values.dtype, tf.string)

    # Metadata and dtype of the generated `proto_index` leaf.
    self.assertFalse(proto_index.is_repeated)
    self.assertEqual(proto_index.type, tf.int64)
    self.assertTrue(proto_index.is_leaf)
    self.assertEqual(proto_index.known_field_names(), frozenset())
    self.assertEqual(proto_index_node.values.dtype, tf.int64)

    # Computed contents of both leaves.
    self.assertAllEqual([b"a", b"b", b"c", b"d"], leaf_node.values)
    self.assertAllEqual([0, 1, 1, 2], leaf_node.parent_index)
    self.assertAllEqual([0, 1, 1], proto_index_node.values)
    self.assertAllEqual([0, 1, 2], proto_index_node.parent_index)
def test_map_many_values(self):
    """Add the `foo` and `bar` leaves element-wise into `new_field`.

    The prensor is built from a root of size 3 and two optional int leaves
    that share the same parent indices. The mapped leaf is evaluated in a
    session.
    """
    with self.session(use_gpu=False) as sess:
        descendants = {
            path.Path([]):
                prensor_test_util.create_root_node(3),
            path.Path(["foo"]):
                prensor_test_util.create_optional_leaf_node(
                    [0, 2, 3], [9, 8, 7]),
            path.Path(["bar"]):
                prensor_test_util.create_optional_leaf_node(
                    [0, 2, 3], [10, 20, 30]),
        }
        source = prensor.create_prensor_from_descendant_nodes(descendants)
        root_expr = create_expression.create_expression_from_prensor(source)

        mapped_root, mapped_path = map_values.map_many_values(
            root_expr, path.Path([]), ["foo", "bar"], lambda x, y: x + y,
            tf.int64, "new_field")
        mapped_leaf = expression_test_util.calculate_value_slowly(
            mapped_root.get_descendant_or_error(mapped_path))

        parent_index, values = sess.run(
            [mapped_leaf.parent_index, mapped_leaf.values])
        self.assertAllEqual(parent_index, [0, 2, 3])
        self.assertAllEqual(values, [19, 28, 37])
def test_filter_by_child(self):
    """Filter `doc` of the big prensor by `keep_me` into `new_doc`."""
    source_root = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    doc_path = path.create_path("doc")
    filtered_root = filter_expression.filter_by_child(
        source_root, doc_path, "keep_me", "new_doc")
    (result,) = calculate.calculate_prensors([filtered_root])

    def node_at(*steps):
        # Fetch the calculated node stored under the given path steps.
        return result.get_descendant_or_error(path.Path(list(steps))).node

    self.assertAllEqual(node_at("new_doc").parent_index, [1])
    self.assertAllEqual(node_at("new_doc", "keep_me").parent_index, [0])
    self.assertAllEqual(node_at("new_doc", "keep_me").values, [True])
    self.assertAllEqual(node_at("new_doc", "bar").parent_index, [0, 0])
    self.assertAllEqual(node_at("new_doc", "bar").values, [b"b", b"c"])
def test_calculate_root_indirect(self):
    """Compute the root of a tree that wraps another tree, per option set.

    The simple prensor expression is passed through add_paths with an
    empty mapping, and the resulting root value must still have size 3.
    """
    for options in options_to_test:
        simple = prensor_test_util.create_simple_prensor()
        base_tree = create_expression.create_expression_from_prensor(simple)
        wrapped_tree = expression_add.add_paths(base_tree, {})
        (root_value,) = calculate.calculate_values(
            [wrapped_tree], options=options)
        self.assertAllEqual(root_value.size, 3)
def test_size_anonymous(self):
    """size_anonymous on `doc.bar` yields one count per `doc` element."""
    big = prensor_test_util.create_big_prensor()
    root_expr = create_expression.create_expression_from_prensor(big)
    sized_root, size_path = size.size_anonymous(
        root_expr, path.Path(["doc", "bar"]))
    size_expr = sized_root.get_descendant_or_error(size_path)
    size_leaf = expression_test_util.calculate_value_slowly(size_expr)
    self.assertAllEqual(size_leaf.parent_index, [0, 1, 2])
    self.assertAllEqual(size_leaf.values, [1, 2, 1])
def test_get_positional_index_calculate(self):
    """Compute the positional index of `user.friends` on the nested prensor."""
    nested = prensor_test_util.create_nested_prensor()
    root_expr = create_expression.create_expression_from_prensor(nested)
    friends_path = path.Path(["user", "friends"])
    indexed_root, index_path = index.get_positional_index(
        root_expr, friends_path, path.get_anonymous_field())
    index_expr = indexed_root.get_descendant_or_error(index_path)
    index_leaf = expression_test_util.calculate_value_slowly(index_expr)
    self.assertAllEqual(index_leaf.parent_index, [0, 1, 1, 2, 3])
    self.assertAllEqual(index_leaf.values, [0, 0, 1, 0, 0])
def test_get_known_descendants(self):
    """get_known_descendants lists every field path of the nested prensor."""
    nested = prensor_test_util.create_nested_prensor()
    root_expr = create_expression.create_expression_from_prensor(nested)
    known = root_expr.get_known_descendants()

    expected_steps = (
        ["doc"],
        ["doc", "bar"],
        ["doc", "keep_me"],
        ["user"],
        ["user", "friends"],
    )
    for steps in expected_steps:
        self.assertIn(path.Path(steps), known)
def test_create_has_field(self):
    """create_has_field on `doc.keep_me` marks which docs have the field."""
    big = prensor_test_util.create_big_prensor()
    root_expr = create_expression.create_expression_from_prensor(big)
    with_flag = root_expr.create_has_field("doc.keep_me", "result")
    flag_path = path.Path(["doc", "result"])
    flag_expr = with_flag.get_descendant_or_error(flag_path)
    flag_leaf = expression_test_util.calculate_value_slowly(flag_expr)
    self.assertAllEqual(flag_leaf.parent_index, [0, 1, 2])
    self.assertAllEqual(flag_leaf.values, [True, True, False])
def test_size_missing_value(self):
    """size on `doc.keep_me` reports 0 for the doc that lacks the field."""
    big = prensor_test_util.create_big_prensor()
    root_expr = create_expression.create_expression_from_prensor(big)
    sized_root = size.size(root_expr, path.Path(["doc", "keep_me"]), "result")
    size_path = path.Path(["doc", "result"])
    size_expr = sized_root.get_descendant_or_error(size_path)
    size_leaf = expression_test_util.calculate_value_slowly(size_expr)
    self.assertAllEqual(size_leaf.parent_index, [0, 1, 2])
    self.assertAllEqual(size_leaf.values, [1, 1, 0])
def test_broadcast_substructure(self):
    """Broadcast the `user` submessage into `doc` as `new_user`.

    After the broadcast the records are expected to look like:

      {
        foo: 9,
        foorepeated: [9],
        doc: [{bar: ["a"], keep_me: False, new_user: [{friends: ["a"]}]}],
        user: [{friends: ["a"]}]
      },
      {
        foo: 8,
        foorepeated: [8, 7],
        doc: [
          {
            bar: ["b", "c"],
            keep_me: True,
            new_user: [{friends: ["b", "c"]}, {friends: ["d"]}]
          },
          {
            bar: ["d"],
            new_user: [{friends: ["b", "c"]}, {friends: ["d"]}]
          }
        ],
        user: [{friends: ["b", "c"]}, {friends: ["d"]}]
      },
      {
        foo: 7,
        foorepeated: [6],
        user: [{friends: ["e"]}]
      }
    """
    root_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    broadcast_root = broadcast.broadcast(
        root_expr, path.Path(["user"]), "doc", "new_user")

    # The broadcast submessage: a repeated, untyped, non-leaf field.
    doc_expr = broadcast_root.get_child("doc")
    broadcast_user = doc_expr.get_child("new_user")
    self.assertIsNotNone(broadcast_user)
    self.assertTrue(broadcast_user.is_repeated)
    self.assertIsNone(broadcast_user.type)
    self.assertFalse(broadcast_user.is_leaf)

    user_value = expression_test_util.calculate_value_slowly(broadcast_user)
    self.assertAllEqual(user_value.parent_index, [0, 1, 1, 2, 2])
    self.assertAllEqual(user_value.index_to_value, [0, 1, 2, 1, 2])

    # Its `friends` child: a repeated string leaf.
    broadcast_friends = broadcast_user.get_child("friends")
    self.assertIsNotNone(broadcast_friends)
    self.assertTrue(broadcast_friends.is_repeated)
    self.assertEqual(broadcast_friends.type, tf.string)
    self.assertTrue(broadcast_friends.is_leaf)

    friends_value = expression_test_util.calculate_value_slowly(
        broadcast_friends)
    self.assertEqual(friends_value.values.dtype, tf.string)
    self.assertAllEqual(friends_value.values,
                        ["a", "b", "c", "d", "b", "c", "d"])
    self.assertAllEqual(friends_value.parent_index, [0, 1, 1, 2, 3, 3, 4])
def test_promote_and_calculate(self):
    """Promote the `user.friends` leaf of the nested prensor and compute it."""
    nested = prensor_test_util.create_nested_prensor()
    root_expr = create_expression.create_expression_from_prensor(nested)
    friends_path = path.Path(["user", "friends"])
    promoted_root, promoted_path = promote.promote_anonymous(
        root_expr, friends_path)
    promoted = promoted_root.get_descendant_or_error(promoted_path)
    promoted_leaf = expression_test_util.calculate_value_slowly(promoted)
    self.assertAllEqual(promoted_leaf.parent_index, [0, 1, 1, 1, 2])
    self.assertAllEqual(promoted_leaf.values,
                        [b"a", b"b", b"c", b"d", b"e"])
def test_broadcast_and_calculate(self):
    """Broadcast `foo` into `user` anonymously and compute the new leaf."""
    big = prensor_test_util.create_big_prensor()
    root_expr = create_expression.create_expression_from_prensor(big)
    broadcast_root, broadcast_path = broadcast.broadcast_anonymous(
        root_expr, path.Path(["foo"]), "user")
    broadcast_expr = broadcast_root.get_descendant_or_error(broadcast_path)
    broadcast_leaf = expression_test_util.calculate_value_slowly(
        broadcast_expr)
    self.assertAllEqual(broadcast_leaf.parent_index, [0, 1, 2, 3])
    self.assertAllEqual(broadcast_leaf.values, [9, 8, 8, 7])
def test_get_schema_no_schema(self):
    """Check get_schema() on an expression that never had a schema applied."""
    big = prensor_test_util.create_big_prensor()
    root_expr = create_expression.create_expression_from_prensor(big)

    # With default arguments, four features appear in the schema.
    generated = root_expr.get_schema()
    self.assertLen(generated.feature, 4)

    # With create_schema_features=False nothing is generated, so the result
    # equals an empty Schema message.
    bare = root_expr.get_schema(create_schema_features=False)
    self.assertEmpty(bare.feature)
    self.assertEqual(schema_pb2.Schema(), bare)
def test_calculate_tree_root_direct(self):
    """Compute a prensor tree that has no sources, for every option set.

    Each option set gets its own session. The root size of the calculated
    prensor is evaluated there and compared to 3.
    """
    for options in options_to_test:
        with self.session(use_gpu=False) as sess:
            simple = prensor_test_util.create_simple_prensor()
            tree = create_expression.create_expression_from_prensor(simple)
            (calculated,) = calculate.calculate_prensors(
                [tree], options=options)
            root_size = sess.run(calculated.node.size)
            self.assertAllEqual(root_size, 3)
def test_project(self):
    """project() keeps the requested paths and drops `doc.bar`."""
    nested = prensor_test_util.create_nested_prensor()
    root_expr = create_expression.create_expression_from_prensor(nested)
    kept_paths = [
        path.Path(["user", "friends"]),
        path.Path(["doc", "keep_me"]),
    ]
    projected = root_expr.project(kept_paths)

    # Both requested paths are still reachable.
    for steps in (["user", "friends"], ["doc", "keep_me"]):
        self.assertIsNotNone(projected.get_descendant(path.Path(steps)))

    # A path that was not requested is gone.
    self.assertIsNone(projected.get_descendant(path.Path(["doc", "bar"])))
def test_calculate_promote_anonymous(self):
    """Run the promote_anonymous scenario through calculate_values.

    Mirrors promote_test.PromoteValuesTest, once per entry of
    `options_to_test`.
    """
    for options in options_to_test:
        nested = prensor_test_util.create_nested_prensor()
        root_expr = create_expression.create_expression_from_prensor(nested)
        promoted_root, promoted_path = promote.promote_anonymous(
            root_expr, path.Path(["user", "friends"]))
        promoted = promoted_root.get_descendant_or_error(promoted_path)
        (promoted_leaf,) = calculate.calculate_values(
            [promoted], options=options)
        self.assertAllEqual(promoted_leaf.parent_index, [0, 1, 1, 1, 2])
        self.assertAllEqual(promoted_leaf.values,
                            [b"a", b"b", b"c", b"d", b"e"])
def test_promote_with_schema(self):
    """Promoting `user.friends` keeps the string_domain from the schema."""
    root_expr = create_expression.create_expression_from_prensor(
        prensor_test_util.create_big_prensor())
    with_schema = root_expr.apply_schema(
        prensor_test_util.create_big_prensor_schema())
    promoted_root, promoted_path = promote.promote_anonymous(
        with_schema, path.Path(["user", "friends"]))
    promoted = promoted_root.get_descendant_or_error(promoted_path)
    promoted_feature = promoted.schema_feature
    self.assertIsNotNone(promoted_feature)
    self.assertEqual(promoted_feature.string_domain.value[0], "a")
def test_create_size_field(self):
    """create_size_field on `doc.bar` yields one count per `doc` element.

    The new `doc.result` leaf is evaluated in a session before its parent
    indices and values are compared.
    """
    with self.session(use_gpu=False) as sess:
        big = prensor_test_util.create_big_prensor()
        root_expr = create_expression.create_expression_from_prensor(big)
        sized_root = root_expr.create_size_field("doc.bar", "result")
        size_expr = sized_root.get_descendant_or_error(
            path.Path(["doc", "result"]))
        size_leaf = expression_test_util.calculate_value_slowly(size_expr)
        parent_index, values = sess.run(
            [size_leaf.parent_index, size_leaf.values])
        self.assertAllEqual(parent_index, [0, 1, 2])
        self.assertAllEqual(values, [1, 2, 1])