def test_find_possible_splits_10(self):
  """Splitting a lexicalized rule against a binary "and" rule."""
  composed_rule = qcfg_rule.rule_from_string("jump and NT_1 ### jump NT_1")
  known_rule = qcfg_rule.rule_from_string("NT_1 and NT_2 ### NT_1 NT_2")
  expected_rule = qcfg_rule.rule_from_string("jump ### jump")
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_can_derive_false_3(self):
  """The goal rule should NOT be derivable from this ruleset."""
  goal_rule = qcfg_rule.rule_from_string(
      "what are the major cities in the NT_1 through which the major river in NT_2 runs ### answer ( intersection ( major , intersection ( city , loc_2 ( intersection ( NT_1 , traverse_1 ( intersection ( major , intersection ( river , loc_2 ( NT_2 ) ) ) ) ) ) ) ) )"
  )
  # Build the ruleset from plain strings for readability.
  rule_strings = (
      "river ### river",
      "river ### intersection ( river , NT_1 )",
      "cities ### city",
      "what are the NT_1 ### answer ( NT_1 )",
      "NT_1 in NT_2 ### intersection ( NT_1 , loc_2 ( NT_2 ) )",
      "what are the major NT_1 ### answer ( intersection ( major , NT_1 ) )",
      "NT_1 in the NT_2 ### intersection ( NT_1 , loc_2 ( NT_2 ) )",
      "river in NT_1 ### intersection ( river , loc_2 ( NT_1 ) )",
      "the NT_1 NT_2 ### intersection ( NT_1 , NT_2 )",
  )
  rules = {qcfg_rule.rule_from_string(s) for s in rule_strings}
  derived_rules = set()
  can_derive = derivation_utils.can_derive(goal_rule, rules, derived_rules)
  print("derived_rules: %s" % derived_rules)
  self.assertFalse(can_derive)
def test_get_node_tensors(self):
  """Serializes a tiny parse forest and checks the emitted node lists."""
  rules = [
      qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
      qcfg_rule.rule_from_string("bar ### bar"),
      qcfg_rule.rule_from_string("foo bar ### foo bar"),
  ]
  target_node = parsing_utils.get_target_node("foo bar", "foo bar", rules)
  num_tokens = 2
  (node_type_list, node_1_idx_list, node_2_idx_list, application_idx_list,
   num_nodes) = forest_serialization.get_forest_lists(
       target_node, num_tokens, _mock_application_idx_fn)
  # Dump everything for easier debugging on failure.
  for values, label in (
      (node_type_list, "node_type_list"),
      (node_1_idx_list, "node_1_idx_list"),
      (node_2_idx_list, "node_2_idx_list"),
      (application_idx_list, "application_idx_list"),
      (num_nodes, "num_nodes"),
  ):
    print(values, label)
  # Every per-node list must have exactly num_nodes entries.
  for values in (node_type_list, node_1_idx_list, node_2_idx_list,
                 application_idx_list):
    self.assertLen(values, num_nodes)
  self.assertEqual(num_nodes, 4)
  self.assertEqual(node_type_list, [1, 1, 1, 2])
  self.assertEqual(application_idx_list, [1, 1, 1, -1])
  self.assertEqual(node_1_idx_list, [-1, 0, -1, 2])
  self.assertEqual(node_2_idx_list, [-1, -1, -1, 1])
def test_find_possible_splits_11(self):
  """Factoring out "NT_1 right" leaves a duplication rule."""
  composed_rule = qcfg_rule.rule_from_string(
      "NT_1 right twice ### I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1")
  known_rule = qcfg_rule.rule_from_string("NT_1 right ### I_TURN_RIGHT NT_1")
  expected_rule = qcfg_rule.rule_from_string("NT_1 twice ### NT_1 NT_1")
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_parse(self):
  """Parses "dax twice" using a terminal rule plus a duplication rule."""
  grammar = [
      qcfg_rule.rule_from_string("dax ### DAX"),
      qcfg_rule.rule_from_string("NT_1 twice ### NT_1 NT_1"),
  ]
  outputs = qcfg_parser.parse(["dax", "twice"], grammar, _node_fn,
                              _postprocess_cell_fn)
  self.assertEqual(outputs, ["DAX DAX"])
def test_can_parse(self):
  """can_parse accepts a source/target pair covered by the grammar."""
  grammar = [
      qcfg_rule.rule_from_string("dax ### DAX"),
      qcfg_rule.rule_from_string("NT_1 twice ### NT_1 NT_1"),
  ]
  self.assertTrue(
      qcfg_parser.can_parse(
          source="dax twice", target="DAX DAX", rules=grammar))
def test_find_possible_splits_7(self):
  """Factoring a possessive construction out of a director rule."""
  composed_rule = qcfg_rule.rule_from_string(
      "NT_1 ' s NT_2 director ### NT_2 & film.director.film ( NT_1 )")
  known_rule = qcfg_rule.rule_from_string("NT_1 ' s NT_2 ### NT_2 ( NT_1 )")
  expected_rule = qcfg_rule.rule_from_string(
      "NT_1 director ### NT_1 & film.director.film")
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_find_possible_splits_no_split(self):
  """No split exists when the known rule does not factor the target."""
  known_rule = qcfg_rule.rule_from_string(
      "NT_1 named NT_2 ### intersection ( NT_1 , NT_2 )")
  composed_rule = qcfg_rule.rule_from_string(
      "NT_1 have a major city named NT_2 ### intersection ( NT_1 , loc_1 ( intersection ( major , intersection ( city , NT_2 ) ) ) )"
  )
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEmpty(actual_splits)
def test_find_possible_splits_1(self):
  """Splitting "foo bar" against terminal and template rules."""
  template_rule = qcfg_rule.rule_from_string("foo NT_1 ### food NT_1")
  terminal_rule = qcfg_rule.rule_from_string("bar ### bar")
  composed_rule = qcfg_rule.rule_from_string("foo bar ### foo bar")
  actual_splits = split_utils.find_possible_splits(
      composed_rule, {template_rule, terminal_rule})
  expected_rule = qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1")
  self.assertEqual({expected_rule}, actual_splits)
def test_find_possible_splits_6(self):
  """Factoring "written" out of a conjoined edited/written rule."""
  composed_rule = qcfg_rule.rule_from_string(
      "written and edited ### film.film.edited_by , film.film.written_by"
  )
  known_rule = qcfg_rule.rule_from_string(
      "NT_1 and edited ### film.film.edited_by , NT_1")
  expected_rule = qcfg_rule.rule_from_string(
      "written ### film.film.written_by")
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_find_possible_splits_5(self):
  """Abstracting the "Dutch" terminal out of a longer rule."""
  terminal_rule = qcfg_rule.rule_from_string("Dutch ### nat ( m_059j2 )")
  composed_rule = qcfg_rule.rule_from_string(
      "Who influenced M2 ' s Dutch child ### answer ( a(person) & influenced ( nat ( m_059j2 ) & parent ( M2 ) ) )"
  )
  expected_rule = qcfg_rule.rule_from_string(
      "Who influenced M2 ' s NT_1 child ### answer ( a(person) & influenced ( NT_1 & parent ( M2 ) ) )"
  )
  actual_splits = split_utils.find_possible_splits(
      composed_rule, {terminal_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_can_derive_1(self):
  """A simple composed rule is derivable from its two parts."""
  goal_rule = qcfg_rule.rule_from_string("foo bar ### foo bar")
  rules = {
      qcfg_rule.rule_from_string(s)
      for s in ("foo NT_1 ### foo NT_1", "bar ### bar")
  }
  derived_rules = set()
  can_derive = derivation_utils.can_derive(goal_rule, rules, derived_rules)
  print("derived_rules: %s" % derived_rules)
  self.assertTrue(can_derive)
def test_find_possible_splits_2(self):
  """Factoring the "what is" wrapper out of a longer question rule."""
  known_rule = qcfg_rule.rule_from_string("what is NT_1 ### answer ( NT_1 )")
  composed_rule = qcfg_rule.rule_from_string(
      "what is NT_1 traversed by NT_2 ### answer ( intersection ( NT_1 , traverse_1 ( NT_2 ) ) )"
  )
  expected_rule = qcfg_rule.rule_from_string(
      "NT_1 traversed by NT_2 ### intersection ( NT_1 , traverse_1 ( NT_2 ) )"
  )
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_find_possible_splits_4(self):
  """Factoring the possessive "star" rule out of a which-question."""
  known_rule = qcfg_rule.rule_from_string(
      "NT_1 ' s star ### film.actor.film/ns:film.performance.film ( NT_1 )"
  )
  composed_rule = qcfg_rule.rule_from_string(
      "Which NT_1 was NT_2 ' s star ### answer ( film.actor.film/ns:film.performance.film ( NT_2 ) & NT_1 )"
  )
  expected_rule = qcfg_rule.rule_from_string(
      "Which NT_1 was NT_2 ### answer ( NT_2 & NT_1 )")
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_find_possible_splits_3(self):
  """Factoring "NT_1 river" out of a traversal rule."""
  known_rule = qcfg_rule.rule_from_string(
      "NT_1 river ### intersection ( NT_1 , river )")
  composed_rule = qcfg_rule.rule_from_string(
      "NT_1 traversed by the NT_2 river ### intersection ( NT_1 , traverse_1 ( intersection ( NT_2 , river ) ) )"
  )
  expected_rule = qcfg_rule.rule_from_string(
      "NT_1 traversed by the NT_2 ### intersection ( NT_1 , traverse_1 ( NT_2 ) )"
  )
  actual_splits = split_utils.find_possible_splits(composed_rule, {known_rule})
  self.assertEqual({expected_rule}, actual_splits)
def test_can_derive_and_1(self):
  """Conjunction plus duplication derives "NT_1 and run twice"."""
  goal_rule = qcfg_rule.rule_from_string(
      "NT_1 and run twice ### NT_1 I_RUN I_RUN")
  rules = {
      qcfg_rule.rule_from_string(s)
      for s in (
          "NT_1 twice ### NT_1 NT_1",
          "NT_1 and NT_2 ### NT_1 NT_2",
          "run ### I_RUN",
      )
  }
  derived_rules = set()
  can_derive = derivation_utils.can_derive(goal_rule, rules, derived_rules)
  print("derived_rules: %s" % derived_rules)
  self.assertTrue(can_derive)
def test_can_derive_after_1(self):
  """"after" (argument-swapping) plus tripling derives the goal."""
  goal_rule = qcfg_rule.rule_from_string(
      "NT_1 after jump thrice ### I_JUMP I_JUMP I_JUMP NT_1")
  rules = {
      qcfg_rule.rule_from_string(s)
      for s in (
          "NT_1 thrice ### NT_1 NT_1 NT_1",
          "NT_1 after NT_2 ### NT_2 NT_1",
          "jump ### I_JUMP",
      )
  }
  derived_rules = set()
  can_derive = derivation_utils.can_derive(goal_rule, rules, derived_rules)
  print("derived_rules: %s" % derived_rules)
  self.assertTrue(can_derive)
def test_can_derive_and_2(self):
  """Fully lexicalized goal derivable from conjunction and terminals."""
  goal_rule = qcfg_rule.rule_from_string(
      "look and jump left ### I_LOOK I_TURN_LEFT I_JUMP")
  rules = {
      qcfg_rule.rule_from_string(s)
      for s in (
          "NT_1 left ### I_TURN_LEFT NT_1",
          "NT_1 and NT_2 ### NT_1 NT_2",
          "jump ### I_JUMP",
          "look ### I_LOOK",
      )
  }
  derived_rules = set()
  can_derive = derivation_utils.can_derive(goal_rule, rules, derived_rules)
  print("derived_rules: %s" % derived_rules)
  self.assertTrue(can_derive)
def test_dataset_encoding_delta_3(self):
  """Swapping a specific rule for a general one has a known cost delta."""
  current_ruleset = {
      qcfg_rule.rule_from_string(s)
      for s in (
          "NT_1 after NT_2 ### NT_2 NT_1",
          "run ### I_RUN",
          "NT_1 right thrice ### I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1",
          "NT_1 thrice ### NT_1 NT_1 NT_1",
          "NT_1 thrice ### NT_1 I_WALK I_WALK",
          "NT_1 thrice ### NT_1 I_RUN I_RUN",
      )
  }
  dataset = [
      ("run after run right thrice",
       "I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_RUN")
  ]
  rule_to_add = qcfg_rule.rule_from_string(
      "NT_1 right ### I_TURN_RIGHT NT_1")
  rules_to_remove = [
      qcfg_rule.rule_from_string(
          "NT_1 right thrice ### I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1"
      ),
  ]
  # sample_size=None means the full dataset is used.
  dataset_encoding_delta = codelength_utils.get_dataset_encoding_delta(
      None,
      dataset,
      current_ruleset,
      rule_to_add,
      rules_to_remove,
      verbose=True)
  self.assertAlmostEqual(dataset_encoding_delta, 2.1699250)
def test_can_derive_false_2(self):
  """Goal requires "run around right" expansion the rules cannot produce."""
  goal_rule = qcfg_rule.rule_from_string(
      "walk and run around right ### I_WALK I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN"
  )
  rules = {
      qcfg_rule.rule_from_string(s)
      for s in (
          "NT_1 and NT_2 ### NT_1 NT_2",
          "NT_1 and run ### NT_1 I_RUN",
          "walk ### I_WALK",
          "NT_1 right ### I_TURN_RIGHT NT_1",
      )
  }
  derived_rules = set()
  can_derive = derivation_utils.can_derive(goal_rule, rules, derived_rules)
  print("derived_rules: %s" % derived_rules)
  self.assertFalse(can_derive)
def test_get_outputs(self):
  """End-to-end inference on a toy grammar returns the input unchanged."""
  rules = [
      qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
      qcfg_rule.rule_from_string("bar ### bar"),
      qcfg_rule.rule_from_string("foo bar ### foo bar"),
  ]
  wrapper = inference_wrapper.InferenceWrapper(
      test_utils.MockTokenizer(), rules, test_utils.get_test_config(),
      test_utils.get_test_bert_config())
  output, score = wrapper.get_output("foo bar")
  self.assertEqual(output, "foo bar")
  self.assertIsNotNone(score)
def _write_tf_examples(examples_filepath, config, num_examples=8):
  """Write examples as test data.

  Args:
    examples_filepath: Path of the TFRecord file to create.
    config: Converter configuration passed to ExampleConverter.
    num_examples: Number of (identical) serialized examples to write.
  """
  tokenizer = test_utils.MockTokenizer()
  rules = [
      qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
      qcfg_rule.rule_from_string("bar ### bar"),
      qcfg_rule.rule_from_string("foo bar ### foo bar"),
  ]
  converter = example_converter.ExampleConverter(rules, tokenizer, config)
  example = ("foo bar", "foo bar")
  # Fix: the original never closed the TFRecordWriter, leaking the handle
  # and risking unflushed records. The context manager guarantees the file
  # is flushed and closed even if convert() raises.
  with tf.io.TFRecordWriter(examples_filepath) as writer:
    for _ in range(num_examples):
      tf_example = converter.convert(example)
      writer.write(tf_example.SerializeToString())
def test_parse_flat(self):
  """A single flat rule covering the whole input parses directly."""
  grammar = [qcfg_rule.rule_from_string("dax twice ### DAX TWICE")]
  outputs = qcfg_parser.parse(["dax", "twice"], grammar, _node_fn,
                              _postprocess_cell_fn)
  self.assertEqual(outputs, ["DAX TWICE"])
def read_rules(filename):
  """Read rule txt file to list of rules."""
  with gfile.GFile(filename, "r") as txt_file:
    # One rule per line; trailing whitespace/newlines are stripped.
    rules = [qcfg_rule.rule_from_string(line.rstrip()) for line in txt_file]
  print("Loaded %s rules from %s." % (len(rules), filename))
  return rules
def test_dataset_encoding_delta_1(self):
  """Swapping equally-expressive rules yields a zero encoding delta."""
  current_ruleset = {
      qcfg_rule.rule_from_string(s)
      for s in (
          "bar bar foo ### bar bar foo",
          "foo ### foo",
          "bar foo ### bar foo",
          "bar bar NT_1 ### bar bar NT_1",
      )
  }
  dataset = [
      ("bar bar foo", "bar bar foo"),
      ("bar foo", "bar foo"),
      ("foo", "foo"),
  ]
  rule_to_add = qcfg_rule.rule_from_string("bar NT_1 ### bar NT_1")
  rules_to_remove = [
      qcfg_rule.rule_from_string("bar bar NT_1 ### bar bar NT_1")
  ]
  dataset_encoding_delta = codelength_utils.get_dataset_encoding_delta(
      2, dataset, current_ruleset, rule_to_add, rules_to_remove, verbose=True)
  self.assertEqual(dataset_encoding_delta, 0.0)
def test_dataset_encoding_delta_2(self):
  """Replacing a specific rule with an over-general one increases cost."""
  current_ruleset = {
      qcfg_rule.rule_from_string(s)
      for s in (
          "bar foo ### bar foo",
          "bar ### bar",
          "foo ### foo",
          "foo bar ### bar foo",
      )
  }
  dataset = [
      ("bar foo", "bar foo"),
      ("bar", "bar"),
      ("foo", "foo"),
      ("foo bar", "bar foo"),
  ]
  rule_to_add = qcfg_rule.rule_from_string("NT_1 NT_2 ### NT_1 NT_2")
  rules_to_remove = [
      qcfg_rule.rule_from_string("bar foo ### bar foo"),
  ]
  dataset_encoding_delta = codelength_utils.get_dataset_encoding_delta(
      4, dataset, current_ruleset, rule_to_add, rules_to_remove, verbose=True)
  self.assertGreater(dataset_encoding_delta, 0.0)
def test_example_converter(self):
  """Converts a toy example and checks every serialized feature value."""
  tokenizer = test_utils.MockTokenizer()
  config = test_utils.get_test_config()
  rules = [
      qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
      qcfg_rule.rule_from_string("bar ### bar"),
      qcfg_rule.rule_from_string("foo bar ### foo bar"),
      qcfg_rule.rule_from_string("foo bar ### baz"),
  ]
  converter = example_converter.ExampleConverter(rules, tokenizer, config)
  tf_example = converter.convert(("foo bar", "foo bar"))
  print(tf_example)
  feature_dict = tf_example.features.feature
  expected_keys = {
      "wordpiece_ids",
      "num_wordpieces",
      "application_span_begin",
      "application_span_end",
      "application_rule_idx",
      "nu_node_type",
      "nu_node_1_idx",
      "nu_node_2_idx",
      "nu_application_idx",
      "nu_num_nodes",
      "de_node_type",
      "de_node_1_idx",
      "de_node_2_idx",
      "de_application_idx",
      "de_num_nodes",
  }
  self.assertEqual(feature_dict.keys(), expected_keys)
  for key in expected_keys:
    print("%s: %s" % (key, feature_dict[key].int64_list.value))
  # Table of expected int64 feature values, checked one feature at a time.
  expected_values = {
      "num_wordpieces": [4],
      "wordpiece_ids": [1, 5, 6, 2, 0, 0, 0, 0],
      "application_rule_idx": [1, 4, 3, 2, 0, 0, 0, 0],
      "application_span_begin": [1, 1, 1, 2, 0, 0, 0, 0],
      "application_span_end": [2, 2, 2, 2, 0, 0, 0, 0],
      "nu_num_nodes": [4],
      "nu_node_type": [1, 1, 1, 2, 0, 0, 0, 0],
      "nu_application_idx": [3, 0, 2, -1, 0, 0, 0, 0],
      "nu_node_1_idx": [-1, 0, -1, 2, 0, 0, 0, 0],
      "nu_node_2_idx": [-1, -1, -1, 1, 0, 0, 0, 0],
      "de_num_nodes": [6],
      "de_node_type": [1, 1, 1, 1, 2, 2, 0, 0],
      "de_application_idx": [3, 0, 1, 2, -1, -1, 0, 0],
      "de_node_1_idx": [-1, 0, -1, -1, 3, 4, 0, 0],
      "de_node_2_idx": [-1, -1, -1, -1, 2, 1, 0, 0],
  }
  for key, expected_value in expected_values.items():
    self.assertEqual(feature_dict[key].int64_list.value, expected_value)