def test_find_possible_splits_10(self):
     """Splitting `jump and NT_1` with a binary conjunction rule."""
     target = qcfg_rule.rule_from_string("jump and NT_1 ### jump NT_1")
     conjunction = qcfg_rule.rule_from_string("NT_1 and NT_2 ### NT_1 NT_2")
     # Factoring the conjunction out should leave only the atomic jump rule.
     want = {qcfg_rule.rule_from_string("jump ### jump")}
     result = split_utils.find_possible_splits(target, {conjunction})
     self.assertEqual(want, result)
 def test_can_derive_false_3(self):
     """A complex geography goal that this ruleset cannot derive."""
     goal = qcfg_rule.rule_from_string(
         "what are the major cities in the NT_1 through which the major river in NT_2 runs ### answer ( intersection ( major , intersection ( city , loc_2 ( intersection ( NT_1 , traverse_1 ( intersection ( major , intersection ( river , loc_2 ( NT_2 ) ) ) ) ) ) ) ) )"
     )
     # Build the candidate ruleset from its string encodings.
     ruleset = {
         qcfg_rule.rule_from_string(s)
         for s in (
             "river ### river",
             "river ### intersection ( river , NT_1 )",
             "cities ### city",
             "what are the NT_1 ### answer ( NT_1 )",
             "NT_1 in NT_2 ### intersection ( NT_1 , loc_2 ( NT_2 ) )",
             "what are the major NT_1 ### answer ( intersection ( major , NT_1 ) )",
             "NT_1 in the NT_2 ### intersection ( NT_1 , loc_2 ( NT_2 ) )",
             "river in NT_1 ### intersection ( river , loc_2 ( NT_1 ) )",
             "the NT_1 NT_2 ### intersection ( NT_1 , NT_2 )",
         )
     }
     derived = set()
     result = derivation_utils.can_derive(goal, ruleset, derived)
     print("derived_rules: %s" % derived)
     self.assertFalse(result)
Example #3
0
    def test_get_node_tensors(self):
        """Serializes the parse forest for a two-token example."""
        grammar = [
            qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
            qcfg_rule.rule_from_string("bar ### bar"),
            qcfg_rule.rule_from_string("foo bar ### foo bar"),
        ]
        root = parsing_utils.get_target_node("foo bar", "foo bar", grammar)

        # Two source tokens in "foo bar".
        (node_types, node_1_idxs, node_2_idxs, application_idxs,
         num_nodes) = forest_serialization.get_forest_lists(
             root, 2, _mock_application_idx_fn)

        print(node_types, "node_type_list")
        print(node_1_idxs, "node_1_idx_list")
        print(node_2_idxs, "node_2_idx_list")
        print(application_idxs, "application_idx_list")
        print(num_nodes, "num_nodes")

        # Every per-node list carries exactly one entry per serialized node.
        for per_node_list in (node_types, node_1_idxs, node_2_idxs,
                              application_idxs):
            self.assertLen(per_node_list, num_nodes)

        self.assertEqual(num_nodes, 4)
        self.assertEqual(node_types, [1, 1, 1, 2])
        self.assertEqual(application_idxs, [1, 1, 1, -1])
        self.assertEqual(node_1_idxs, [-1, 0, -1, 2])
        self.assertEqual(node_2_idxs, [-1, -1, -1, 1])
 def test_find_possible_splits_11(self):
     """Splitting a doubled right-turn rule with the single-turn rule."""
     target = qcfg_rule.rule_from_string(
         "NT_1 right twice ### I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1")
     single_turn = qcfg_rule.rule_from_string(
         "NT_1 right ### I_TURN_RIGHT NT_1")
     result = split_utils.find_possible_splits(target, {single_turn})
     want = {qcfg_rule.rule_from_string("NT_1 twice ### NT_1 NT_1")}
     self.assertEqual(want, result)
Example #5
0
 def test_parse(self):
     """Parsing `dax twice` with an atomic and a duplication rule."""
     grammar = [
         qcfg_rule.rule_from_string("dax ### DAX"),
         qcfg_rule.rule_from_string("NT_1 twice ### NT_1 NT_1"),
     ]
     outputs = qcfg_parser.parse(["dax", "twice"], grammar, _node_fn,
                                 _postprocess_cell_fn)
     self.assertEqual(outputs, ["DAX DAX"])
Example #6
0
 def test_can_parse(self):
     """`dax twice` should be parseable to `DAX DAX` under these rules."""
     grammar = [
         qcfg_rule.rule_from_string("dax ### DAX"),
         qcfg_rule.rule_from_string("NT_1 twice ### NT_1 NT_1"),
     ]
     self.assertTrue(
         qcfg_parser.can_parse(source="dax twice",
                               target="DAX DAX",
                               rules=grammar))
 def test_find_possible_splits_7(self):
     """Factoring the possessive rule out of the director rule."""
     target = qcfg_rule.rule_from_string(
         "NT_1 ' s NT_2 director ### NT_2 & film.director.film ( NT_1 )")
     possessive = qcfg_rule.rule_from_string(
         "NT_1 ' s NT_2 ### NT_2 ( NT_1 )")
     result = split_utils.find_possible_splits(target, {possessive})
     want = {
         qcfg_rule.rule_from_string(
             "NT_1 director ### NT_1 & film.director.film")
     }
     self.assertEqual(want, result)
 def test_find_possible_splits_no_split(self):
     """No valid split exists, so the result must be empty."""
     named_rule = qcfg_rule.rule_from_string(
         "NT_1 named NT_2 ### intersection ( NT_1 , NT_2 )")
     target = qcfg_rule.rule_from_string(
         "NT_1 have a major city named NT_2 ### intersection ( NT_1 , loc_1 ( intersection ( major , intersection ( city , NT_2 ) ) ) )"
     )
     result = split_utils.find_possible_splits(target, {named_rule})
     self.assertEmpty(result)
    def test_find_possible_splits_1(self):
        """Only the matching prefix form survives as a split candidate."""
        prefix_rule = qcfg_rule.rule_from_string("foo NT_1 ### food NT_1")
        atom_rule = qcfg_rule.rule_from_string("bar ### bar")
        target = qcfg_rule.rule_from_string("foo bar ### foo bar")

        result = split_utils.find_possible_splits(
            target, {prefix_rule, atom_rule})
        want = {qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1")}
        self.assertEqual(want, result)
 def test_find_possible_splits_6(self):
     """Splitting out the `written` fragment from a conjunction rule."""
     target = qcfg_rule.rule_from_string(
         "written and edited ### film.film.edited_by , film.film.written_by"
     )
     partial = qcfg_rule.rule_from_string(
         "NT_1 and edited ### film.film.edited_by , NT_1")
     result = split_utils.find_possible_splits(target, {partial})
     want = {
         qcfg_rule.rule_from_string("written ### film.film.written_by")
     }
     self.assertEqual(want, result)
 def test_find_possible_splits_5(self):
     """Abstracting the `Dutch` sub-rule out of a long question rule."""
     dutch = qcfg_rule.rule_from_string("Dutch ### nat ( m_059j2 )")
     target = qcfg_rule.rule_from_string(
         "Who influenced M2 ' s Dutch child ### answer ( a(person) & influenced ( nat ( m_059j2 ) & parent ( M2 ) ) )"
     )
     result = split_utils.find_possible_splits(target, {dutch})
     want = {
         qcfg_rule.rule_from_string(
             "Who influenced M2 ' s NT_1 child ### answer ( a(person) & influenced ( NT_1 & parent ( M2 ) ) )"
         )
     }
     self.assertEqual(want, result)
 def test_can_derive_1(self):
     """`foo bar` is derivable from the prefix and atom rules."""
     goal = qcfg_rule.rule_from_string("foo bar ### foo bar")
     ruleset = {
         qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
         qcfg_rule.rule_from_string("bar ### bar"),
     }
     derived = set()
     result = derivation_utils.can_derive(goal, ruleset, derived)
     print("derived_rules: %s" % derived)
     self.assertTrue(result)
    def test_find_possible_splits_2(self):
        """The answer-wrapper rule factors out of the traversal rule."""
        wrapper = qcfg_rule.rule_from_string(
            "what is NT_1 ### answer ( NT_1 )")
        target = qcfg_rule.rule_from_string(
            "what is NT_1 traversed by NT_2 ### answer ( intersection ( NT_1 , traverse_1 ( NT_2 ) ) )"
        )

        result = split_utils.find_possible_splits(target, {wrapper})
        want = {
            qcfg_rule.rule_from_string(
                "NT_1 traversed by NT_2 ### intersection ( NT_1 , traverse_1 ( NT_2 ) )"
            )
        }
        self.assertEqual(want, result)
 def test_find_possible_splits_4(self):
     """The possessive-star rule factors out of the question rule."""
     star = qcfg_rule.rule_from_string(
         "NT_1 ' s star ### film.actor.film/ns:film.performance.film ( NT_1 )"
     )
     target = qcfg_rule.rule_from_string(
         "Which NT_1 was NT_2 ' s star ### answer ( film.actor.film/ns:film.performance.film ( NT_2 ) & NT_1 )"
     )
     result = split_utils.find_possible_splits(target, {star})
     want = {
         qcfg_rule.rule_from_string(
             "Which NT_1 was NT_2 ### answer ( NT_2 & NT_1 )")
     }
     self.assertEqual(want, result)
    def test_find_possible_splits_3(self):
        """The `NT_1 river` rule factors out of the traversal rule."""
        river_rule = qcfg_rule.rule_from_string(
            "NT_1 river ### intersection ( NT_1 , river )")
        target = qcfg_rule.rule_from_string(
            "NT_1 traversed by the NT_2 river ### intersection ( NT_1 , traverse_1 ( intersection ( NT_2 , river ) ) )"
        )

        result = split_utils.find_possible_splits(target, {river_rule})
        want = {
            qcfg_rule.rule_from_string(
                "NT_1 traversed by the NT_2 ### intersection ( NT_1 , traverse_1 ( NT_2 ) )"
            )
        }
        self.assertEqual(want, result)
 def test_can_derive_and_1(self):
     """Conjunction goal built from `and`, `twice`, and `run` rules."""
     goal = qcfg_rule.rule_from_string(
         "NT_1 and run twice ### NT_1 I_RUN I_RUN")
     ruleset = {
         qcfg_rule.rule_from_string(s)
         for s in (
             "NT_1 twice ### NT_1 NT_1",
             "NT_1 and NT_2 ### NT_1 NT_2",
             "run ### I_RUN",
         )
     }
     derived = set()
     result = derivation_utils.can_derive(goal, ruleset, derived)
     print("derived_rules: %s" % derived)
     self.assertTrue(result)
 def test_can_derive_after_1(self):
     """`after` goal built from thrice, after, and jump rules."""
     goal = qcfg_rule.rule_from_string(
         "NT_1 after jump thrice ### I_JUMP I_JUMP I_JUMP NT_1")
     ruleset = {
         qcfg_rule.rule_from_string(s)
         for s in (
             "NT_1 thrice ### NT_1 NT_1 NT_1",
             "NT_1 after NT_2 ### NT_2 NT_1",
             "jump ### I_JUMP",
         )
     }
     derived = set()
     result = derivation_utils.can_derive(goal, ruleset, derived)
     print("derived_rules: %s" % derived)
     self.assertTrue(result)
 def test_can_derive_and_2(self):
     """`look and jump left` derivation using a left-rotation rule."""
     goal = qcfg_rule.rule_from_string(
         "look and jump left ### I_LOOK I_TURN_LEFT I_JUMP")
     ruleset = {
         qcfg_rule.rule_from_string(s)
         for s in (
             "NT_1 left ### I_TURN_LEFT NT_1",
             "NT_1 and NT_2 ### NT_1 NT_2",
             "jump ### I_JUMP",
             "look ### I_LOOK",
         )
     }
     derived = set()
     result = derivation_utils.can_derive(goal, ruleset, derived)
     print("derived_rules: %s" % derived)
     self.assertTrue(result)
Example #19
0
  def test_dataset_encoding_delta_3(self):
    """Swapping the specialized right-thrice rule for a right rule."""
    ruleset = {
        qcfg_rule.rule_from_string(s)
        for s in (
            "NT_1 after NT_2 ### NT_2 NT_1",
            "run ### I_RUN",
            "NT_1 right thrice ### I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1",
            "NT_1 thrice ### NT_1 NT_1 NT_1",
            "NT_1 thrice ### NT_1 I_WALK I_WALK",
            "NT_1 thrice ### NT_1 I_RUN I_RUN",
        )
    }

    dataset = [
        ("run after run right thrice",
         "I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_RUN")
    ]

    to_add = qcfg_rule.rule_from_string("NT_1 right ### I_TURN_RIGHT NT_1")
    to_remove = [
        qcfg_rule.rule_from_string(
            "NT_1 right thrice ### I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1 I_TURN_RIGHT NT_1"
        ),
    ]

    # sample_size=None means the full dataset is evaluated (no sampling).
    delta = codelength_utils.get_dataset_encoding_delta(
        None,
        dataset,
        ruleset,
        to_add,
        to_remove,
        verbose=True)
    self.assertAlmostEqual(delta, 2.1699250)
 def test_can_derive_false_2(self):
     """The around-right expansion is not derivable from these rules."""
     goal = qcfg_rule.rule_from_string(
         "walk and run around right ### I_WALK I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN I_TURN_RIGHT I_RUN"
     )
     ruleset = {
         qcfg_rule.rule_from_string(s)
         for s in (
             "NT_1 and NT_2 ### NT_1 NT_2",
             "NT_1 and run ### NT_1 I_RUN",
             "walk ### I_WALK",
             "NT_1 right ### I_TURN_RIGHT NT_1",
         )
     }
     derived = set()
     result = derivation_utils.can_derive(goal, ruleset, derived)
     print("derived_rules: %s" % derived)
     self.assertFalse(result)
Example #21
0
    def test_get_outputs(self):
        """End-to-end inference on `foo bar` should return it unchanged."""
        grammar = [
            qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
            qcfg_rule.rule_from_string("bar ### bar"),
            qcfg_rule.rule_from_string("foo bar ### foo bar"),
        ]
        wrapper = inference_wrapper.InferenceWrapper(
            test_utils.MockTokenizer(), grammar, test_utils.get_test_config(),
            test_utils.get_test_bert_config())

        output, score = wrapper.get_output("foo bar")
        self.assertEqual(output, "foo bar")
        # The score's exact value is model-dependent; only presence is checked.
        self.assertIsNotNone(score)
Example #22
0
def _write_tf_examples(examples_filepath, config, num_examples=8):
  """Write examples as test data.

  Converts the same ("foo bar", "foo bar") example `num_examples` times and
  writes the serialized records to a TFRecord file.

  Args:
    examples_filepath: Path of the TFRecord file to create.
    config: Converter configuration passed to ExampleConverter.
    num_examples: Number of (identical) records to write.
  """
  tokenizer = test_utils.MockTokenizer()
  rules = [
      qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
      qcfg_rule.rule_from_string("bar ### bar"),
      qcfg_rule.rule_from_string("foo bar ### foo bar"),
  ]

  converter = example_converter.ExampleConverter(rules, tokenizer, config)
  example = ("foo bar", "foo bar")

  # Bug fix: the writer was previously never closed, so buffered records
  # could be lost. The context manager guarantees flush/close on exit.
  with tf.io.TFRecordWriter(examples_filepath) as writer:
    for _ in range(num_examples):
      tf_example = converter.convert(example)
      writer.write(tf_example.SerializeToString())
Example #23
0
 def test_parse_flat(self):
     """A single flat rule should parse the whole input."""
     grammar = [qcfg_rule.rule_from_string("dax twice ### DAX TWICE")]
     outputs = qcfg_parser.parse(["dax", "twice"], grammar, _node_fn,
                                 _postprocess_cell_fn)
     self.assertEqual(outputs, ["DAX TWICE"])
Example #24
0
def read_rules(filename):
    """Read rule txt file to list of rules."""
    # One rule per line; trailing whitespace/newlines are stripped first.
    with gfile.GFile(filename, "r") as txt_file:
        rules = [
            qcfg_rule.rule_from_string(line.rstrip()) for line in txt_file
        ]
    print("Loaded %s rules from %s." % (len(rules), filename))
    return rules
Example #25
0
  def test_dataset_encoding_delta_1(self):
    """Replacing `bar bar NT_1` with `bar NT_1` leaves the cost unchanged."""
    ruleset = {
        qcfg_rule.rule_from_string(s)
        for s in (
            "bar bar foo ### bar bar foo",
            "foo ### foo",
            "bar foo ### bar foo",
            "bar bar NT_1 ### bar bar NT_1",
        )
    }

    dataset = [
        ("bar bar foo", "bar bar foo"),
        ("bar foo", "bar foo"),
        ("foo", "foo"),
    ]

    to_add = qcfg_rule.rule_from_string("bar NT_1 ### bar NT_1")
    to_remove = [
        qcfg_rule.rule_from_string("bar bar NT_1 ### bar bar NT_1")
    ]

    # Sample only 2 of the 3 dataset examples.
    delta = codelength_utils.get_dataset_encoding_delta(
        2,
        dataset,
        ruleset,
        to_add,
        to_remove,
        verbose=True)
    self.assertEqual(delta, 0.0)
Example #26
0
  def test_dataset_encoding_delta_2(self):
    """Adding the generic concatenation rule increases the encoding cost."""
    ruleset = {
        qcfg_rule.rule_from_string(s)
        for s in (
            "bar foo ### bar foo",
            "bar ### bar",
            "foo ### foo",
            "foo bar ### bar foo",
        )
    }

    dataset = [
        ("bar foo", "bar foo"),
        ("bar", "bar"),
        ("foo", "foo"),
        ("foo bar", "bar foo"),
    ]

    to_add = qcfg_rule.rule_from_string("NT_1 NT_2 ### NT_1 NT_2")
    to_remove = [
        qcfg_rule.rule_from_string("bar foo ### bar foo"),
    ]

    # Sample size equals the dataset size, so every example is scored.
    delta = codelength_utils.get_dataset_encoding_delta(
        4,
        dataset,
        ruleset,
        to_add,
        to_remove,
        verbose=True)
    self.assertGreater(delta, 0.0)
    def test_example_converter(self):
        """Converts one example and checks every serialized feature."""
        tokenizer = test_utils.MockTokenizer()
        config = test_utils.get_test_config()

        grammar = [
            qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
            qcfg_rule.rule_from_string("bar ### bar"),
            qcfg_rule.rule_from_string("foo bar ### foo bar"),
            qcfg_rule.rule_from_string("foo bar ### baz"),
        ]

        converter = example_converter.ExampleConverter(grammar, tokenizer,
                                                       config)

        tf_example = converter.convert(("foo bar", "foo bar"))
        print(tf_example)

        feature_dict = tf_example.features.feature
        # Expected int64 payload for every feature key the converter emits.
        expected_values = {
            "wordpiece_ids": [1, 5, 6, 2, 0, 0, 0, 0],
            "num_wordpieces": [4],
            "application_span_begin": [1, 1, 1, 2, 0, 0, 0, 0],
            "application_span_end": [2, 2, 2, 2, 0, 0, 0, 0],
            "application_rule_idx": [1, 4, 3, 2, 0, 0, 0, 0],
            "nu_node_type": [1, 1, 1, 2, 0, 0, 0, 0],
            "nu_node_1_idx": [-1, 0, -1, 2, 0, 0, 0, 0],
            "nu_node_2_idx": [-1, -1, -1, 1, 0, 0, 0, 0],
            "nu_application_idx": [3, 0, 2, -1, 0, 0, 0, 0],
            "nu_num_nodes": [4],
            "de_node_type": [1, 1, 1, 1, 2, 2, 0, 0],
            "de_node_1_idx": [-1, 0, -1, -1, 3, 4, 0, 0],
            "de_node_2_idx": [-1, -1, -1, -1, 2, 1, 0, 0],
            "de_application_idx": [3, 0, 1, 2, -1, -1, 0, 0],
            "de_num_nodes": [6],
        }
        self.assertEqual(feature_dict.keys(), set(expected_values))

        for key, want in expected_values.items():
            print("%s: %s" % (key, feature_dict[key].int64_list.value))
            self.assertEqual(feature_dict[key].int64_list.value, want)