Ejemplo n.º 1
0
    def test_get_wordpiece_inputs_special(self):
        """Check wordpiece inputs for an utterance containing `m0` and `m1`.

        The utterance is split on single spaces into six tokens, passed to
        `tokenization_utils.get_wordpiece_inputs` together with the mock
        tokenizer, and the four returned values are compared against fixed
        expectations.
        """
        mock_tokenizer = test_utils.MockTokenizer()
        words = "what river traverses m0 and m1".split(" ")

        wordpiece_inputs = tokenization_utils.get_wordpiece_inputs(
            words, mock_tokenizer, verbose=True)
        # The helper is expected to return exactly four values, in this order.
        ids, count, start_indices, end_indices = wordpiece_inputs

        self.assertEqual(count, 8)
        self.assertEqual(ids, [1, 7, 8, 9, 3, 13, 4, 2])
        # Start and end indices are expected to be identical for every token.
        self.assertEqual(start_indices, [1, 2, 3, 4, 5, 6])
        self.assertEqual(end_indices, [1, 2, 3, 4, 5, 6])
def _write_tf_examples(examples_filepath, config, num_examples=8):
    """Write examples as test data.

    Builds an `ExampleConverter` from three fixed test rules and a mock
    tokenizer, converts the ("foo bar", "foo bar") example, and writes
    `num_examples` serialized copies of it to a TFRecord file.

    Args:
        examples_filepath: Path of the TFRecord file to write.
        config: Configuration object forwarded to `ExampleConverter`.
        num_examples: Number of records to write to the file.
    """
    tokenizer = test_utils.MockTokenizer()
    rules = [
        qcfg_rule.rule_from_string("foo NT_1 ### foo NT_1"),
        qcfg_rule.rule_from_string("bar ### bar"),
        qcfg_rule.rule_from_string("foo bar ### foo bar"),
    ]

    converter = example_converter.ExampleConverter(rules, tokenizer, config)
    example = ("foo bar", "foo bar")

    # Use the writer as a context manager so it is closed (and its buffered
    # records flushed) even if conversion raises; otherwise a caller reading
    # the file right after this function returns may see missing records.
    with tf.io.TFRecordWriter(examples_filepath) as writer:
        for _ in range(num_examples):
            tf_example = converter.convert(example)
            writer.write(tf_example.SerializeToString())
Ejemplo n.º 3
0
    def test_get_outputs(self):
        """Run `InferenceWrapper.get_output` on "foo bar" with test rules.

        The wrapper is built from a mock tokenizer, three fixed rules and the
        test configs; the returned output must equal "foo bar" and the
        returned score must not be None.
        """
        mock_tokenizer = test_utils.MockTokenizer()
        rule_strings = (
            "foo NT_1 ### foo NT_1",
            "bar ### bar",
            "foo bar ### foo bar",
        )
        parsed_rules = [
            qcfg_rule.rule_from_string(rule_string)
            for rule_string in rule_strings
        ]

        wrapper = inference_wrapper.InferenceWrapper(
            mock_tokenizer,
            parsed_rules,
            test_utils.get_test_config(),
            test_utils.get_test_bert_config(),
        )

        prediction, prediction_score = wrapper.get_output("foo bar")
        self.assertEqual(prediction, "foo bar")
        self.assertIsNotNone(prediction_score)
    def test_example_converter(self):
        """Convert ("foo bar", "foo bar") and check every emitted feature.

        Builds an `ExampleConverter` from four fixed rules, converts a single
        example, then verifies that the resulting feature map has exactly the
        expected keys and that each feature's int64 values match a fixed
        expected list.
        """
        mock_tokenizer = test_utils.MockTokenizer()
        test_config = test_utils.get_test_config()

        rule_strings = (
            "foo NT_1 ### foo NT_1",
            "bar ### bar",
            "foo bar ### foo bar",
            "foo bar ### baz",
        )
        rules = [
            qcfg_rule.rule_from_string(rule_string)
            for rule_string in rule_strings
        ]

        converter = example_converter.ExampleConverter(
            rules, mock_tokenizer, test_config)

        tf_example = converter.convert(("foo bar", "foo bar"))
        print(tf_example)

        feature_dict = tf_example.features.feature
        expected_keys = {
            "wordpiece_ids",
            "num_wordpieces",
            "application_span_begin",
            "application_span_end",
            "application_rule_idx",
            "nu_node_type",
            "nu_node_1_idx",
            "nu_node_2_idx",
            "nu_application_idx",
            "nu_num_nodes",
            "de_node_type",
            "de_node_1_idx",
            "de_node_2_idx",
            "de_application_idx",
            "de_num_nodes",
        }
        self.assertEqual(feature_dict.keys(), expected_keys)

        # Dump every feature so a failing run shows the actual values.
        for key in expected_keys:
            print("%s: %s" % (key, feature_dict[key].int64_list.value))

        # (feature name, expected int64 values), checked in this order.
        expected_values = (
            ("num_wordpieces", [4]),
            ("wordpiece_ids", [1, 5, 6, 2, 0, 0, 0, 0]),
            ("application_rule_idx", [1, 4, 3, 2, 0, 0, 0, 0]),
            ("application_span_begin", [1, 1, 1, 2, 0, 0, 0, 0]),
            ("application_span_end", [2, 2, 2, 2, 0, 0, 0, 0]),
            ("nu_num_nodes", [4]),
            ("nu_node_type", [1, 1, 1, 2, 0, 0, 0, 0]),
            ("nu_application_idx", [3, 0, 2, -1, 0, 0, 0, 0]),
            ("nu_node_1_idx", [-1, 0, -1, 2, 0, 0, 0, 0]),
            ("nu_node_2_idx", [-1, -1, -1, 1, 0, 0, 0, 0]),
            ("de_num_nodes", [6]),
            ("de_node_type", [1, 1, 1, 1, 2, 2, 0, 0]),
            ("de_application_idx", [3, 0, 1, 2, -1, -1, 0, 0]),
            ("de_node_1_idx", [-1, 0, -1, -1, 3, 4, 0, 0]),
            ("de_node_2_idx", [-1, -1, -1, -1, 2, 1, 0, 0]),
        )
        for feature_name, expected in expected_values:
            self.assertEqual(
                feature_dict[feature_name].int64_list.value, expected)