Exemple #1
0
    def get_utterance_semantics_pairs(self,
                                      random_source,
                                      rule_sets,
                                      branch_cap=None):
        """Generate a mapping from printed utterances to their printed parses.

        Args:
            random_source: Random-number generator forwarded to the sentence
                generators so expansion order is randomized.
            rule_sets: 1-based indices into ``self.rules`` selecting which
                (rules, rules_anon, rules_ground, semantics) tuples to expand.
            branch_cap: Optional limit on expansion branching, forwarded to
                the generators.

        Returns:
            dict mapping ``tree_printer(utterance)`` -> ``tree_printer(parse)``.
            Utterances that print identically overwrite earlier entries.
        """
        all_pairs = {}
        # rule_sets is 1-based, so shift indices down by one.
        # (Renamed from `rules` — the original shadowed this list with the
        # loop variable of the same name below.)
        selected_rules = [self.rules[index - 1] for index in rule_sets]

        for rules, rules_anon, rules_ground, semantics in selected_rules:
            if self.semantic_form_version == "slot":
                pairs = generate_sentence_slot_pairs(
                    ROOT_SYMBOL,
                    rules_ground,
                    semantics,
                    yield_requires_semantics=True,
                    branch_cap=branch_cap,
                    random_generator=random_source)
            else:
                pairs = generate_sentence_parse_pairs(
                    ROOT_SYMBOL,
                    rules_ground,
                    semantics,
                    yield_requires_semantics=True,
                    branch_cap=branch_cap,
                    random_generator=random_source)

            for utterance, parse in pairs:
                all_pairs[tree_printer(utterance)] = tree_printer(parse)
        return all_pairs
Exemple #2
0
 def test_generate(self):
     """Fixture grammar + semantics should yield exactly six sentence/parse pairs."""
     gen = Generator(grammar_format_version=2018)
     grammar_rules = gen.load_rules(
         os.path.join(FIXTURE_DIR, "grammar.txt"))
     semantics_rules = gen.load_semantics_rules(
         os.path.join(FIXTURE_DIR, "semantics.txt"))
     produced = generate_sentence_parse_pairs(
         NonTerminal("Main"), grammar_rules, semantics_rules)
     self.assertEqual(len(list(produced)), 6)
Exemple #3
0
def get_grounding_per_each_parse_by_cat(generator, random_source):
    """Collect one grounded example per distinct anonymized parse.

    Args:
        generator: Iterable of (rules, rules_anon, rules_ground, semantics)
            tuples.
        random_source: random.Random-like object used both for shuffling the
            expansions and as the generators' randomness source.

    Returns:
        A list with one entry per rule tuple; each entry is a list of
        (utterance, parse_anon, parse_ground) triples, deduplicated so that
        each distinct anonymized parse contributes exactly one triple.
    """
    grounded_examples = []

    for rules, rules_anon, rules_ground, semantics in generator:
        cat_groundings = {}
        # Start with each rule, since this is guaranteed to get at least all
        # possible parses. Note, this may include parses that don't fall in
        # the grammar...
        for generation_path, semantic_production in semantics.items():
            # Some non-terminals may expand into different parses (like
            # $oprop)! So we'll expand them every which way.
            wild_expansions = list(
                generate_sentence_parse_pairs(generation_path,
                                              rules,
                                              semantics,
                                              yield_requires_semantics=True,
                                              random_generator=random_source))
            # We're going to be throwing away expansions that have the same
            # parse, so let's randomize here to make sure we aren't favoring
            # the last expansion. Note that the above generation should also
            # return expansions in a random order anyway.
            random_source.shuffle(wild_expansions)

            # (No list() copy needed here; wild_expansions is already a list.)
            for utterance_wild, parse_wild in wild_expansions:
                utterance_anon, parse_anon = next(
                    expand_pair_full(utterance_wild,
                                     parse_wild,
                                     rules_anon,
                                     branch_cap=1,
                                     random_generator=random_source))

                utterance, parse_ground = next(
                    expand_pair_full(utterance_wild,
                                     parse_wild,
                                     rules_ground,
                                     branch_cap=1,
                                     random_generator=random_source))
                # Fully-grounded output must contain no placeholders.
                # (A duplicated assert on parse_ground was removed.)
                assert not has_placeholders(utterance)
                assert not has_placeholders(parse_ground)
                # We expect this to happen sometimes because of the cat1 cat2
                # object known wildcard situation.
                if has_placeholders(parse_anon):
                    continue

                cat_groundings[parse_anon] = (utterance, parse_anon,
                                              parse_ground)
        grounded_examples.append(list(cat_groundings.values()))
    return grounded_examples
Exemple #4
0
    def test_parse_all_2019_anonymized(self):
        """Every sampled grounded sentence should parse once anonymized."""
        generator = Generator(grammar_format_version=2019)

        grammar_dir = os.path.abspath(
            os.path.dirname(__file__) + "/../resources/generator2019")
        rules, rules_anon, rules_ground, semantics, entities = load_all_2019(
            generator, grammar_dir)

        # Fixed seed keeps the sampled sentences deterministic.
        sentences = generate_sentence_parse_pairs(
            ROOT_SYMBOL,
            rules_ground, {},
            yield_requires_semantics=False,
            random_generator=random.Random(1))

        # Why anonymization is needed before parsing:
        # Bring me the apple from the fridge to the kitchen
        # ---straight anon to clusters--->
        # Bring me the {ob}  from the {loc} to the {loc}
        # ---Grammar based parser--->
        # (Failure; grammar has numbers on locs)
        #
        # Bring me the apple from the fridge to the kitchen
        # ---id naive number anon--->
        # Bring me the {ob}  from the {loc 1} to the {loc 2}
        # ---Grammar based parser--->
        # (Failure; wrong numbers, or maybe)

        anonymizer = Anonymizer(*entities)
        parser = AnonymizingParser(GrammarBasedParser(rules_anon), anonymizer)

        num_tested = 1000
        succeeded = 0
        for sentence_tree, _parse in itertools.islice(sentences, num_tested):
            text = tree_printer(sentence_tree)
            if parser(text):
                succeeded += 1
            else:
                # Dump diagnostics for any sentence that fails to parse.
                print(text)
                print(anonymizer(text))
                print()
                print(parser(anonymizer(text)))

        self.assertEqual(succeeded, num_tested)