Beispiel #1
0
def grammar_cc() -> Grammar:
    """Build the classic c/d example grammar: S -> CC, C -> cC | d."""
    rules = [
        Rule('S', 'CC'),
        Rule('C', 'cC'),
        Rule('C', 'd'),
    ]
    return Grammar(terminals='cd', non_terminals='SC', start='S', rules=rules)
Beispiel #2
0
def build_rules_starts_with_terminal(rules, complex_nonterminal, watched, unwatched):
	"""Expand a complex nonterminal whose first symbol is a terminal.

	Walks the tail symbols of ``complex_nonterminal.name`` (positions 1..n);
	for each tail symbol that could vanish, emits rules that substitute the
	right sides of that symbol's non-empty rules.  Newly created complex
	nonterminals are queued on *unwatched* unless already seen.  A final rule
	mapping the complex nonterminal to its leading terminal alone is always
	appended.  Returns the list of generated rules.
	"""
	adding_rules = []
	symbols = complex_nonterminal.name
	for idx in range(1, len(symbols)):
		symbol = symbols[idx]

		# Stop at the first symbol that cannot derive epsilon: a terminal,
		# or a nonterminal that is not nullable.
		if (isinstance(symbol, Terminal) or 
				(isinstance(symbol, Nonterminal) and not symbol.is_nullable)):
			break

		selected_rules = find_rules_for_nonterminal(rules, symbol)
		for rule  in selected_rules:
			if not rule.is_empty():
				left_side = [complex_nonterminal]

				# New complex name: the rule's right side followed by the
				# remaining tail symbols (if any).
				new_name = list(rule.right_side)
				if (idx + 1) < len(symbols):
					new_name.extend(symbols[idx + 1:])
				new_complex_nonterminal = ComplexNonterminal(new_name)
				right_side = [symbols[0], new_complex_nonterminal]

				# Queue the new complex nonterminal for later processing,
				# unless it has already been handled or queued.
				if (new_complex_nonterminal not in watched and
						new_complex_nonterminal not in unwatched):
					unwatched.append(new_complex_nonterminal)
			
				adding_rules.append(Rule(left_side, right_side))

	# The complex nonterminal can always collapse to its leading terminal.
	adding_rules.append(Rule([complex_nonterminal], [symbols[0]]))

	return adding_rules
Beispiel #3
0
    def add_rule_containing_optional(self, rule):
        """
        Handles adding a rule which contains an optional element on the RHS.
        We find the leftmost optional element on the RHS, and then generate
        two variants of the rule: one in which that element is required, and
        one in which it is removed.  We add these variants in place of the
        original rule.  (If there are more optional elements further to the
        right, we'll wind up recursing.)

        For example, if the original rule is:

            Rule('$Z', '$A ?$B ?$C $D')

        then we add these rules instead:

            Rule('$Z', '$A $B ?$C $D')
            Rule('$Z', '$A ?$C $D')
        """
        # Find index of the first optional element on the RHS.
        first = next((idx for idx, elt in enumerate(rule.rhs) if Unit2Grammar.is_optional(elt)), -1)
        assert first >= 0
        assert len(rule.rhs) > 1, 'Entire RHS is optional: %s' % rule
        prefix = rule.rhs[:first]
        suffix = rule.rhs[(first + 1):]
        # First variant: the first optional element gets deoptionalized.
        # Stripping the leading '?' marker makes the element required.
        deoptionalized = (rule.rhs[first][1:],)
        self.add_rule(Rule(rule.lhs, prefix + deoptionalized + suffix, rule.sem))
        # Second variant: the first optional element gets removed.
        # If the semantics is a value, just keep it as is.
        sem = rule.sem
        # But if it's a function, we need to supply a dummy argument for the removed element.
        # The lambda closes over `rule` and `first`; inserting None keeps the
        # sems list at the length the original semantics function expects.
        if isinstance(rule.sem, FunctionType):
            sem = lambda sems: rule.sem(sems[:first] + [None] + sems[first:])
        self.add_rule(Rule(rule.lhs, prefix + suffix, sem))
 def test_compl_word_rule(self):
     """A fully irregular rule ('aller >> ...') matches only its exact input
     and transforms it into the listed conjugated forms."""
     r = Rule("aller >> vais, vas, va, allons, allez, vont", "PrInd")
     self.assertEqual(r.input, 'aller')
     self.assertEqual(r.output, ['vais', 'vas', 'va', 'allons', 'allez', 'vont'])
     # Exact match succeeds; any other verb fails with an empty stem.
     self.assertEqual(r.match('aller'), (True, 'aller'))
     self.assertEqual(r.match('parler'), (False, ''))
     self.assertEqual(r.transform('aller'), ['vais', 'vas', 'va', 'allons', 'allez', 'vont'])
Beispiel #5
0
def build_adding_rules(rule, idx):
	"""Generate the replacement rules for *rule* up to position *idx*.

	For every position before *idx*, a rule is emitted whose right side
	starts with the non-nullable counterpart of the symbol at that position
	followed by the remaining suffix (the preceding symbols are dropped).
	If a plain suffix starting at *idx* remains, it is emitted as well.
	"""
	generated = []
	for pos in range(idx):
		lhs = list(rule.left_side)
		rhs = [rule.right_side[pos].create_nonnullable_nonterminal()]
		rhs.extend(rule.right_side[pos + 1:])
		generated.append(Rule(lhs, rhs))

	# Keep the plain tail rule when idx does not exhaust the right side.
	if idx < len(rule.right_side):
		generated.append(Rule(list(rule.left_side), rule.right_side[idx:]))
	return generated
 def test_rule(self):
     """A suffix rule ('~er >> ...') matches any verb ending in 'er' and
     appends each output ending to the extracted stem."""
     r = Rule("~er >> ~e, ~es, ~e, ~ons, ~ez, ~ent", "PrInd")
     self.assertEqual(r.input, '~er')
     self.assertEqual(r.output, ['~e', '~es', '~e', '~ons', '~ez', '~ent'])
     # match() returns (matched?, stem); the stem may be empty.
     self.assertEqual(r.match('parler'), (True, 'parl'))
     self.assertEqual(r.match('er'), (True, ''))
     self.assertEqual(r.match('finir'), (False, ''))
     self.assertEqual(r.transform('parler'), ['parle', 'parles', 'parle', 'parlons', 'parlez', 'parlent'])
     # Non-matching input transforms to an empty list.
     self.assertEqual(r.transform('finir'), [])
     # A '~cer' rule is a special case of the '~er' rule, not vice versa.
     r2 = Rule("~cer >> ~ce, ~ces, ~ce, ~cons, ~cez, ~cent", "PrInd")
     self.assertEqual(r.includes(r2), True)
     self.assertEqual(r2.includes(r), False)
Beispiel #7
0
    def parse(self, words):
        """Parses a sequence of words.

        Args:
            words (list<string>): The sequence of words to be parsed.

        Returns:
            A list of parse trees valid for the given input sequence, or [] if
                no valid parse.
        """
        chart = EarleyChart(words)
        # Seed the chart with the dummy start rule GAMMA -> distinguished
        # symbol, spanning the empty prefix with the dot at position 0.
        chart.enqueue(
            State(rule=Rule(_GAMMA, [self._grammar.distinguished_symbol]),
                  span_start=0,
                  span_stop=0,
                  dot_position=0), 0)
        for i in range(len(words) + 1):
            for state in chart[i]:
                # Incomplete states (dot not at end) go to the predictor and
                # scanner; completed states feed the completer.
                if state.incomplete:
                    self._predict(state, chart)
                    self._scan(state, chart)
                else:
                    self._complete(state, chart)

        # Materialize a tree for every full parse found in the chart.
        return [
            self._tree_from_parse(p, chart)[1]
            for p in self._full_parses(chart)
        ]
Beispiel #8
0
    def calculate_control_table(self) -> None:
        """Fill the SHIFT/REDUCE/ACCEPT action table and the goto table.

        Rows are indexed by item-set (state) number; action-table columns are
        terminals, goto-table columns are nonterminals (-1 = no transition).
        """
        states = self.items()
        n = len(states)
        # Default every action cell to ERROR and every goto entry to -1.
        self.control_table = [{
            symbol: Cell(Cell.ERROR)
            for symbol in self.grammar.terminals
        } for i in range(n)]
        self.goto = [{symbol: -1
                      for symbol in self.grammar.non_terminals}
                     for i in range(n)]

        for index, superstate in enumerate(states):
            for state in superstate:
                # Terminal after the dot: SHIFT to the goto target, if any.
                if state.after_dot in self.grammar.terminals:
                    try:
                        j = states.index(
                            self.goto_function(superstate, state.after_dot))
                        self.control_table[index][state.after_dot] = Cell(
                            Cell.SHIFT, j)
                    except ValueError:
                        # goto target not among the item sets; leave ERROR.
                        pass
                # Dot at the end of a non-augmented rule: REDUCE on lookahead.
                if state.after_dot is None and state.rule.left != '@':
                    self.control_table[index][state.look] = Cell(
                        Cell.REDUCE, state.rule)
                # Completed augmented rule '@ -> start .' with '$': ACCEPT.
                if state == State(Rule('@', self.grammar.start), 1, '$'):
                    self.control_table[index]['$'] = Cell(Cell.ACCEPT)
                # Nonterminal after the dot: record the goto transition.
                if state.after_dot in self.grammar.non_terminals:
                    try:
                        j = states.index(
                            self.goto_function(superstate, state.after_dot))
                        self.goto[index][state.after_dot] = j
                    except ValueError:
                        pass
def city_rule(name):
    """Build a lexical $City rule; multi-word names get quote wrapping."""
    if ' ' in name:
        name = "' %s '" % name

    def sem(v):
        # Outer argument v is unused; the semantics is a function of var.
        return lambda var: '_const ( %s , _cityid ( %s , _ ) )' % (var, name)

    return Rule('$City', [], name, sem, weight=0.05)
Beispiel #10
0
def river_rule(name):
    """Build a lexical $River rule keyed on the original (unshortened) name."""
    lexeme = name
    # The id drops a trailing ' river' and quotes multi-word remainders.
    if name.endswith(' river'):
        name = name[:-6]
    if ' ' in name:
        name = "' %s '" % name

    def sem(v):
        return lambda var: '_const ( %s , _riverid ( %s ) )' % (var, name)

    return Rule('$River', [], lexeme, sem)
Beispiel #11
0
 def apply_annotators(self, chart, tokens, start, end):
     """Add parses to chart cell (start, end) by applying annotators."""
     # Annotators are optional; grammars built without them skip this step.
     if hasattr(self, 'annotators'):
         for annotator in self.annotators:
             for category, semantics in annotator.annotate(tokens[start:end]):
                 # Stop adding parses as soon as check_capacity refuses.
                 if not self.check_capacity(chart, start, end):
                     return
                 rule = Rule(category, tuple(tokens[start:end]), semantics)
                 chart[(start, end)].append(Parse(rule, tokens[start:end]))
Beispiel #12
0
def delete_useless_nonterminals_from_rule(rule, useless_nonterminals):
	"""Return a copy of *rule* with all useless nonterminals removed.

	Every occurrence of a symbol from *useless_nonterminals* is dropped from
	the right side (order of the remaining symbols preserved).  If nothing
	remains, the rule becomes an epsilon rule ([EmptySymbol()]).
	"""
	left_side = list(rule.left_side)
	# Single filtering pass instead of repeated O(n) list.remove() scans;
	# membership still uses the symbols' __eq__, as `in`/`remove` did.
	right_side = [s for s in rule.right_side if s not in useless_nonterminals]
	if not right_side:
		right_side = [EmptySymbol()]
	return Rule(left_side, right_side)
Beispiel #13
0
    def add_n_ary_rule(self, rule):
        """
		Handles adding a rule with three or more non-terminals on the RHS.
		We introduce a new category which covers all elements on the RHS except
		the first, and then generate two variants of the rule: one which
		consumes those elements to produce the new category, and another which
		combines the new category which the first element to produce the
		original LHS category.  We add these variants in place of the
		original rule.  (If the new rules still contain more than two elements
		on the RHS, we'll wind up recursing.)

		For example, if the original rule is:

		    Rule('$Z', '$A $B $C $D')

		then we create a new category '$Z_$A' (roughly, "$Z missing $A to the left"),
		and add these rules instead:

		    Rule('$Z_$A', '$B $C $D')
		    Rule('$Z', '$A $Z_$A')
		"""
        def add_category(base_name):
            """Register and return a fresh category name derived from base_name,
            appending underscores until the name is unique."""
            assert self.is_cat(base_name)
            name = base_name
            while name in self.categories:
                name = name + '_'
            self.categories.add(name)
            return name

        def apply_semantics(rule, sems):
            # Note that this function would not be needed if we required that semantics
            # always be functions, never bare values.  That is, if instead of
            # Rule('$E', 'one', 1) we required Rule('$E', 'one', lambda sems: 1).
            # But that would be cumbersome.
            if isinstance(rule.sem, FunctionType):
                return rule.sem(sems)
            else:
                return rule.sem

        category = add_category('%s_%s' % (rule.lhs, rule.rhs[0]))
        # The new category consumes everything after the first RHS element,
        # passing the child semantics through unchanged.
        self.add_rule(Rule(category, rule.rhs[1:], lambda sems: sems))
        # The original LHS is produced from the first element plus the new
        # category; sems[1] is the list collected by the rule above.
        self.add_rule(
            Rule(rule.lhs, (rule.rhs[0], category),
                 lambda sems: apply_semantics(rule, [sems[0]] + sems[1])))
Beispiel #14
0
def replace_nonleft_terminal_to_nonterminal(rule, terminal, nonterminal):
	"""Return *rule* with every non-leftmost occurrence of *terminal*
	swapped for *nonterminal*; the symbol at position 0 is never touched.
	"""
	tail = rule.right_side[1:]
	if terminal not in tail:
		# Nothing to replace: reuse the original right side as-is.
		return Rule(rule.left_side, rule.right_side)

	new_right_side = [rule.right_side[0]]
	new_right_side.extend(nonterminal if s == terminal else s for s in tail)
	return Rule(rule.left_side, new_right_side)
Beispiel #15
0
    def predict(self, word: str) -> bool:
        """Return True iff *word* is derivable from the grammar's start symbol.

        Earley-style recognition: situations[j] holds all situations ending
        at position j; Scan, then Complete/Predict are applied to a fixpoint.
        """
        situations = [set() for i in range(len(word) + 1)]
        # Seed with the augmented start rule '@ -> . start' at position 0.
        situations[0].add(Situation(Rule('@', self.grammar.start), 0, 0))

        for j in range(len(word) + 1):
            self.Scan(situations, word, j)
            previous_result = situations[j]

            # Iterate Complete/Predict until situations[j] stops growing.
            while True:
                old_length = len(situations[j])

                result = self.Complete(situations, previous_result)
                result |= self.Predict(previous_result, j)

                situations[j] |= result
                previous_result = result

                if old_length == len(situations[j]):
                    break

        # Accept iff the completed start situation spans the whole word.
        return Situation(Rule('@', self.grammar.start), 1,
                         0) in situations[len(word)]
Beispiel #16
0
def replace_rule(new_rules, rule):
	"""Expand *rule* by substituting its leading nonterminal.

	Each known rule for the leading nonterminal yields one expanded copy of
	*rule*; when no such rules exist, *rule* itself is returned unchanged.
	"""
	head = rule.right_side[0]
	head_rules = find_rules_for_nonterminal(new_rules, head)
	if not head_rules:
		return [rule]

	expanded = []
	for head_rule in head_rules:
		new_right = replace_nonterminal(rule.right_side, 0, head_rule.right_side)
		expanded.append(Rule(rule.left_side, new_right))
	return expanded
Beispiel #17
0
    def items(self) -> tp.List[tp.Set[State]]:
        """Build all item sets (superstates) reachable from the start item.

        Starts with the closure of '@ -> . start, $' and repeatedly applies
        the goto function over every grammar symbol until no new set appears.
        """
        result = [self.closure({State(Rule('@', self.grammar.start), 0, '$')})]
        while True:
            changed = False
            new = []
            for superstate in result:
                for symbol in self.grammar.terminals + self.grammar.non_terminals:
                    to = self.goto_function(superstate, symbol)
                    # Only record non-empty, previously unseen item sets.
                    if to and to not in result and to not in new:
                        new.append(to)
                        changed = True
            result += new
            if not changed:
                break

        return result
Beispiel #18
0
    def __init__(self, grammar: Grammar):
        """Augment *grammar* and precompute the parsing tables.

        Side effect: mutates the passed grammar, appending the augmented
        start nonterminal '@' (with rule '@ -> start') and end marker '$'.
        """
        self.grammar = grammar
        self.grammar.non_terminals.append('@')
        self.grammar.add_rule(Rule('@', self.grammar.start))
        self.grammar.terminals.append('$')

        # Epsilon-derivability flags, filled by calculate_has_eps().
        self.has_eps = {symbol: False for symbol in self.grammar.non_terminals}
        self.calculate_has_eps()

        # Per-nonterminal helper sets for FIRST computation.
        self.first_helper = {
            symbol: set()
            for symbol in self.grammar.non_terminals
        }
        self.calculate_first()

        # Action and goto tables, filled by calculate_control_table().
        self.control_table = []
        self.goto = []
        self.calculate_control_table()
Beispiel #19
0
def build_rules_starts_with_nonterminal(rules, complex_nonterminal, watched, unwatched):
	"""Expand a complex nonterminal whose first symbol is a nonterminal.

	For every non-empty rule of the leading nonterminal, creates a new
	complex nonterminal with the leading symbol replaced by that rule's
	right side and emits a unit rule to it.  New complex nonterminals are
	queued on *unwatched* unless already known.  Returns the emitted rules.
	"""
	adding_rules = []
	nonterminal_rules = find_rules_for_nonterminal(
		rules, complex_nonterminal.name[0])
	for rule in nonterminal_rules:
		if not rule.is_empty():
			left_side = [complex_nonterminal]

			# Substitute the rule's right side for the leading nonterminal.
			new_name = replace_nonterminal(complex_nonterminal.name, 0, rule.right_side)
			new_complex_nonterminal = ComplexNonterminal(new_name)
			right_side = [new_complex_nonterminal]
			
			# Queue the new complex nonterminal unless already handled/queued.
			if (new_complex_nonterminal not in watched and
					new_complex_nonterminal not in unwatched):
				unwatched.append(new_complex_nonterminal)				

			new_rule = Rule(left_side, right_side)
			adding_rules.append(new_rule)
	return adding_rules
Beispiel #20
0
def convert_to_greibach(grammar):
	print "algorithm 3"
	converted_grammar = delete_empty_rules(grammar)
	print "converted grammar:"
	print converted_grammar
	
	new_grammar = Grammar()
	new_grammar.axiom = converted_grammar.axiom	
	new_grammar.terminals = set(converted_grammar.terminals)	

	sorted_nonterminals = sort_nonterminals(
		converted_grammar.nonterminals, converted_grammar.rules)
	print "sorted nonterminals:", [str(s) for s in sorted_nonterminals]
	new_grammar.nonterminals = set(sorted_nonterminals)

	# rebuild rules
	nonterminal = sorted_nonterminals[-1]
	new_rules = find_rules_for_nonterminal(converted_grammar.rules, nonterminal)
	for idx in range(len(sorted_nonterminals)  - 2, -1, -1):
		nonterminal = sorted_nonterminals[idx]
		rules = find_rules_for_nonterminal(converted_grammar.rules, nonterminal)
		for rule in rules:
			if isinstance(rule.right_side[0], Nonterminal):
				new_rules.extend(replace_rule(new_rules, rule))
			else:
				new_rules.append(rule)

	# add rules for terminals
	for terminal in converted_grammar.terminals:
		new_nonterminal = Nonterminal("X{%s}" % str(terminal))
		new_grammar.nonterminals.add(new_nonterminal)
		new_rules.append(Rule([new_nonterminal], [terminal]))

		# replace nonleft terminal to new nonterminals
		for rule in new_rules:
			rule =replace_nonleft_terminal_to_nonterminal(rule, terminal, new_nonterminal)

	new_grammar.rules = new_rules
	
	print "grammar:", new_grammar
		
	return delete_nonderivable_nonterminals(new_grammar)
Beispiel #21
0
def build_new_rules(grammar, disappearing_nonterminals, new_nonterminals):
	"""Rebuild the grammar's rules around nullable-symbol handling.

	Every non-empty rule is expanded via build_adding_rules(); rules headed
	by a disappearing nonterminal additionally get copies headed by its
	non-nullable counterpart.  Only rules whose head survives in
	*new_nonterminals* are returned.
	"""
	new_rules = []
	for rule in grammar.rules:
		if not rule.is_empty():
			# Position of the leftmost symbol that cannot derive epsilon.
			idx = find_left_nonnullable_symbol_idx(rule)	

			adding_rules = build_adding_rules(rule, idx)
			new_rules.extend(adding_rules)			

			# Duplicate each generated rule under the non-nullable head.
			if rule.left_side[0] in disappearing_nonterminals:
				for adding_rule in adding_rules:
					left_side = [adding_rule.left_side[0].create_nonnullable_nonterminal()]
					right_side = list(adding_rule.right_side)
					new_rules.append(Rule(left_side, right_side))
		else:
			new_rules.append(rule)	

	# Keep only rules whose head is among the surviving nonterminals.
	new_cleared_rules = []
	for rule in new_rules:
		if rule.left_side[0] in new_nonterminals:
			new_cleared_rules.append(rule)

	return new_cleared_rules
Beispiel #22
0
def grammar_cc_items() -> tp.List[tp.Set[State]]:
    """Expected canonical item sets for grammar_cc(), one set per state.

    Each State carries (rule, dot position, lookahead terminal).
    """
    return [{
        State(Rule('@', 'S'), 0, '$'),
        State(Rule('S', 'CC'), 0, '$'),
        State(Rule('C', 'cC'), 0, 'c'),
        State(Rule('C', 'cC'), 0, 'd'),
        State(Rule('C', 'd'), 0, 'c'),
        State(Rule('C', 'd'), 0, 'd')
    }, {State(Rule('@', 'S'), 1, '$')},
            {
                State(Rule('S', 'CC'), 1, '$'),
                State(Rule('C', 'cC'), 0, '$'),
                State(Rule('C', 'd'), 0, '$')
            },
            {
                State(Rule('C', 'cC'), 1, 'c'),
                State(Rule('C', 'cC'), 1, 'd'),
                State(Rule('C', 'cC'), 0, 'c'),
                State(Rule('C', 'cC'), 0, 'd'),
                State(Rule('C', 'd'), 0, 'c'),
                State(Rule('C', 'd'), 0, 'd')
            }, {State(Rule('C', 'd'), 1, 'c'),
                State(Rule('C', 'd'), 1, 'd')},
            {State(Rule('S', 'CC'), 2, '$')},
            {
                State(Rule('C', 'cC'), 1, '$'),
                State(Rule('C', 'cC'), 0, '$'),
                State(Rule('C', 'd'), 0, '$')
            }, {State(Rule('C', 'd'), 1, '$')},
            {State(Rule('C', 'cC'), 2, 'c'),
             State(Rule('C', 'cC'), 2, 'd')}, {State(Rule('C', 'cC'), 2, '$')}]
Beispiel #23
0
def grammar_cc_control_table(
) -> tp.Tuple[tp.List[tp.Dict[str, Cell]], tp.List[tp.Dict[str, int]]]:
    """Expected (action table, goto table) pair for grammar_cc().

    The action table maps each state's terminals to SHIFT/REDUCE/ACCEPT/ERROR
    cells; the goto table maps nonterminals to target states (-1 = none).
    """
    return ([{
        'c': Cell(Cell.SHIFT, 3),
        'd': Cell(Cell.SHIFT, 4),
        '$': Cell(Cell.ERROR)
    }, {
        'c': Cell(Cell.ERROR),
        'd': Cell(Cell.ERROR),
        '$': Cell(Cell.ACCEPT)
    }, {
        'c': Cell(Cell.SHIFT, 6),
        'd': Cell(Cell.SHIFT, 7),
        '$': Cell(Cell.ERROR)
    }, {
        'c': Cell(Cell.SHIFT, 3),
        'd': Cell(Cell.SHIFT, 4),
        '$': Cell(Cell.ERROR)
    }, {
        'c': Cell(Cell.REDUCE, Rule('C', 'd')),
        'd': Cell(Cell.REDUCE, Rule('C', 'd')),
        '$': Cell(Cell.ERROR)
    }, {
        'c': Cell(Cell.ERROR),
        'd': Cell(Cell.ERROR),
        '$': Cell(Cell.REDUCE, Rule('S', 'CC'))
    }, {
        'c': Cell(Cell.SHIFT, 6),
        'd': Cell(Cell.SHIFT, 7),
        '$': Cell(Cell.ERROR)
    }, {
        'c': Cell(Cell.ERROR),
        'd': Cell(Cell.ERROR),
        '$': Cell(Cell.REDUCE, Rule('C', 'd'))
    }, {
        'c': Cell(Cell.REDUCE, Rule('C', 'cC')),
        'd': Cell(Cell.REDUCE, Rule('C', 'cC')),
        '$': Cell(Cell.ERROR)
    }, {
        'c': Cell(Cell.ERROR),
        'd': Cell(Cell.ERROR),
        '$': Cell(Cell.REDUCE, Rule('C', 'cC'))
    }], [{
        '@': -1,
        'S': 1,
        'C': 2
    }, {
        '@': -1,
        'S': -1,
        'C': -1
    }, {
        '@': -1,
        'S': -1,
        'C': 5
    }, {
        '@': -1,
        'S': -1,
        'C': 8
    }, {
        '@': -1,
        'S': -1,
        'C': -1
    }, {
        '@': -1,
        'S': -1,
        'C': -1
    }, {
        '@': -1,
        'S': -1,
        'C': 9
    }, {
        '@': -1,
        'S': -1,
        'C': -1
    }, {
        '@': -1,
        'S': -1,
        'C': -1
    }, {
        '@': -1,
        'S': -1,
        'C': -1
    }])
Beispiel #24
0
    def test_grammar_creation(self):
        """Grammar should split rules into binary and lexical buckets."""
        numeral_rules = [Rule('$E', word)
                         for word in ['one', 'two', 'three', 'four']]

        operator_rules = [
            Rule('$UnOp', 'minus'),
            Rule('$BinOp', 'plus'),
            Rule('$BinOp', 'minus'),
            Rule('$BinOp', 'times'),
        ]

        compositional_rules = [
            Rule('$E', '$UnOp $E'),
            Rule('$EBO', '$E $BinOp'),
            Rule('$E', '$EBO $E'),
        ]

        all_rules = numeral_rules + operator_rules + compositional_rules
        arithmetic_grammar = Grammar(all_rules)

        # The three compositional rules are binary; the rest are lexical.
        self.assertEqual(3, len(arithmetic_grammar.binary_rules))
        self.assertEqual(7, len(arithmetic_grammar.lexical_rules))
Beispiel #25
0
def parseRule(string):
    """Parse a textual production "LHS -> A B C" into a Rule.

    The left side is stripped; the right side is stripped and split on
    single spaces into a symbol list.
    """
    lhs, rhs = string.split('->')
    return Rule(lhs.strip(), rhs.strip().split(" "))
Beispiel #26
0
class TestMethods(unittest.TestCase):
    optional_words = [
        'the', '?', 'what', 'is', 'in', 'of', 'how', 'many', 'are', 'which',
        'that', 'with', 'has', 'major', 'does', 'have', 'where', 'me', 'there',
        'give', 'name', 'all', 'a', 'by', 'you', 'to', 'tell', 'other', 'it',
        'do', 'whose', 'show', 'one', 'on', 'for', 'can', 'whats', 'urban',
        'them', 'list', 'exist', 'each', 'could', 'about'
    ]

    rules_optionals = [
        Rule('$ROOT', '?$Optionals $Query ?$Optionals', lambda sems: sems[1]),
        Rule('$Optionals', '$Optional ?$Optionals'),
    ] + [Rule('$Optional', word) for word in optional_words]

    rules_collection_entity = [
        Rule('$Query', '$Collection', lambda sems: sems[0]),
        Rule('$Collection', '$Entity', lambda sems: sems[0]),
    ]

    reader = GeobaseReader()
    geobase = GraphKB(reader.tuples)
    annotators = [NumberAnnotator(), GeobaseAnnotator(geobase)]

    def test_simple_grammar(self):
        """A bare entity mention ('what is utah') parses to its geobase id."""
        rules = self.rules_optionals + self.rules_collection_entity
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)

        parses = grammar.parse('what is utah')
        self.assertEqual('/state/utah', parses[0].semantics)
        # Executing the semantics against geobase yields the entity itself.
        self.assertEqual(('/state/utah', ),
                         self.geobase.executor().execute(parses[0].semantics))

    domain = GeoQueryDomain()

    def test_evaluate_simple_grammar(self):
        """Evaluate the entity-only grammar over the sample; expects 17 parses."""
        from experiment import sample_wins_and_losses
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = self.rules_optionals + self.rules_collection_entity
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(17, metric_values['number of parses'])

    rules_types = [
        Rule('$Collection', '$Type', lambda sems: sems[0]),
        Rule('$Type', 'state', 'state'),
        Rule('$Type', 'states', 'state'),
        Rule('$Type', 'city', 'city'),
        Rule('$Type', 'cities', 'city'),
        Rule('$Type', 'big cities', 'city'),
        Rule('$Type', 'towns', 'city'),
        Rule('$Type', 'river', 'river'),
        Rule('$Type', 'rivers', 'river'),
        Rule('$Type', 'mountain', 'mountain'),
        Rule('$Type', 'mountains', 'mountain'),
        Rule('$Type', 'mount', 'mountain'),
        Rule('$Type', 'peak', 'mountain'),
        Rule('$Type', 'road', 'road'),
        Rule('$Type', 'roads', 'road'),
        Rule('$Type', 'lake', 'lake'),
        Rule('$Type', 'lakes', 'lake'),
        Rule('$Type', 'country', 'country'),
        Rule('$Type', 'countries', 'country'),
    ]

    def test_grammar_with_types(self):
        """With type rules added, 'name the lakes' denotes every lake in geobase."""
        rules = self.rules_optionals + self.rules_collection_entity + self.rules_types
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)

        parses = grammar.parse('name the lakes')
        self.assertEqual(
            ('/lake/becharof', '/lake/champlain', '/lake/erie',
             '/lake/flathead', '/lake/great_salt_lake', '/lake/huron',
             '/lake/iliamna', '/lake/lake_of_the_woods', '/lake/michigan',
             '/lake/mille_lacs', '/lake/naknek', '/lake/okeechobee',
             '/lake/ontario', '/lake/pontchartrain', '/lake/rainy',
             '/lake/red', '/lake/salton_sea', '/lake/st._clair',
             '/lake/superior', '/lake/tahoe', '/lake/teshekpuk',
             '/lake/winnebago'),
            self.geobase.executor().execute(parses[0].semantics))

    def test_evaluate_grammar_with_types(self):
        """Evaluate the typed grammar over the sample; expects 20 parses."""
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = self.rules_optionals + self.rules_collection_entity + self.rules_types
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(20, metric_values['number of parses'])

    rules_relations = [
        Rule('$Collection', '$Relation ?$Optionals $Collection',
             lambda sems: sems[0](sems[2])),
        Rule('$Relation', '$FwdRelation', lambda sems: (lambda arg:
                                                        (sems[0], arg))),
        Rule('$Relation', '$RevRelation', lambda sems: (lambda arg:
                                                        (arg, sems[0]))),
        Rule('$FwdRelation', '$FwdBordersRelation', 'borders'),
        Rule('$FwdBordersRelation', 'border'),
        Rule('$FwdBordersRelation', 'bordering'),
        Rule('$FwdBordersRelation', 'borders'),
        Rule('$FwdBordersRelation', 'neighbor'),
        Rule('$FwdBordersRelation', 'neighboring'),
        Rule('$FwdBordersRelation', 'surrounding'),
        Rule('$FwdBordersRelation', 'next to'),
        Rule('$FwdRelation', '$FwdTraversesRelation', 'traverses'),
        Rule('$FwdTraversesRelation', 'cross ?over'),
        Rule('$FwdTraversesRelation', 'flow through'),
        Rule('$FwdTraversesRelation', 'flowing through'),
        Rule('$FwdTraversesRelation', 'flows through'),
        Rule('$FwdTraversesRelation', 'go through'),
        Rule('$FwdTraversesRelation', 'goes through'),
        Rule('$FwdTraversesRelation', 'in'),
        Rule('$FwdTraversesRelation', 'pass through'),
        Rule('$FwdTraversesRelation', 'passes through'),
        Rule('$FwdTraversesRelation', 'run through'),
        Rule('$FwdTraversesRelation', 'running through'),
        Rule('$FwdTraversesRelation', 'runs through'),
        Rule('$FwdTraversesRelation', 'traverse'),
        Rule('$FwdTraversesRelation', 'traverses'),
        Rule('$RevRelation', '$RevTraversesRelation', 'traverses'),
        Rule('$RevTraversesRelation', 'has'),
        Rule('$RevTraversesRelation',
             'have'),  # 'how many states have major rivers'
        Rule('$RevTraversesRelation', 'lie on'),
        Rule('$RevTraversesRelation', 'next to'),
        Rule('$RevTraversesRelation', 'traversed by'),
        Rule('$RevTraversesRelation', 'washed by'),
        Rule('$FwdRelation', '$FwdContainsRelation', 'contains'),
        # 'how many states have a city named springfield'
        Rule('$FwdContainsRelation', 'has'),
        Rule('$FwdContainsRelation', 'have'),
        Rule('$RevRelation', '$RevContainsRelation', 'contains'),
        Rule('$RevContainsRelation', 'contained by'),
        Rule('$RevContainsRelation', 'in'),
        Rule('$RevContainsRelation', 'found in'),
        Rule('$RevContainsRelation', 'located in'),
        Rule('$RevContainsRelation', 'of'),
        Rule('$RevRelation', '$RevCapitalRelation', 'capital'),
        Rule('$RevCapitalRelation', 'capital'),
        Rule('$RevCapitalRelation', 'capitals'),
        Rule('$RevRelation', '$RevHighestPointRelation', 'highest_point'),
        Rule('$RevHighestPointRelation', 'high point'),
        Rule('$RevHighestPointRelation', 'high points'),
        Rule('$RevHighestPointRelation', 'highest point'),
        Rule('$RevHighestPointRelation', 'highest points'),
        Rule('$RevRelation', '$RevLowestPointRelation', 'lowest_point'),
        Rule('$RevLowestPointRelation', 'low point'),
        Rule('$RevLowestPointRelation', 'low points'),
        Rule('$RevLowestPointRelation', 'lowest point'),
        Rule('$RevLowestPointRelation', 'lowest points'),
        Rule('$RevLowestPointRelation', 'lowest spot'),
        Rule('$RevRelation', '$RevHighestElevationRelation',
             'highest_elevation'),
        Rule('$RevHighestElevationRelation', '?highest elevation'),
        Rule('$RevRelation', '$RevHeightRelation', 'height'),
        Rule('$RevHeightRelation', 'elevation'),
        Rule('$RevHeightRelation', 'height'),
        Rule('$RevHeightRelation', 'high'),
        Rule('$RevHeightRelation', 'tall'),
        Rule('$RevRelation', '$RevAreaRelation', 'area'),
        Rule('$RevAreaRelation', 'area'),
        Rule('$RevAreaRelation', 'big'),
        Rule('$RevAreaRelation', 'large'),
        Rule('$RevAreaRelation', 'size'),
        Rule('$RevRelation', '$RevPopulationRelation', 'population'),
        Rule('$RevPopulationRelation', 'big'),
        Rule('$RevPopulationRelation', 'large'),
        Rule('$RevPopulationRelation', 'populated'),
        Rule('$RevPopulationRelation', 'population'),
        Rule('$RevPopulationRelation', 'populations'),
        Rule('$RevPopulationRelation', 'populous'),
        Rule('$RevPopulationRelation', 'size'),
        Rule('$RevRelation', '$RevLengthRelation', 'length'),
        Rule('$RevLengthRelation', 'length'),
        Rule('$RevLengthRelation', 'long'),
    ]

    def test_grammar_with_relations(self):
        """Relation rules compose: 'capital of vermont' joins entity and relation."""
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)

        parses = grammar.parse('what is the capital of vermont ?')
        # Reverse relation: semantics is (entity, relation).
        self.assertEqual(('/state/vermont', 'capital'), parses[0].semantics)
        self.assertEqual(('/city/montpelier_vt', ),
                         self.geobase.executor().execute(parses[0].semantics))

    def test_evaluate_grammar_with_relations(self):
        """Evaluate the relation grammar over the sample; expects 256 parses."""
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(256, metric_values['number of parses'])

    # Intersection rules: two adjacent $Collections (possibly separated by one
    # or two $Optional tokens) combine into a logical conjunction ('.and').
    # The semantics always takes the first and last collections, skipping any
    # optionals in between.
    rules_intersection = [
        Rule('$Collection', '$Collection $Collection', lambda sems:
             ('.and', sems[0], sems[1])),
        Rule('$Collection', '$Collection $Optional $Collection', lambda sems:
             ('.and', sems[0], sems[2])),
        Rule('$Collection', '$Collection $Optional $Optional $Collection',
             lambda sems: ('.and', sems[0], sems[3])),
    ]

    def test_grammar_with_intersections(self):
        """Intersection rules conjoin collections: 'states bordering california'."""
        combined = (self.rules_optionals + self.rules_collection_entity +
                    self.rules_types + self.rules_relations +
                    self.rules_intersection)
        grammar = Unit2Grammar(rules=combined, annotators=self.annotators)

        top_parse = grammar.parse('states bordering california')[0]
        self.assertEqual(('.and', 'state', ('borders', '/state/california')),
                         top_parse.semantics)
        self.assertEqual(('/state/arizona', '/state/nevada', '/state/oregon'),
                         self.geobase.executor().execute(top_parse.semantics))

    def test_evaluate_grammar_with_intersections(self):
        """Oracle-accuracy sweep after adding intersection rules.

        The parse count grows from 256 to 1177 because intersections add
        ambiguity.
        """
        # NOTE: removed unused `from geoquery import GeoQueryDomain` (F401).
        from experiment import sample_wins_and_losses
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(1177, metric_values['number of parses'])

    # Superlative rules: a $Superlative carries an ('.argmax'|'.argmin',
    # relation) pair and wraps a neighboring $Collection, e.g.
    # ('.argmax', 'height', 'mountain').
    rules_superlatives = [
        # The superlative may precede or follow the collection it modifies.
        Rule('$Collection', '$Superlative ?$Optionals $Collection',
             lambda sems: sems[0] + (sems[2], )),
        Rule('$Collection', '$Collection ?$Optionals $Superlative',
             lambda sems: sems[2] + (sems[0], )),
        # Ambiguous lexical superlatives map to both 'area' and 'population'.
        Rule('$Superlative', 'largest', ('.argmax', 'area')),
        Rule('$Superlative', 'largest', ('.argmax', 'population')),
        Rule('$Superlative', 'biggest', ('.argmax', 'area')),
        Rule('$Superlative', 'biggest', ('.argmax', 'population')),
        Rule('$Superlative', 'smallest', ('.argmin', 'area')),
        Rule('$Superlative', 'smallest', ('.argmin', 'population')),
        Rule('$Superlative', 'longest', ('.argmax', 'length')),
        Rule('$Superlative', 'shortest', ('.argmin', 'length')),
        Rule('$Superlative', 'tallest', ('.argmax', 'height')),
        Rule('$Superlative', 'highest', ('.argmax', 'height')),
        # Compositional superlatives: 'most/least' + a reversed relation.
        Rule('$Superlative', '$MostLeast $RevRelation', lambda sems:
             (sems[0], sems[1])),
        Rule('$MostLeast', 'most', '.argmax'),
        Rule('$MostLeast', 'least', '.argmin'),
        Rule('$MostLeast', 'lowest', '.argmin'),
        Rule('$MostLeast', 'greatest', '.argmax'),
        Rule('$MostLeast', 'highest', '.argmax'),
    ]

    def test_grammar_with_superlatives(self):
        """Superlative rules produce argmax semantics: 'tallest mountain'."""
        combined = (self.rules_optionals + self.rules_collection_entity +
                    self.rules_types + self.rules_relations +
                    self.rules_intersection + self.rules_superlatives)
        grammar = Unit2Grammar(rules=combined, annotators=self.annotators)

        top_parse = grammar.parse('tallest mountain')[0]
        self.assertEqual(('.argmax', 'height', 'mountain'),
                         top_parse.semantics)
        self.assertEqual(('/mountain/mckinley', ),
                         self.geobase.executor().execute(top_parse.semantics))

    def test_evaluate_grammar_with_superlatives(self):
        """Oracle-accuracy sweep after adding superlative rules.

        The parse count grows from 1177 to 2658 with the extra ambiguity.
        """
        # NOTE: removed unused `from geoquery import GeoQueryDomain` (F401).
        from experiment import sample_wins_and_losses
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives)

        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(2658, metric_values['number of parses'])

    # Reverse join: a $Relation following a $Collection applies the reversed
    # relation, so 'the rio grande traverses' denotes the states traversed by
    # the rio grande rather than what the rio grande traverses.
    rules_reverse_joins = [
        Rule('$Collection', '$Collection ?$Optionals $Relation',
             lambda sems: Unit3Grammar.reverse(sems[2])(sems[0])),
    ]

    def test_grammar_with_reverse_joins(self):
        """Reverse joins parse relation-after-subject phrasing."""
        combined = (self.rules_optionals + self.rules_collection_entity +
                    self.rules_types + self.rules_relations +
                    self.rules_intersection + self.rules_superlatives +
                    self.rules_reverse_joins)
        grammar = Unit3Grammar(rules=combined, annotators=self.annotators)

        top_parse = grammar.parse('which states does the rio grande cross')[0]
        self.assertEqual(('.and', 'state', ('/river/rio_grande', 'traverses')),
                         top_parse.semantics)
        self.assertEqual(
            ('/state/colorado', '/state/new_mexico', '/state/texas'),
            self.geobase.executor().execute(top_parse.semantics))

    def test_evaluate_grammar_with_reverse_joins(self):
        """Oracle-accuracy sweep over the full grammar with reverse joins.

        Pins both the parse count (11562) and the oracle denotation
        accuracy (152).
        """
        # NOTE: removed unused `from geoquery import GeoQueryDomain` (F401).
        from experiment import sample_wins_and_losses
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model

        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()

        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(11562, metric_values['number of parses'])
        self.assertEqual(152, metric_values['denotation accuracy'])

    def test_evaluate_model(self):
        """Smoke-test end-to-end evaluation of an unweighted model.

        Runs evaluate_model on the first ten geo880 training examples; no
        assertion is made, so this only checks the pipeline runs cleanly.
        """
        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples

        combined = (self.rules_optionals + self.rules_collection_entity +
                    self.rules_types + self.rules_relations +
                    self.rules_intersection + self.rules_superlatives +
                    self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=combined, annotators=self.annotators)
        plain_model = Model(grammar=grammar,
                            executor=self.geobase.executor().execute)
        # Set print_examples=True to inspect individual predictions, e.g.
        # 'what state has the shortest river?'.
        evaluate_model(model=plain_model,
                       examples=geo880_train_examples[:10],
                       metrics=denotation_match_metrics(),
                       print_examples=False)

    def test_feature_function(self):
        """Penalizing empty denotations via a feature weight lifts accuracy."""
        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples

        combined = (self.rules_optionals + self.rules_collection_entity +
                    self.rules_types + self.rules_relations +
                    self.rules_intersection + self.rules_superlatives +
                    self.rules_reverse_joins)

        grammar = Unit3Grammar(rules=combined, annotators=self.annotators)

        def empty_denotation_feature(parse):
            # Fire a single feature whenever a parse denotes the empty tuple.
            feats = defaultdict(float)
            if parse.denotation == ():
                feats['empty_denotation'] += 1.0
            return feats

        feature_weights = {'empty_denotation': -1.0}

        scored_model = Model(grammar=grammar,
                             feature_fn=empty_denotation_feature,
                             weights=feature_weights,
                             executor=self.geobase.executor().execute)
        metric_values = evaluate_model(model=scored_model,
                                       examples=geo880_train_examples,
                                       metrics=denotation_match_metrics(),
                                       print_examples=False)
        self.assertEqual(235, metric_values['denotation accuracy'])
Beispiel #27
0
        'S': -1
    }, {
        '@': -1,
        'S': 9
    }, {
        '@': -1,
        'S': -1
    }])


@pytest.mark.parametrize('grammar, tests',
                         [(Grammar(terminals='abcd',
                                   non_terminals='ABCDEFGHIJK',
                                   start='A',
                                   rules=[
                                       Rule('A', 'BC'),
                                       Rule('B', 'DEFG'),
                                       Rule('C', 'HIJK'),
                                       Rule('D', 'E'),
                                       Rule('E', 'a'),
                                       Rule('E', ''),
                                       Rule('F', 'G'),
                                       Rule('G', 'b'),
                                       Rule('H', 'I'),
                                       Rule('I', 'c'),
                                       Rule('I', ''),
                                       Rule('J', 'K'),
                                       Rule('K', 'd')
                                   ]), [('A', {'a', 'b'}), ('B', {'a', 'b'}),
                                        ('', set()), ('C', {'c', 'd'}),
                                        ('J', {'d'}), ('Dcb', {'a', 'c'})]),
Beispiel #28
0
        print(sentence)

        print('parsing')

        predictions = predict(sentence, W, U)
        lastPrediction = []
        for h in predictions:
            prediction = predictRules(h, pos, rules)

            newPreds = listDifference(lastPrediction, prediction[len(pos):])
            print(prediction[:len(pos)] + newPreds, len(prediction))
            lastPrediction = prediction[len(pos):]
            # print (prediction[:], len(prediction))

        # analyze rules not found
        rules_original = list(set(Rule.fromTree(r) for r in t.allRules()))
        rules_original = [r for r in rules_original if not r.terminalRule]

        found = []
        not_found = []
        for r in rules_original:
            if r.terminalRule:
                continue
            if r in prediction:
                found.append(r)
            else:
                not_found.append(r)

        for nf in not_found:
            print('not found', nf, nf in rules)
Beispiel #29
0
def grammar_ab_control_table(
) -> tp.Tuple[tp.List[tp.Dict[str, Cell]], tp.List[tp.Dict[str, int]]]:
    """Return the LR control table for the a/b grammar S -> aSbS | ''.

    The result is a pair (action table, goto table), one dict per parser
    state.  Actions are keyed by terminal ('a', 'b') or end-of-input '$';
    gotos are keyed by nonterminal ('S') or the augmented start '@', with
    -1 marking "no transition".
    """
    return ([{  # state 0 .. state 9: action entries
        'a': Cell(Cell.SHIFT, 2),
        'b': Cell(Cell.ERROR),
        '$': Cell(Cell.REDUCE, Rule('S', ''))
    }, {
        'a': Cell(Cell.ERROR),
        'b': Cell(Cell.ERROR),
        '$': Cell(Cell.ACCEPT)
    }, {
        'a': Cell(Cell.SHIFT, 4),
        'b': Cell(Cell.REDUCE, Rule('S', '')),
        '$': Cell(Cell.ERROR)
    }, {
        'a': Cell(Cell.ERROR),
        'b': Cell(Cell.SHIFT, 5),
        '$': Cell(Cell.ERROR)
    }, {
        'a': Cell(Cell.SHIFT, 4),
        'b': Cell(Cell.REDUCE, Rule('S', '')),
        '$': Cell(Cell.ERROR)
    }, {
        'a': Cell(Cell.SHIFT, 2),
        'b': Cell(Cell.ERROR),
        '$': Cell(Cell.REDUCE, Rule('S', ''))
    }, {
        'a': Cell(Cell.ERROR),
        'b': Cell(Cell.SHIFT, 8),
        '$': Cell(Cell.ERROR)
    }, {
        'a': Cell(Cell.ERROR),
        'b': Cell(Cell.ERROR),
        '$': Cell(Cell.REDUCE, Rule('S', 'aSbS'))
    }, {
        'a': Cell(Cell.SHIFT, 4),
        'b': Cell(Cell.REDUCE, Rule('S', '')),
        '$': Cell(Cell.ERROR)
    }, {
        'a': Cell(Cell.ERROR),
        'b': Cell(Cell.REDUCE, Rule('S', 'aSbS')),
        '$': Cell(Cell.ERROR)
    }], [{  # state 0 .. state 9: goto entries
        '@': -1,
        'S': 1
    }, {
        '@': -1,
        'S': -1
    }, {
        '@': -1,
        'S': 3
    }, {
        '@': -1,
        'S': -1
    }, {
        '@': -1,
        'S': 6
    }, {
        '@': -1,
        'S': 7
    }, {
        '@': -1,
        'S': -1
    }, {
        '@': -1,
        'S': -1
    }, {
        '@': -1,
        'S': 9
    }, {
        '@': -1,
        'S': -1
    }])
Beispiel #30
0
def river_rule(name):
    """Build a $River rule whose semantics is a _riverid constant for *name*.

    The rule's surface form keeps the full original name; the logical form
    drops a trailing ' river' and quotes multi-word identifiers.
    """
    orig_name = name
    # 'mississippi river' -> 'mississippi' in the logical form.
    if name.endswith(' river'):
        name = name[:-6]
    # Multi-word identifiers are quoted in the Geoquery logical form.
    if ' ' in name:
        name = "' %s '" % name

    def semantics(v):
        return lambda var: (
            ('_const ( %s , _riverid ( ' % var) + name + ' ) )')

    return Rule('$River', [], orig_name, semantics)


RULES = [
    # Root rules
    Rule(
        '$ROOT', ['$Answer'], 'what %s ?',
        lambda v, a: lambda: '_answer ( %(v1)s , ( %(c1)s ) )' % {
            'v1': v.get(1),
            'c1': a.sem_fn(v.get(1))
        }),
    #Rule('$ROOT', ['$Answer'], 'what is the %s ?', lambda v, a: lambda : '_answer ( %(v1)s , ( %(c1)s ) )' % {'v1': v.get(1), 'c1': a.sem_fn(v.get(1))}),
    #Rule('$ROOT', ['$Answer'], 'what are the %s ?', lambda v, a: lambda : '_answer ( %(v1)s , ( %(c1)s ) )' % {'v1': v.get(1), 'c1': a.sem_fn(v.get(1))}),

    # Things that could be answers
    Rule('$Answer', ['$State'], '%s', lambda v, a: lambda var: a.sem_fn(var)),
    Rule('$Answer', ['$Landmark'], '%s',
         lambda v, a: lambda var: a.sem_fn(var)),
    Rule(
        '$Answer', ['$State'], 'population of %s', lambda v, a: lambda var:
        '_population ( %(v1)s , %(var)s ) , %(c1)s' % {
            'var': var,
            'v1': v.get(1),
            'c1': a.sem_fn(v.get(1))
Beispiel #31
0
def grammar_ab_items() -> tp.List[tp.Set[State]]:
    """Return the canonical LR(1) item sets for the a/b grammar S -> aSbS | ''.

    Each element is one parser state: a set of States (rule, dot position,
    lookahead), with '@' as the augmented start symbol and '$' as
    end-of-input.
    """
    return [{
        State(Rule('@', 'S'), 0, '$'),
        State(Rule('S', 'aSbS'), 0, '$'),
        State(Rule('S', ''), 0, '$')
    }, {State(Rule('@', 'S'), 1, '$')},
            {
                State(Rule('S', 'aSbS'), 1, '$'),
                State(Rule('S', 'aSbS'), 0, 'b'),
                State(Rule('S', ''), 0, 'b')
            }, {State(Rule('S', 'aSbS'), 2, '$')},
            {
                State(Rule('S', 'aSbS'), 1, 'b'),
                State(Rule('S', 'aSbS'), 0, 'b'),
                State(Rule('S', ''), 0, 'b')
            },
            {
                State(Rule('S', 'aSbS'), 3, '$'),
                State(Rule('S', 'aSbS'), 0, '$'),
                State(Rule('S', ''), 0, '$')
            }, {State(Rule('S', 'aSbS'), 2, 'b')},
            {State(Rule('S', 'aSbS'), 4, '$')},
            {
                State(Rule('S', 'aSbS'), 3, 'b'),
                State(Rule('S', 'aSbS'), 0, 'b'),
                State(Rule('S', ''), 0, 'b')
            }, {State(Rule('S', 'aSbS'), 4, 'b')}]
Beispiel #32
0
 def test_word_parts(self):
     # The rewrite rule '~eu >> ~eux' (tag 'Pl') matches words ending in
     # 'eu', returning (True, stem); non-matching words give (False, '')
     # and transform to an empty list.
     r = Rule("~eu >> ~eux", "Pl")
     self.assertEqual(r.match('lieu'), (True, 'li'))
     self.assertEqual(r.match('heure'), (False, ''))
     self.assertEqual(r.transform('heure'), [])
Beispiel #33
0
def grammar_ab() -> Grammar:
    """Build the grammar S -> aSbS | '' over terminals {a, b}."""
    production_rules = [
        Rule('S', 'aSbS'),
        Rule('S', ''),
    ]
    return Grammar(terminals='ab',
                   non_terminals='S',
                   start='S',
                   rules=production_rules)