def grammar_cc() -> Grammar:
    """Build the textbook LR(1) example grammar: S -> CC, C -> cC | d."""
    cc_rules = [
        Rule('S', 'CC'),
        Rule('C', 'cC'),
        Rule('C', 'd'),
    ]
    return Grammar(terminals='cd', non_terminals='SC', start='S',
                   rules=cc_rules)
def build_rules_starts_with_terminal(rules, complex_nonterminal, watched,
                                     unwatched):
    """Expand a complex nonterminal whose name begins with a terminal.

    For each nullable position after the leading terminal, substitutes the
    productions of that symbol, creating new complex nonterminals for the
    remaining tail.  Newly created complex nonterminals that are not yet
    known are queued on *unwatched*.
    """
    adding_rules = []
    symbols = complex_nonterminal.name
    for idx in range(1, len(symbols)):
        symbol = symbols[idx]
        # Stop at the first symbol that cannot vanish: a terminal or a
        # non-nullable nonterminal.
        if isinstance(symbol, Terminal) or (
                isinstance(symbol, Nonterminal) and not symbol.is_nullable):
            break
        for rule in find_rules_for_nonterminal(rules, symbol):
            if rule.is_empty():
                continue
            expansion = list(rule.right_side)
            if (idx + 1) < len(symbols):
                expansion.extend(symbols[idx + 1:])
            successor = ComplexNonterminal(expansion)
            if successor not in watched and successor not in unwatched:
                unwatched.append(successor)
            adding_rules.append(
                Rule([complex_nonterminal], [symbols[0], successor]))
    # The whole tail may vanish, leaving just the leading terminal.
    adding_rules.append(Rule([complex_nonterminal], [symbols[0]]))
    return adding_rules
def add_rule_containing_optional(self, rule):
    """
    Handles adding a rule which contains an optional element on the RHS.
    We find the leftmost optional element on the RHS, and then generate
    two variants of the rule: one in which that element is required, and
    one in which it is removed.  We add these variants in place of the
    original rule.  (If there are more optional elements further to the
    right, we'll wind up recursing.)

    For example, if the original rule is:

        Rule('$Z', '$A ?$B ?$C $D')

    then we add these rules instead:

        Rule('$Z', '$A $B ?$C $D')
        Rule('$Z', '$A ?$C $D')
    """
    # Index of the leftmost optional element on the RHS (-1 if none).
    first = next((idx for idx, elt in enumerate(rule.rhs)
                  if Unit2Grammar.is_optional(elt)), -1)
    assert first >= 0
    assert len(rule.rhs) > 1, 'Entire RHS is optional: %s' % rule
    prefix = rule.rhs[:first]
    suffix = rule.rhs[(first + 1):]
    # Variant 1: the optional element becomes required (strip the '?').
    required = (rule.rhs[first][1:],)
    self.add_rule(Rule(rule.lhs, prefix + required + suffix, rule.sem))
    # Variant 2: the optional element is dropped entirely.  A bare-value
    # semantics is kept as is; a function gets a dummy None argument in
    # place of the removed element so arity still lines up.
    sem = rule.sem
    if isinstance(rule.sem, FunctionType):
        sem = lambda sems: rule.sem(sems[:first] + [None] + sems[first:])
    self.add_rule(Rule(rule.lhs, prefix + suffix, sem))
def test_compl_word_rule(self):
    """A full-word (suppletive) rule rewrites 'aller' to explicit forms."""
    rule = Rule("aller >> vais, vas, va, allons, allez, vont", "PrInd")
    forms = ['vais', 'vas', 'va', 'allons', 'allez', 'vont']
    self.assertEqual(rule.input, 'aller')
    self.assertEqual(rule.output, forms)
    # Only the exact word matches; the whole word is the "stem".
    self.assertEqual(rule.match('aller'), (True, 'aller'))
    self.assertEqual(rule.match('parler'), (False, ''))
    self.assertEqual(rule.transform('aller'), forms)
def build_adding_rules(rule, idx):
    """Build rule variants for each prefix position before *idx*.

    For every position i < idx, the i-th right-side symbol is replaced by
    its non-nullable counterpart and the preceding symbols are dropped.
    If any symbols remain from *idx* onward, a final variant keeps just
    that tail.
    """
    adding_rules = []
    for i in range(idx):
        head = [rule.right_side[i].create_nonnullable_nonterminal()]
        head.extend(rule.right_side[i + 1:])
        adding_rules.append(Rule(list(rule.left_side), head))
    if idx < len(rule.right_side):
        adding_rules.append(Rule(list(rule.left_side), rule.right_side[idx:]))
    return adding_rules
def test_rule(self):
    """A suffix rule conjugates '-er' verbs; specific rules are included in general ones."""
    er_rule = Rule("~er >> ~e, ~es, ~e, ~ons, ~ez, ~ent", "PrInd")
    self.assertEqual(er_rule.input, '~er')
    self.assertEqual(er_rule.output, ['~e', '~es', '~e', '~ons', '~ez', '~ent'])
    # Matching strips the suffix and returns the stem (possibly empty).
    self.assertEqual(er_rule.match('parler'), (True, 'parl'))
    self.assertEqual(er_rule.match('er'), (True, ''))
    self.assertEqual(er_rule.match('finir'), (False, ''))
    self.assertEqual(
        er_rule.transform('parler'),
        ['parle', 'parles', 'parle', 'parlons', 'parlez', 'parlent'])
    self.assertEqual(er_rule.transform('finir'), [])
    # '~cer' is a special case of '~er', not the other way round.
    cer_rule = Rule("~cer >> ~ce, ~ces, ~ce, ~cons, ~cez, ~cent", "PrInd")
    self.assertEqual(er_rule.includes(cer_rule), True)
    self.assertEqual(cer_rule.includes(er_rule), False)
def parse(self, words):
    """Parses a sequence of words.

    Args:
        words (list<string>): The sequence of words to be parsed.

    Returns:
        A list of parse trees valid for the given input sequence, or []
        if no valid parse.
    """
    chart = EarleyChart(words)
    # Seed the chart with the dummy start state GAMMA -> . S.
    seed = State(rule=Rule(_GAMMA, [self._grammar.distinguished_symbol]),
                 span_start=0,
                 span_stop=0,
                 dot_position=0)
    chart.enqueue(seed, 0)
    for i in range(len(words) + 1):
        for state in chart[i]:
            if state.incomplete:
                self._predict(state, chart)
                self._scan(state, chart)
            else:
                self._complete(state, chart)
    return [self._tree_from_parse(p, chart)[1]
            for p in self._full_parses(chart)]
def calculate_control_table(self) -> None:
    """Fill the LR(1) ACTION (control_table) and GOTO tables from item sets."""
    states = self.items()
    n = len(states)
    # ACTION defaults to ERROR for every terminal; GOTO defaults to -1.
    self.control_table = [
        {symbol: Cell(Cell.ERROR) for symbol in self.grammar.terminals}
        for _ in range(n)
    ]
    self.goto = [
        {symbol: -1 for symbol in self.grammar.non_terminals}
        for _ in range(n)
    ]
    for index, superstate in enumerate(states):
        for state in superstate:
            # SHIFT on a terminal after the dot.
            if state.after_dot in self.grammar.terminals:
                try:
                    target = states.index(
                        self.goto_function(superstate, state.after_dot))
                    self.control_table[index][state.after_dot] = Cell(
                        Cell.SHIFT, target)
                except ValueError:
                    pass
            # REDUCE on a completed item (except the augmented start rule).
            if state.after_dot is None and state.rule.left != '@':
                self.control_table[index][state.look] = Cell(
                    Cell.REDUCE, state.rule)
            # ACCEPT on the completed augmented item with '$' lookahead.
            if state == State(Rule('@', self.grammar.start), 1, '$'):
                self.control_table[index]['$'] = Cell(Cell.ACCEPT)
            # GOTO on a nonterminal after the dot.
            if state.after_dot in self.grammar.non_terminals:
                try:
                    target = states.index(
                        self.goto_function(superstate, state.after_dot))
                    self.goto[index][state.after_dot] = target
                except ValueError:
                    pass
def city_rule(name):
    """Build a lexical $City rule whose semantics emit a _cityid constant.

    Multi-word names are wrapped in quote tokens for the target logical
    form.  NOTE(review): unlike river_rule, the quoted name is also used
    as the rule's surface form — confirm this is intended.
    """
    if ' ' in name:
        name = "' %s '" % name
    semantics = lambda v: lambda var: (
        ('_const ( %s , _cityid ( ' % var) + name + ' , _ ) )')
    return Rule('$City', [], name, semantics, weight=0.05)
def river_rule(name):
    """Build a lexical $River rule whose semantics emit a _riverid constant.

    The rule's surface form keeps the original name; the logical form
    drops a trailing ' river' suffix and quotes multi-word names.
    """
    surface = name
    if name.endswith(' river'):
        name = name[:-6]
    if ' ' in name:
        name = "' %s '" % name
    semantics = lambda v: lambda var: (
        ('_const ( %s , _riverid ( ' % var) + name + ' ) )')
    return Rule('$River', [], surface, semantics)
def apply_annotators(self, chart, tokens, start, end):
    """Add parses to chart cell (start, end) by applying annotators."""
    if not hasattr(self, 'annotators'):
        return
    for annotator in self.annotators:
        for category, semantics in annotator.annotate(tokens[start:end]):
            # Stop early if the chart cell is already at capacity.
            if not self.check_capacity(chart, start, end):
                return
            rule = Rule(category, tuple(tokens[start:end]), semantics)
            chart[(start, end)].append(Parse(rule, tokens[start:end]))
def delete_useless_nonterminals_from_rule(rule, useless_nonterminals):
    """Return a copy of *rule* with all useless nonterminals removed
    from its right side; an emptied right side becomes [EmptySymbol()]."""
    kept = [symbol for symbol in rule.right_side
            if symbol not in useless_nonterminals]
    if not kept:
        kept = [EmptySymbol()]
    return Rule(list(rule.left_side), kept)
def add_n_ary_rule(self, rule):
    """
    Handles adding a rule with three or more non-terminals on the RHS.
    We introduce a new category which covers all elements on the RHS
    except the first, and then generate two variants of the rule: one
    which consumes those elements to produce the new category, and
    another which combines the new category which the first element to
    produce the original LHS category.  We add these variants in place
    of the original rule.  (If the new rules still contain more than two
    elements on the RHS, we'll wind up recursing.)

    For example, if the original rule is:

        Rule('$Z', '$A $B $C $D')

    then we create a new category '$Z_$A' (roughly, "$Z missing $A to
    the left"), and add these rules instead:

        Rule('$Z_$A', '$B $C $D')
        Rule('$Z', '$A $Z_$A')
    """
    def add_category(base_name):
        # Invent a fresh category name by appending '_' until unique.
        assert self.is_cat(base_name)
        name = base_name
        while name in self.categories:
            name = name + '_'
        self.categories.add(name)
        return name

    def apply_semantics(rule, sems):
        # Note that this function would not be needed if we required that
        # semantics always be functions, never bare values.  That is, if
        # instead of Rule('$E', 'one', 1) we required
        # Rule('$E', 'one', lambda sems: 1).  But that would be cumbersome.
        if isinstance(rule.sem, FunctionType):
            return rule.sem(sems)
        return rule.sem

    category = add_category('%s_%s' % (rule.lhs, rule.rhs[0]))
    self.add_rule(Rule(category, rule.rhs[1:], lambda sems: sems))
    self.add_rule(
        Rule(rule.lhs, (rule.rhs[0], category),
             lambda sems: apply_semantics(rule, [sems[0]] + sems[1])))
def replace_nonleft_terminal_to_nonterminal(rule, terminal, nonterminal):
    """Return a rule with every occurrence of *terminal* after position 0
    of the right side replaced by *nonterminal*.

    The leftmost symbol is deliberately untouched (Greibach normal form
    keeps a leading terminal).  When nothing matches, the original right
    side is reused as is.
    """
    tail = rule.right_side[1:]
    if terminal not in tail:
        return Rule(rule.left_side, rule.right_side)
    new_right = [rule.right_side[0]]
    new_right.extend(nonterminal if symbol == terminal else symbol
                     for symbol in tail)
    return Rule(rule.left_side, new_right)
def predict(self, word: str) -> bool:
    """Run the Earley algorithm and report whether *word* is derivable."""
    situations = [set() for _ in range(len(word) + 1)]
    # Seed with the augmented start item @ -> . start.
    situations[0].add(Situation(Rule('@', self.grammar.start), 0, 0))
    for j in range(len(word) + 1):
        self.Scan(situations, word, j)
        frontier = situations[j]
        # Iterate Complete/Predict to a fixed point for this position.
        while True:
            before = len(situations[j])
            produced = self.Complete(situations, frontier)
            produced |= self.Predict(frontier, j)
            situations[j] |= produced
            frontier = produced
            if before == len(situations[j]):
                break
    final = Situation(Rule('@', self.grammar.start), 1, 0)
    return final in situations[len(word)]
def replace_rule(new_rules, rule):
    """Substitute the leading nonterminal of *rule* by each of its
    productions found in *new_rules*; if none exist, keep *rule* as is."""
    head = rule.right_side[0]
    head_rules = find_rules_for_nonterminal(new_rules, head)
    if not head_rules:
        return [rule]
    return [
        Rule(rule.left_side,
             replace_nonterminal(rule.right_side, 0, head_rule.right_side))
        for head_rule in head_rules
    ]
def items(self) -> tp.List[tp.Set[State]]:
    """Compute the canonical collection of LR(1) item sets."""
    collection = [self.closure({State(Rule('@', self.grammar.start), 0, '$')})]
    while True:
        discovered = []
        for superstate in collection:
            for symbol in (self.grammar.terminals +
                           self.grammar.non_terminals):
                successor = self.goto_function(superstate, symbol)
                if (successor and successor not in collection
                        and successor not in discovered):
                    discovered.append(successor)
        collection += discovered
        if not discovered:
            break
    return collection
def __init__(self, grammar: Grammar):
    """Augment *grammar* with '@'/'$' markers and precompute LR tables."""
    self.grammar = grammar
    # Augmented start symbol and end-of-input marker.
    self.grammar.non_terminals.append('@')
    self.grammar.add_rule(Rule('@', self.grammar.start))
    self.grammar.terminals.append('$')
    # Nullability flags, one per nonterminal.
    self.has_eps = dict.fromkeys(self.grammar.non_terminals, False)
    self.calculate_has_eps()
    # FIRST sets (each nonterminal needs its own set object).
    self.first_helper = {
        symbol: set() for symbol in self.grammar.non_terminals
    }
    self.calculate_first()
    self.control_table = []
    self.goto = []
    self.calculate_control_table()
def build_rules_starts_with_nonterminal(rules, complex_nonterminal, watched,
                                        unwatched):
    """Expand a complex nonterminal whose name begins with a nonterminal.

    Each non-empty production of the leading nonterminal is substituted
    in, yielding a new complex nonterminal; unseen ones are queued on
    *unwatched*.
    """
    adding_rules = []
    head_rules = find_rules_for_nonterminal(rules,
                                            complex_nonterminal.name[0])
    for head_rule in head_rules:
        if head_rule.is_empty():
            continue
        expanded = replace_nonterminal(complex_nonterminal.name, 0,
                                       head_rule.right_side)
        successor = ComplexNonterminal(expanded)
        if successor not in watched and successor not in unwatched:
            unwatched.append(successor)
        adding_rules.append(Rule([complex_nonterminal], [successor]))
    return adding_rules
def convert_to_greibach(grammar):
    """Convert *grammar* to Greibach normal form (algorithm 3).

    Steps: remove empty rules, order nonterminals, substitute leading
    nonterminals bottom-up, then introduce X{t} -> t helpers so every
    non-leftmost terminal becomes a nonterminal.  Returns a new Grammar
    with non-derivable nonterminals removed.
    """
    # Fixed: Python 2 print statements -> print() calls, consistent with
    # the rest of the codebase.
    print("algorithm 3")
    converted_grammar = delete_empty_rules(grammar)
    print("converted grammar:")
    print(converted_grammar)
    new_grammar = Grammar()
    new_grammar.axiom = converted_grammar.axiom
    new_grammar.terminals = set(converted_grammar.terminals)
    sorted_nonterminals = sort_nonterminals(
        converted_grammar.nonterminals, converted_grammar.rules)
    print("sorted nonterminals:", [str(s) for s in sorted_nonterminals])
    new_grammar.nonterminals = set(sorted_nonterminals)
    # Rebuild rules bottom-up: start from the last nonterminal, then
    # substitute leading nonterminals for the earlier ones.
    nonterminal = sorted_nonterminals[-1]
    new_rules = find_rules_for_nonterminal(converted_grammar.rules,
                                           nonterminal)
    for idx in range(len(sorted_nonterminals) - 2, -1, -1):
        nonterminal = sorted_nonterminals[idx]
        rules = find_rules_for_nonterminal(converted_grammar.rules,
                                           nonterminal)
        for rule in rules:
            if isinstance(rule.right_side[0], Nonterminal):
                new_rules.extend(replace_rule(new_rules, rule))
            else:
                new_rules.append(rule)
    # Add X{t} -> t helper rules and replace non-leftmost terminals.
    for terminal in converted_grammar.terminals:
        new_nonterminal = Nonterminal("X{%s}" % str(terminal))
        new_grammar.nonterminals.add(new_nonterminal)
        new_rules.append(Rule([new_nonterminal], [terminal]))
        # BUG FIX: the original rebound the loop variable
        # (`rule = replace_...`), discarding every replacement.  Rebuild
        # the rule list so the substitution actually takes effect.
        new_rules = [
            replace_nonleft_terminal_to_nonterminal(rule, terminal,
                                                    new_nonterminal)
            for rule in new_rules
        ]
    new_grammar.rules = new_rules
    print("grammar:", new_grammar)
    return delete_nonderivable_nonterminals(new_grammar)
def build_new_rules(grammar, disappearing_nonterminals, new_nonterminals):
    """Rebuild the rule set for empty-rule elimination.

    Non-empty rules are expanded into their non-nullable variants; rules
    for disappearing nonterminals additionally get copies headed by the
    non-nullable counterpart.  Finally only rules whose left side is in
    *new_nonterminals* are kept.
    """
    new_rules = []
    for rule in grammar.rules:
        if rule.is_empty():
            new_rules.append(rule)
            continue
        idx = find_left_nonnullable_symbol_idx(rule)
        adding_rules = build_adding_rules(rule, idx)
        new_rules.extend(adding_rules)
        if rule.left_side[0] in disappearing_nonterminals:
            for adding_rule in adding_rules:
                head = [adding_rule.left_side[0]
                        .create_nonnullable_nonterminal()]
                new_rules.append(Rule(head, list(adding_rule.right_side)))
    # Keep only rules whose left side survived the transformation.
    return [rule for rule in new_rules
            if rule.left_side[0] in new_nonterminals]
def grammar_cc_items() -> tp.List[tp.Set[State]]:
    """Expected canonical LR(1) item sets for the S -> CC grammar fixture."""
    return [
        {
            State(Rule('@', 'S'), 0, '$'),
            State(Rule('S', 'CC'), 0, '$'),
            State(Rule('C', 'cC'), 0, 'c'),
            State(Rule('C', 'cC'), 0, 'd'),
            State(Rule('C', 'd'), 0, 'c'),
            State(Rule('C', 'd'), 0, 'd'),
        },
        {State(Rule('@', 'S'), 1, '$')},
        {
            State(Rule('S', 'CC'), 1, '$'),
            State(Rule('C', 'cC'), 0, '$'),
            State(Rule('C', 'd'), 0, '$'),
        },
        {
            State(Rule('C', 'cC'), 1, 'c'),
            State(Rule('C', 'cC'), 1, 'd'),
            State(Rule('C', 'cC'), 0, 'c'),
            State(Rule('C', 'cC'), 0, 'd'),
            State(Rule('C', 'd'), 0, 'c'),
            State(Rule('C', 'd'), 0, 'd'),
        },
        {State(Rule('C', 'd'), 1, 'c'), State(Rule('C', 'd'), 1, 'd')},
        {State(Rule('S', 'CC'), 2, '$')},
        {
            State(Rule('C', 'cC'), 1, '$'),
            State(Rule('C', 'cC'), 0, '$'),
            State(Rule('C', 'd'), 0, '$'),
        },
        {State(Rule('C', 'd'), 1, '$')},
        {State(Rule('C', 'cC'), 2, 'c'), State(Rule('C', 'cC'), 2, 'd')},
        {State(Rule('C', 'cC'), 2, '$')},
    ]
def grammar_cc_control_table(
) -> tp.Tuple[tp.List[tp.Dict[str, Cell]], tp.List[tp.Dict[str, int]]]:
    """Expected (ACTION, GOTO) tables for the S -> CC grammar fixture."""
    actions = [
        # state 0
        {'c': Cell(Cell.SHIFT, 3), 'd': Cell(Cell.SHIFT, 4),
         '$': Cell(Cell.ERROR)},
        # state 1
        {'c': Cell(Cell.ERROR), 'd': Cell(Cell.ERROR),
         '$': Cell(Cell.ACCEPT)},
        # state 2
        {'c': Cell(Cell.SHIFT, 6), 'd': Cell(Cell.SHIFT, 7),
         '$': Cell(Cell.ERROR)},
        # state 3
        {'c': Cell(Cell.SHIFT, 3), 'd': Cell(Cell.SHIFT, 4),
         '$': Cell(Cell.ERROR)},
        # state 4
        {'c': Cell(Cell.REDUCE, Rule('C', 'd')),
         'd': Cell(Cell.REDUCE, Rule('C', 'd')),
         '$': Cell(Cell.ERROR)},
        # state 5
        {'c': Cell(Cell.ERROR), 'd': Cell(Cell.ERROR),
         '$': Cell(Cell.REDUCE, Rule('S', 'CC'))},
        # state 6
        {'c': Cell(Cell.SHIFT, 6), 'd': Cell(Cell.SHIFT, 7),
         '$': Cell(Cell.ERROR)},
        # state 7
        {'c': Cell(Cell.ERROR), 'd': Cell(Cell.ERROR),
         '$': Cell(Cell.REDUCE, Rule('C', 'd'))},
        # state 8
        {'c': Cell(Cell.REDUCE, Rule('C', 'cC')),
         'd': Cell(Cell.REDUCE, Rule('C', 'cC')),
         '$': Cell(Cell.ERROR)},
        # state 9
        {'c': Cell(Cell.ERROR), 'd': Cell(Cell.ERROR),
         '$': Cell(Cell.REDUCE, Rule('C', 'cC'))},
    ]
    gotos = [
        {'@': -1, 'S': 1, 'C': 2},
        {'@': -1, 'S': -1, 'C': -1},
        {'@': -1, 'S': -1, 'C': 5},
        {'@': -1, 'S': -1, 'C': 8},
        {'@': -1, 'S': -1, 'C': -1},
        {'@': -1, 'S': -1, 'C': -1},
        {'@': -1, 'S': -1, 'C': 9},
        {'@': -1, 'S': -1, 'C': -1},
        {'@': -1, 'S': -1, 'C': -1},
        {'@': -1, 'S': -1, 'C': -1},
    ]
    return (actions, gotos)
def test_grammar_creation(self):
    """Grammar should bucket rules into binary and lexical collections."""
    numeral_rules = [Rule('$E', w) for w in ('one', 'two', 'three', 'four')]
    operator_rules = [
        Rule('$UnOp', 'minus'),
        Rule('$BinOp', 'plus'),
        Rule('$BinOp', 'minus'),
        Rule('$BinOp', 'times'),
    ]
    compositional_rules = [
        Rule('$E', '$UnOp $E'),
        Rule('$EBO', '$E $BinOp'),
        Rule('$E', '$EBO $E'),
    ]
    arithmetic_grammar = Grammar(numeral_rules + operator_rules +
                                 compositional_rules)
    # 3 compositional rules are binary; the 8 single-word rules are
    # lexical, but '$E one'..'four' plus operators give 7 distinct keys.
    self.assertEqual(3, len(arithmetic_grammar.binary_rules))
    self.assertEqual(7, len(arithmetic_grammar.lexical_rules))
def parseRule(string):
    """Parse a grammar rule of the form 'LHS -> RHS1 RHS2 ...' into a Rule.

    Fixed: the right side is now split on runs of whitespace via split()
    instead of split(" "), so double spaces or tabs between symbols no
    longer yield spurious empty-string symbols.
    """
    left, right = string.split('->')
    # split() with no argument strips the ends and collapses whitespace runs.
    return Rule(left.strip(), right.split())
class TestMethods(unittest.TestCase):
    """End-to-end tests for the GeoQuery grammar, built up layer by layer."""

    # Words that may appear around a query without affecting its meaning.
    optional_words = [
        'the', '?', 'what', 'is', 'in', 'of', 'how', 'many', 'are', 'which',
        'that', 'with', 'has', 'major', 'does', 'have', 'where', 'me',
        'there', 'give', 'name', 'all', 'a', 'by', 'you', 'to', 'tell',
        'other', 'it', 'do', 'whose', 'show', 'one', 'on', 'for', 'can',
        'whats', 'urban', 'them', 'list', 'exist', 'each', 'could', 'about'
    ]
    rules_optionals = [
        Rule('$ROOT', '?$Optionals $Query ?$Optionals',
             lambda sems: sems[1]),
        Rule('$Optionals', '$Optional ?$Optionals'),
    ] + [Rule('$Optional', word) for word in optional_words]
    rules_collection_entity = [
        Rule('$Query', '$Collection', lambda sems: sems[0]),
        Rule('$Collection', '$Entity', lambda sems: sems[0]),
    ]
    reader = GeobaseReader()
    geobase = GraphKB(reader.tuples)
    annotators = [NumberAnnotator(), GeobaseAnnotator(geobase)]

    def test_simple_grammar(self):
        rules = self.rules_optionals + self.rules_collection_entity
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        parses = grammar.parse('what is utah')
        self.assertEqual('/state/utah', parses[0].semantics)
        self.assertEqual(('/state/utah', ),
                         self.geobase.executor().execute(parses[0].semantics))

    domain = GeoQueryDomain()

    def test_evaluate_simple_grammar(self):
        from experiment import sample_wins_and_losses
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model
        rules = self.rules_optionals + self.rules_collection_entity
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()
        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(17, metric_values['number of parses'])

    rules_types = [
        Rule('$Collection', '$Type', lambda sems: sems[0]),
        Rule('$Type', 'state', 'state'),
        Rule('$Type', 'states', 'state'),
        Rule('$Type', 'city', 'city'),
        Rule('$Type', 'cities', 'city'),
        Rule('$Type', 'big cities', 'city'),
        Rule('$Type', 'towns', 'city'),
        Rule('$Type', 'river', 'river'),
        Rule('$Type', 'rivers', 'river'),
        Rule('$Type', 'mountain', 'mountain'),
        Rule('$Type', 'mountains', 'mountain'),
        Rule('$Type', 'mount', 'mountain'),
        Rule('$Type', 'peak', 'mountain'),
        Rule('$Type', 'road', 'road'),
        Rule('$Type', 'roads', 'road'),
        Rule('$Type', 'lake', 'lake'),
        Rule('$Type', 'lakes', 'lake'),
        Rule('$Type', 'country', 'country'),
        Rule('$Type', 'countries', 'country'),
    ]

    def test_grammar_with_types(self):
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        parses = grammar.parse('name the lakes')
        self.assertEqual(
            ('/lake/becharof', '/lake/champlain', '/lake/erie',
             '/lake/flathead', '/lake/great_salt_lake', '/lake/huron',
             '/lake/iliamna', '/lake/lake_of_the_woods', '/lake/michigan',
             '/lake/mille_lacs', '/lake/naknek', '/lake/okeechobee',
             '/lake/ontario', '/lake/pontchartrain', '/lake/rainy',
             '/lake/red', '/lake/salton_sea', '/lake/st._clair',
             '/lake/superior', '/lake/tahoe', '/lake/teshekpuk',
             '/lake/winnebago'),
            self.geobase.executor().execute(parses[0].semantics))

    def test_evaluate_grammar_with_types(self):
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()
        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(20, metric_values['number of parses'])

    rules_relations = [
        Rule('$Collection', '$Relation ?$Optionals $Collection',
             lambda sems: sems[0](sems[2])),
        Rule('$Relation', '$FwdRelation',
             lambda sems: (lambda arg: (sems[0], arg))),
        Rule('$Relation', '$RevRelation',
             lambda sems: (lambda arg: (arg, sems[0]))),
        Rule('$FwdRelation', '$FwdBordersRelation', 'borders'),
        Rule('$FwdBordersRelation', 'border'),
        Rule('$FwdBordersRelation', 'bordering'),
        Rule('$FwdBordersRelation', 'borders'),
        Rule('$FwdBordersRelation', 'neighbor'),
        Rule('$FwdBordersRelation', 'neighboring'),
        Rule('$FwdBordersRelation', 'surrounding'),
        Rule('$FwdBordersRelation', 'next to'),
        Rule('$FwdRelation', '$FwdTraversesRelation', 'traverses'),
        Rule('$FwdTraversesRelation', 'cross ?over'),
        Rule('$FwdTraversesRelation', 'flow through'),
        Rule('$FwdTraversesRelation', 'flowing through'),
        Rule('$FwdTraversesRelation', 'flows through'),
        Rule('$FwdTraversesRelation', 'go through'),
        Rule('$FwdTraversesRelation', 'goes through'),
        Rule('$FwdTraversesRelation', 'in'),
        Rule('$FwdTraversesRelation', 'pass through'),
        Rule('$FwdTraversesRelation', 'passes through'),
        Rule('$FwdTraversesRelation', 'run through'),
        Rule('$FwdTraversesRelation', 'running through'),
        Rule('$FwdTraversesRelation', 'runs through'),
        Rule('$FwdTraversesRelation', 'traverse'),
        Rule('$FwdTraversesRelation', 'traverses'),
        Rule('$RevRelation', '$RevTraversesRelation', 'traverses'),
        Rule('$RevTraversesRelation', 'has'),
        # 'how many states have major rivers'
        Rule('$RevTraversesRelation', 'have'),
        Rule('$RevTraversesRelation', 'lie on'),
        Rule('$RevTraversesRelation', 'next to'),
        Rule('$RevTraversesRelation', 'traversed by'),
        Rule('$RevTraversesRelation', 'washed by'),
        # 'how many states have a city named springfield'
        Rule('$FwdRelation', '$FwdContainsRelation', 'contains'),
        Rule('$FwdContainsRelation', 'has'),
        Rule('$FwdContainsRelation', 'have'),
        Rule('$RevRelation', '$RevContainsRelation', 'contains'),
        Rule('$RevContainsRelation', 'contained by'),
        Rule('$RevContainsRelation', 'in'),
        Rule('$RevContainsRelation', 'found in'),
        Rule('$RevContainsRelation', 'located in'),
        Rule('$RevContainsRelation', 'of'),
        Rule('$RevRelation', '$RevCapitalRelation', 'capital'),
        Rule('$RevCapitalRelation', 'capital'),
        Rule('$RevCapitalRelation', 'capitals'),
        Rule('$RevRelation', '$RevHighestPointRelation', 'highest_point'),
        Rule('$RevHighestPointRelation', 'high point'),
        Rule('$RevHighestPointRelation', 'high points'),
        Rule('$RevHighestPointRelation', 'highest point'),
        Rule('$RevHighestPointRelation', 'highest points'),
        Rule('$RevRelation', '$RevLowestPointRelation', 'lowest_point'),
        Rule('$RevLowestPointRelation', 'low point'),
        Rule('$RevLowestPointRelation', 'low points'),
        Rule('$RevLowestPointRelation', 'lowest point'),
        Rule('$RevLowestPointRelation', 'lowest points'),
        Rule('$RevLowestPointRelation', 'lowest spot'),
        Rule('$RevRelation', '$RevHighestElevationRelation',
             'highest_elevation'),
        Rule('$RevHighestElevationRelation', '?highest elevation'),
        Rule('$RevRelation', '$RevHeightRelation', 'height'),
        Rule('$RevHeightRelation', 'elevation'),
        Rule('$RevHeightRelation', 'height'),
        Rule('$RevHeightRelation', 'high'),
        Rule('$RevHeightRelation', 'tall'),
        Rule('$RevRelation', '$RevAreaRelation', 'area'),
        Rule('$RevAreaRelation', 'area'),
        Rule('$RevAreaRelation', 'big'),
        Rule('$RevAreaRelation', 'large'),
        Rule('$RevAreaRelation', 'size'),
        Rule('$RevRelation', '$RevPopulationRelation', 'population'),
        Rule('$RevPopulationRelation', 'big'),
        Rule('$RevPopulationRelation', 'large'),
        Rule('$RevPopulationRelation', 'populated'),
        Rule('$RevPopulationRelation', 'population'),
        Rule('$RevPopulationRelation', 'populations'),
        Rule('$RevPopulationRelation', 'populous'),
        Rule('$RevPopulationRelation', 'size'),
        Rule('$RevRelation', '$RevLengthRelation', 'length'),
        Rule('$RevLengthRelation', 'length'),
        Rule('$RevLengthRelation', 'long'),
    ]

    def test_grammar_with_relations(self):
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        parses = grammar.parse('what is the capital of vermont ?')
        self.assertEqual(('/state/vermont', 'capital'), parses[0].semantics)
        self.assertEqual(('/city/montpelier_vt', ),
                         self.geobase.executor().execute(parses[0].semantics))

    def test_evaluate_grammar_with_relations(self):
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()
        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(256, metric_values['number of parses'])

    rules_intersection = [
        Rule('$Collection', '$Collection $Collection',
             lambda sems: ('.and', sems[0], sems[1])),
        Rule('$Collection', '$Collection $Optional $Collection',
             lambda sems: ('.and', sems[0], sems[2])),
        Rule('$Collection', '$Collection $Optional $Optional $Collection',
             lambda sems: ('.and', sems[0], sems[3])),
    ]

    def test_grammar_with_intersections(self):
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        parses = grammar.parse('states bordering california')
        self.assertEqual(('.and', 'state', ('borders', '/state/california')),
                         parses[0].semantics)
        self.assertEqual(('/state/arizona', '/state/nevada', '/state/oregon'),
                         self.geobase.executor().execute(parses[0].semantics))

    def test_evaluate_grammar_with_intersections(self):
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()
        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(1177, metric_values['number of parses'])

    rules_superlatives = [
        Rule('$Collection', '$Superlative ?$Optionals $Collection',
             lambda sems: sems[0] + (sems[2], )),
        Rule('$Collection', '$Collection ?$Optionals $Superlative',
             lambda sems: sems[2] + (sems[0], )),
        Rule('$Superlative', 'largest', ('.argmax', 'area')),
        Rule('$Superlative', 'largest', ('.argmax', 'population')),
        Rule('$Superlative', 'biggest', ('.argmax', 'area')),
        Rule('$Superlative', 'biggest', ('.argmax', 'population')),
        Rule('$Superlative', 'smallest', ('.argmin', 'area')),
        Rule('$Superlative', 'smallest', ('.argmin', 'population')),
        Rule('$Superlative', 'longest', ('.argmax', 'length')),
        Rule('$Superlative', 'shortest', ('.argmin', 'length')),
        Rule('$Superlative', 'tallest', ('.argmax', 'height')),
        Rule('$Superlative', 'highest', ('.argmax', 'height')),
        Rule('$Superlative', '$MostLeast $RevRelation',
             lambda sems: (sems[0], sems[1])),
        Rule('$MostLeast', 'most', '.argmax'),
        Rule('$MostLeast', 'least', '.argmin'),
        Rule('$MostLeast', 'lowest', '.argmin'),
        Rule('$MostLeast', 'greatest', '.argmax'),
        Rule('$MostLeast', 'highest', '.argmax'),
    ]

    def test_grammar_with_superlatives(self):
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        parses = grammar.parse('tallest mountain')
        self.assertEqual(('.argmax', 'height', 'mountain'),
                         parses[0].semantics)
        self.assertEqual(('/mountain/mckinley', ),
                         self.geobase.executor().execute(parses[0].semantics))

    def test_evaluate_grammar_with_superlatives(self):
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives)
        grammar = Unit2Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()
        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(2658, metric_values['number of parses'])

    rules_reverse_joins = [
        Rule('$Collection', '$Collection ?$Optionals $Relation',
             lambda sems: Unit3Grammar.reverse(sems[2])(sems[0])),
    ]

    def test_grammar_with_reverse_joins(self):
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)
        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
        parses = grammar.parse('which states does the rio grande cross')
        self.assertEqual(('.and', 'state', ('/river/rio_grande', 'traverses')),
                         parses[0].semantics)
        self.assertEqual(
            ('/state/colorado', '/state/new_mexico', '/state/texas'),
            self.geobase.executor().execute(parses[0].semantics))

    def test_evaluate_grammar_with_reverse_joins(self):
        from experiment import sample_wins_and_losses
        from geoquery import GeoQueryDomain
        from metrics import DenotationOracleAccuracyMetric
        from scoring import Model
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)
        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        metric = DenotationOracleAccuracyMetric()
        # If printing=True, prints a sampling of wins (correct semantics in
        # first parse) and losses on the dataset.
        metric_values = sample_wins_and_losses(domain=self.domain,
                                               model=model,
                                               metric=metric,
                                               seed=1,
                                               printing=False)
        self.assertEqual(11562, metric_values['number of parses'])
        self.assertEqual(152, metric_values['denotation accuracy'])

    def test_evaluate_model(self):
        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)
        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)
        model = Model(grammar=grammar,
                      executor=self.geobase.executor().execute)
        # Set print_examples=True and look for 'what state has the shortest
        # river?'.
        evaluate_model(model=model,
                       examples=geo880_train_examples[:10],
                       metrics=denotation_match_metrics(),
                       print_examples=False)

    # SLIDES

    def test_feature_function(self):
        from experiment import evaluate_model
        from metrics import denotation_match_metrics
        from scoring import Model
        from geo880 import geo880_train_examples
        rules = (self.rules_optionals + self.rules_collection_entity +
                 self.rules_types + self.rules_relations +
                 self.rules_intersection + self.rules_superlatives +
                 self.rules_reverse_joins)
        grammar = Unit3Grammar(rules=rules, annotators=self.annotators)

        def empty_denotation_feature(parse):
            # Penalize parses whose denotation is the empty tuple.
            features = defaultdict(float)
            if parse.denotation == ():
                features['empty_denotation'] += 1.0
            return features

        weights = {'empty_denotation': -1.0}
        model = Model(grammar=grammar,
                      feature_fn=empty_denotation_feature,
                      weights=weights,
                      executor=self.geobase.executor().execute)
        metric_values = evaluate_model(model=model,
                                       examples=geo880_train_examples,
                                       metrics=denotation_match_metrics(),
                                       print_examples=False)
        self.assertEqual(235, metric_values['denotation accuracy'])
'S': -1 }, { '@': -1, 'S': 9 }, { '@': -1, 'S': -1 }]) @pytest.mark.parametrize('grammar, tests', [(Grammar(terminals='abcd', non_terminals='ABCDEFGHIJK', start='A', rules=[ Rule('A', 'BC'), Rule('B', 'DEFG'), Rule('C', 'HIJK'), Rule('D', 'E'), Rule('E', 'a'), Rule('E', ''), Rule('F', 'G'), Rule('G', 'b'), Rule('H', 'I'), Rule('I', 'c'), Rule('I', ''), Rule('J', 'K'), Rule('K', 'd') ]), [('A', {'a', 'b'}), ('B', {'a', 'b'}), ('', set()), ('C', {'c', 'd'}), ('J', {'d'}), ('Dcb', {'a', 'c'})]),
print(sentence) print('parsing') predictions = predict(sentence, W, U) lastPrediction = [] for h in predictions: prediction = predictRules(h, pos, rules) newPreds = listDifference(lastPrediction, prediction[len(pos):]) print(prediction[:len(pos)] + newPreds, len(prediction)) lastPrediction = prediction[len(pos):] # print (prediction[:], len(prediction)) # analyze rules not found rules_original = list(set(Rule.fromTree(r) for r in t.allRules())) rules_original = [r for r in rules_original if not r.terminalRule] found = [] not_found = [] for r in rules_original: if r.terminalRule: continue if r in prediction: found.append(r) else: not_found.append(r) for nf in not_found: print('not found', nf, nf in rules)
def grammar_ab_control_table(
) -> tp.Tuple[tp.List[tp.Dict[str, Cell]], tp.List[tp.Dict[str, int]]]:
    """Return the (action table, goto table) pair for the a/b grammar.

    Each action row maps a lookahead symbol ('a', 'b' or '$') to a Cell;
    each goto row maps a non-terminal ('@' or 'S') to the successor state
    index, with -1 meaning "no transition". Row i describes parser state i.
    """
    # Tiny constructors so each table entry below is a fresh Cell, exactly
    # as the hand-written literal table produced.
    def shift(state: int) -> Cell:
        return Cell(Cell.SHIFT, state)

    def reduce_eps() -> Cell:
        return Cell(Cell.REDUCE, Rule('S', ''))

    def reduce_asbs() -> Cell:
        return Cell(Cell.REDUCE, Rule('S', 'aSbS'))

    def error() -> Cell:
        return Cell(Cell.ERROR)

    actions = [
        {'a': shift(2), 'b': error(), '$': reduce_eps()},       # state 0
        {'a': error(), 'b': error(), '$': Cell(Cell.ACCEPT)},   # state 1
        {'a': shift(4), 'b': reduce_eps(), '$': error()},       # state 2
        {'a': error(), 'b': shift(5), '$': error()},            # state 3
        {'a': shift(4), 'b': reduce_eps(), '$': error()},       # state 4
        {'a': shift(2), 'b': error(), '$': reduce_eps()},       # state 5
        {'a': error(), 'b': shift(8), '$': error()},            # state 6
        {'a': error(), 'b': error(), '$': reduce_asbs()},       # state 7
        {'a': shift(4), 'b': reduce_eps(), '$': error()},       # state 8
        {'a': error(), 'b': reduce_asbs(), '$': error()},       # state 9
    ]
    # Only 'S' ever has a goto transition; '@' (the augmented start) never does.
    goto_targets = (1, -1, 3, -1, 6, 7, -1, -1, 9, -1)
    gotos = [{'@': -1, 'S': target} for target in goto_targets]
    return (actions, gotos)
def river_rule(name):
    """Build a $River lexical rule for the given river name.

    The trailing ' river' suffix, if present, is stripped before building
    the riverid; multi-word names are wrapped in quotes. The original
    (unstripped) name remains the rule's surface form.
    """
    orig_name = name
    if name.endswith(' river'):
        name = name[:-6]
    if ' ' in name:
        name = "' %s '" % name
    # The inner lambda closes over the final (stripped/quoted) `name`.
    return Rule(
        '$River', [], orig_name,
        lambda v: lambda var: ('_const ( %s , _riverid ( ' % var) + name + ' ) )')


# Grammar rules mapping surface patterns to Geoquery logical forms.
# NOTE(review): this list continues beyond the visible excerpt.
RULES = [
    # Root rules
    Rule(
        '$ROOT', ['$Answer'], 'what %s ?',
        lambda v, a: lambda: '_answer ( %(v1)s , ( %(c1)s ) )' % {
            'v1': v.get(1),
            'c1': a.sem_fn(v.get(1))
        }),
    #Rule('$ROOT', ['$Answer'], 'what is the %s ?', lambda v, a: lambda : '_answer ( %(v1)s , ( %(c1)s ) )' % {'v1': v.get(1), 'c1': a.sem_fn(v.get(1))}),
    #Rule('$ROOT', ['$Answer'], 'what are the %s ?', lambda v, a: lambda : '_answer ( %(v1)s , ( %(c1)s ) )' % {'v1': v.get(1), 'c1': a.sem_fn(v.get(1))}),

    # Things that could be answers
    Rule('$Answer', ['$State'], '%s', lambda v, a: lambda var: a.sem_fn(var)),
    Rule('$Answer', ['$Landmark'], '%s', lambda v, a: lambda var: a.sem_fn(var)),
    Rule(
        '$Answer', ['$State'], 'population of %s',
        lambda v, a: lambda var: '_population ( %(v1)s , %(var)s ) , %(c1)s' % {
            'var': var,
            'v1': v.get(1),
            'c1': a.sem_fn(v.get(1))
def grammar_ab_items() -> tp.List[tp.Set[State]]:
    """Return the LR(1) item sets (canonical collection) for the a/b grammar.

    Each set is one parser state; State(rule, dot, lookahead) places the
    dot at position `dot` within `rule` under the given lookahead symbol.
    '@' is presumably the augmented start symbol — matches grammar_ab's
    S -> aSbS | '' productions.
    """
    return [{
        State(Rule('@', 'S'), 0, '$'),       # state 0: initial closure
        State(Rule('S', 'aSbS'), 0, '$'),
        State(Rule('S', ''), 0, '$')
    }, {State(Rule('@', 'S'), 1, '$')}, {    # state 1: accept on '$'
        State(Rule('S', 'aSbS'), 1, '$'),    # state 2: after shifting 'a'
        State(Rule('S', 'aSbS'), 0, 'b'),
        State(Rule('S', ''), 0, 'b')
    }, {State(Rule('S', 'aSbS'), 2, '$')}, { # state 3: aS seen, '$' context
        State(Rule('S', 'aSbS'), 1, 'b'),    # state 4: 'a' seen, 'b' context
        State(Rule('S', 'aSbS'), 0, 'b'),
        State(Rule('S', ''), 0, 'b')
    }, {
        State(Rule('S', 'aSbS'), 3, '$'),    # state 5: aSb seen, '$' context
        State(Rule('S', 'aSbS'), 0, '$'),
        State(Rule('S', ''), 0, '$')
    }, {State(Rule('S', 'aSbS'), 2, 'b')},   # state 6: aS seen, 'b' context
       {State(Rule('S', 'aSbS'), 4, '$')}, { # state 7: aSbS complete, '$'
        State(Rule('S', 'aSbS'), 3, 'b'),    # state 8: aSb seen, 'b' context
        State(Rule('S', 'aSbS'), 0, 'b'),
        State(Rule('S', ''), 0, 'b')
    }, {State(Rule('S', 'aSbS'), 4, 'b')}]   # state 9: aSbS complete, 'b'
def test_word_parts(self):
    """A '~eu >> ~eux' rule matches 'eu'-final words and rejects others."""
    plural_rule = Rule("~eu >> ~eux", "Pl")
    # 'lieu' ends in 'eu': match succeeds and the stem 'li' is returned.
    self.assertEqual(plural_rule.match('lieu'), (True, 'li'))
    # 'heure' does not end in 'eu': no match, empty stem.
    self.assertEqual(plural_rule.match('heure'), (False, ''))
    # A non-matching word produces no transformations at all.
    self.assertEqual(plural_rule.transform('heure'), [])
def grammar_ab() -> Grammar:
    """Return the grammar S -> aSbS | '' over terminals {a, b}."""
    productions = [
        Rule('S', 'aSbS'),
        Rule('S', ''),
    ]
    return Grammar(terminals='ab',
                   non_terminals='S',
                   start='S',
                   rules=productions)