def test_choose_next():
    """choose_next yields completed arcs in order and raises when none remain.

    Fixed: the original wrapped the final call in ``assert`` inside
    ``pytest.raises`` — that assert is never evaluated when the expected
    exception fires, so it is dead, misleading code.
    """
    agenda = Agenda(simple_tokens, simple_lexicon)
    assert agenda.choose_next() == Arc(Terminal('PN', 'I'), 0, 1, 1)
    assert agenda.choose_next() == Arc(Terminal('V', 'SLEEP'), 1, 2, 1)
    # Append an incomplete arc (dot not at the end): it must not be selectable.
    agenda.agenda.append(Arc(NonTerminal('NP', 'N'), 0, 1, 0))
    # No completed arcs left to select.
    with pytest.raises(ValueError):
        agenda.choose_next()
def test_agenda_constr():
    """A freshly built Agenda contains exactly one terminal arc per token."""
    agenda = Agenda(simple_tokens, simple_lexicon)
    expected = [
        Arc(Terminal('PN', 'I'), 0, 1, 1),
        Arc(Terminal('V', 'SLEEP'), 1, 2, 1),
    ]
    # Mutual containment: the agenda holds these arcs and nothing else.
    assert all(arc in agenda for arc in expected)
    assert all(arc in expected for arc in agenda)
def test_arc_is_complete():
    """An arc is complete exactly when its dot has consumed every child."""
    done_unary = Arc(NonTerminal('DT', 'the'), 2, 3, 1, [10])
    halfway = Arc(NonTerminal('NP', 'DT', 'N'), 2, 3, 1, [12])
    done_binary = Arc(NonTerminal('NP', 'DT', 'N'), 2, 3, 2, [14])
    assert done_unary.is_complete()
    assert not halfway.is_complete()
    assert done_binary.is_complete()
def test_arc_eq():
    """Arcs compare equal iff rule, span, and dot position all match."""
    rule = NonTerminal('NP', 'N')
    reference = Arc(rule, 1, 2, 1)
    # Identical fields -> equal.
    assert reference == Arc(rule, 1, 2, 1)
    # Shifting the start position alone breaks equality.
    assert reference != Arc(rule, 2, 2, 1)
def test_predict():
    """predict adds grammar rules anchored at the completed arc's start."""
    agenda = Agenda(simple_tokens, simple_lexicon)
    current = agenda.choose_next()
    assert current == Arc(Terminal('PN', 'I'), 0, 1, 1)
    agenda.predict(simple_grammar, current)
    expected = [
        Arc(Terminal('V', 'SLEEP'), 1, 2, 1),
        Arc(NonTerminal('NP', 'PN'), 0, 0, 0),
    ]
    # Mutual containment: exactly the remaining terminal plus the prediction.
    assert all(arc in agenda for arc in expected)
    assert all(arc in expected for arc in agenda)
def __init__(self, tokens, lexicon):
    """Seed the agenda with one terminal arc per (token, lexical entry) pair.

    Args:
        tokens (list of str) : words of the sentence, in order
        lexicon (Lexicon) : lexicon for this language
    """
    # Word at position `pos` spans [pos, pos + 1); with dot=1 the single
    # lexical child is already consumed, so each arc starts off complete.
    self.agenda = [
        Arc(terminal, start=pos, end=pos + 1, dot=1, history=None)
        for pos, token in enumerate(tokens)
        for terminal in lexicon[token]
    ]
def predict(self, grammar, current):
    """Add arcs for every grammar rule the completed arc could begin.

    Fixed: the original iterated with ``enumerate`` but never used the
    index; the ``try/except/else`` shape is flattened into early returns.

    Args:
        grammar (Grammar) : grammar for this language
        current (Arc) : current arc to look up in grammar
    """
    # Only a completed constituent can trigger prediction.
    if not current.is_complete():
        return
    try:
        predicted = grammar[current.rule.parent]
    except KeyError:
        # No rules are headed by this category; nothing to predict.
        return
    # Each predicted arc starts (and ends) where the completed arc begins,
    # with the dot at 0: nothing consumed yet.
    for rule in predicted:
        self.agenda.append(Arc(rule, current.start, current.start, 0))
def test_chart():
    """Chart recognizes a sentence only once a spanning S arc is added."""
    chart = Chart(['I', 'SLEEP'])
    chart.add(Arc(NonTerminal('PN', 'I'), 0, 1, 1))
    assert not chart.is_sentence
    # Build up the intermediate constituents; still no spanning S.
    intermediate = [
        Arc(NonTerminal('V', 'SLEEP'), 1, 2, 1),
        Arc(NonTerminal('NP', 'PN'), 0, 1, 1, [1]),
        Arc(NonTerminal('VP', 'V'), 1, 2, 1, [2]),
        Arc(NonTerminal('S', 'VP'), 1, 2, 1, [2]),
    ]
    for arc in intermediate:
        chart.add(arc)
    assert not chart.is_sentence
    # An S covering the whole input makes it a sentence.
    chart.add(Arc(NonTerminal('S', 'NP', 'VP'), 0, 2, 2, [3, 4]))
    assert chart.is_sentence
    expected_lines = [
        '0 1 2',
        ' ---- PN --> I',
        ' ---- V --> SLEEP',
        ' ---- NP --> PN',
        ' ---- VP --> V',
        ' ---- S --> VP',
        ' --------- S --> NP VP']
    assert str(chart) == '\n'.join(expected_lines)
def test_arc_extend():
    """get_extended advances the dot over a matching completed key arc."""
    key = Arc(NonTerminal('N', 'cat'), start=0, end=1, dot=1)
    base = Arc(NonTerminal('NP', 'N'), start=0, end=0, dot=0)
    extended = base.get_extended(key)
    assert extended.rule == NonTerminal('NP', 'N')
    assert (extended.start, extended.end, extended.dot) == (0, 1, 1)
    assert extended.history == [key]
    # Each of these fails to line up with the key (category expected next,
    # or the positions do not meet) and must raise.
    mismatched = [
        Arc(NonTerminal('VP', 'V'), start=0, end=0, dot=0),
        Arc(NonTerminal('NP', 'N'), start=1, end=2, dot=0),
        Arc(NonTerminal('NP', 'DT', 'N'), start=0, end=2, dot=1),
    ]
    for arc in mismatched:
        with pytest.raises(ValueError):
            arc.get_extended(key)
def test_arc_string():
    """str(Arc) shows span, dotted rule, history ids, and the arc's own id."""
    inner = Arc(NonTerminal('NP', 'N'), 0, 0, 0, [None])
    assert str(inner) == '<0> NP --> *0 N [None] <0> {}'.format(id(inner))
    outer = Arc(NonTerminal('VP', 'AUX', 'V'), 2, 4, 2, [inner, None])
    expected = '<2> VP --> AUX V *2 [{}, None] <4> {}'.format(
        id(inner), id(outer))
    assert str(outer) == expected
# --- Shared fixtures ---------------------------------------------------
# Minimal grammar/lexicon pair for the two-word sentence "I sleep".
# NOTE(review): the grammar/lexicon string literals were reconstructed
# one-entry-per-line from a whitespace-collapsed source — confirm against
# the loaders' expected format.
simple_grammar.load(StringIO("""
S --> NP VP
NP --> PN
VP --> V
"""))
simple_lexicon = Lexicon()
simple_lexicon.load(StringIO("""
I : PN
sleep : V
"""))
simple_parser = Parser(simple_grammar, simple_lexicon)
# Raw input sentence and its normalized token stream.
simple_sentence = ' i sleep '
simple_tokens = ['I', 'SLEEP']
# Expected bracketed (qtree-style) parse of the simple sentence.
simple_parse = '[.S [.NP [.PN I]][.VP [.V SLEEP]]]'
# Every arc expected in a completed chart for the simple sentence.
simple_chart = [
    Arc(NonTerminal('S', 'NP', 'VP'), 0, 2, 2),
    Arc(NonTerminal('NP', 'PN'), 0, 1, 1),
    Arc(NonTerminal('VP', 'V'), 1, 2, 1),
    Arc(Terminal('PN', 'I'), 0, 1, 1),
    Arc(Terminal('V', 'SLEEP'), 1, 2, 1)]
# Larger grammar exercising multi-child NP/VP rules.
complex_grammar = Grammar()
complex_grammar.load(StringIO("""
S --> NP VP
NP --> DT N
NP --> DT ADJ N
NP --> PN
VP --> V
VP --> VP NP
VP --> AUX VP
"""))