def test_choose_next():
    """Check the arcs returned by ``Agenda.choose_next`` and its failure mode.

    The first two calls must return the terminal arcs for 'I' and 'SLEEP',
    in that order.  After an arc with ``dot=0`` is pushed onto the agenda,
    the next call is expected to raise ``ValueError``.
    """
    agenda = Agenda(simple_tokens, simple_lexicon)
    assert agenda.choose_next() == Arc(Terminal('PN', 'I'), 0, 1, 1)
    assert agenda.choose_next() == Arc(Terminal('V', 'SLEEP'), 1, 2, 1)
    # Push an arc whose dot is still at position 0.
    agenda.agenda.append(Arc(NonTerminal('NP', 'N'), 0, 1, 0))
    # no completed arcs to select
    with pytest.raises(ValueError):
        # The call itself must raise; there is no return value to assert on.
        agenda.choose_next()
def test_agenda_constr():
    """A new agenda for the simple fixture holds the two expected arcs.

    Membership is checked in both directions so that neither a missing
    arc nor an extra arc goes unnoticed.
    """
    expected_arcs = [
        Arc(Terminal('PN', 'I'), 0, 1, 1),
        Arc(Terminal('V', 'SLEEP'), 1, 2, 1),
    ]
    built = Agenda(simple_tokens, simple_lexicon)
    # Every expected arc is on the agenda ...
    for wanted in expected_arcs:
        assert wanted in built
    # ... and the agenda holds nothing beyond the expected arcs.
    for found in built:
        assert found in expected_arcs
def test_arc_is_complete():
    """``Arc.is_complete`` is truthy only for the arcs listed as complete.

    Each case pairs an arc with the truth value expected from
    ``is_complete()``: a one-child rule with dot 1, a two-child rule with
    dot 1, and a two-child rule with dot 2.
    """
    cases = [
        (Arc(NonTerminal('DT', 'the'), 2, 3, 1, [10]), True),
        (Arc(NonTerminal('NP', 'DT', 'N'), 2, 3, 1, [12]), False),
        (Arc(NonTerminal('NP', 'DT', 'N'), 2, 3, 2, [14]), True),
    ]
    for arc, should_be_complete in cases:
        # Compare truthiness only, as a bare assert would.
        assert bool(arc.is_complete()) is should_be_complete
def test_arc_eq():
    """Arcs with identical fields compare equal; a shifted start does not."""
    rule = NonTerminal('NP', 'N')
    start, end, dot = 1, 2, 1

    reference = Arc(rule, start, end, dot)
    twin = Arc(rule, start, end, dot)
    shifted = Arc(rule, start + 1, end, dot)

    assert reference == twin
    assert reference != shifted
def test_predict():
    """Predicting from the 'I' arc adds an empty ``NP --> PN`` arc at 0.

    After the first arc is taken off the agenda and fed to ``predict``,
    the agenda must hold exactly the remaining 'SLEEP' terminal arc and
    the newly predicted arc.
    """
    agenda = Agenda(simple_tokens, simple_lexicon)
    first = agenda.choose_next()
    assert first == Arc(Terminal('PN', 'I'), 0, 1, 1)

    agenda.predict(simple_grammar, first)

    expected_arcs = [
        Arc(Terminal('V', 'SLEEP'), 1, 2, 1),
        Arc(NonTerminal('NP', 'PN'), 0, 0, 0),
    ]
    # Two-way membership check: nothing missing, nothing extra.
    for wanted in expected_arcs:
        assert wanted in agenda
    for found in agenda:
        assert found in expected_arcs
Exemple #6
0
 def __init__(self, tokens, lexicon):
     """ Builds the initial agenda from the sentence tokens.
     For every token, each entry that the lexicon returns for
     it becomes one arc spanning that token's position
     (start=index, end=index + 1) with dot=1 and no history.
     Args:
         tokens (list of str) : list of words in sentence
         lexicon (Lexicon) : lexicon for this language
     """
     self.agenda = [
         Arc(
             entry, start=position,
             end=position + 1, dot=1, history=None)
         for position, word in enumerate(tokens)
         for entry in lexicon[word]]
Exemple #7
0
 def predict(self, grammar, current):
     """ Adds to the agenda one fresh arc for every rule the
     grammar returns for the parent symbol of the current arc.
     Nothing is added when the current arc is not complete or
     when the grammar has no entry for that symbol.
     Args:
         grammar (Grammar) : grammar for this language, indexed
             by symbol; must raise KeyError for unknown symbols
         current (Arc) : current arc to look up in grammar
     """
     if not current.is_complete():
         return
     try:
         predicted = grammar[current.rule.parent]
     except KeyError:  # no rules for this key
         return
     # Each predicted arc is empty: it starts and ends where the
     # current arc starts, with the dot before its first child.
     for rule in predicted:
         self.agenda.append(
             Arc(rule, current.start, current.start, 0))
def test_chart():
    """Exercise ``Chart.add``, ``Chart.is_sentence`` and ``str(chart)``.

    ``is_sentence`` must stay falsy until the ``S --> NP VP`` arc covering
    positions 0 to 2 is added.  The rendered chart must then match the
    expected text line for line.
    """
    chart = Chart(['I', 'SLEEP'])
    chart.add(Arc(NonTerminal('PN', 'I'), 0, 1, 1))
    assert not chart.is_sentence

    # None of these arcs is an S arc covering positions 0 to 2.
    partial_arcs = (
        Arc(NonTerminal('V', 'SLEEP'), 1, 2, 1),
        Arc(NonTerminal('NP', 'PN'), 0, 1, 1, [1]),
        Arc(NonTerminal('VP', 'V'), 1, 2, 1, [2]),
        Arc(NonTerminal('S', 'VP'), 1, 2, 1, [2]),
    )
    for partial in partial_arcs:
        chart.add(partial)
    assert not chart.is_sentence

    chart.add(Arc(NonTerminal('S', 'NP', 'VP'), 0, 2, 2, [3, 4]))
    assert chart.is_sentence

    expected_text = '\n'.join([
        '0    1    2',
        ' ----       PN --> I',
        '      ----  V --> SLEEP',
        ' ----       NP --> PN',
        '      ----  VP --> V',
        '      ----  S --> VP',
        ' ---------  S --> NP VP',
    ])
    assert str(chart) == expected_text
def test_arc_extend():
    """``Arc.get_extended`` advances an arc over a key, or raises.

    Extending the empty ``NP --> N`` arc at 0 with the ``N`` key spanning
    0 to 1 must give an arc with the same rule, span 0 to 1, dot 1 and the
    key as its only history entry.  Three arcs that the key must not
    extend are each expected to raise ``ValueError``.
    """
    key = Arc(NonTerminal('N', 'cat'), start=0, end=1, dot=1)

    extended = Arc(
        NonTerminal('NP', 'N'), start=0, end=0, dot=0).get_extended(key)
    assert extended.rule == NonTerminal('NP', 'N')
    assert extended.start == 0
    assert extended.end == 1
    assert extended.dot == 1
    assert extended.history == [key]

    rejected = [
        # key parent does not match current node in arc children
        Arc(NonTerminal('VP', 'V'), start=0, end=0, dot=0),
        # arc spans 1 to 2 while the key spans 0 to 1
        Arc(NonTerminal('NP', 'N'), start=1, end=2, dot=0),
        # arc already ends at 2 while the key spans 0 to 1
        Arc(NonTerminal('NP', 'DT', 'N'), start=0, end=2, dot=1),
    ]
    for bad_arc in rejected:
        with pytest.raises(ValueError):
            bad_arc.get_extended(key)
def test_arc_string():
    """``str(arc)`` shows span, rule, dot position, history and arc id.

    History entries that are arcs are rendered through ``id()``; ``None``
    entries are rendered as the text ``None``.
    """
    empty_np = Arc(NonTerminal('NP', 'N'), 0, 0, 0, [None])
    empty_np_text = '<0> NP --> *0 N [None] <0> {}'.format(id(empty_np))
    assert str(empty_np) == empty_np_text

    full_vp = Arc(NonTerminal('VP', 'AUX', 'V'), 2, 4, 2, [empty_np, None])
    full_vp_text = '<2> VP --> AUX V *2 [{}, None] <4> {}'.format(
        id(empty_np), id(full_vp))
    assert str(full_vp) == full_vp_text
# Fixtures for the "simple" two-word sentence.
# Load three rewrite rules into simple_grammar (created outside this excerpt).
simple_grammar.load(StringIO("""
    S --> NP VP
    NP --> PN
    VP --> V
    """))
# Lexicon entries are written as "word : category".
# NOTE(review): entries use mixed case ('I', 'sleep') while the tests expect
# upper-case terminals -- presumably Lexicon.load normalises case; confirm.
simple_lexicon = Lexicon()
simple_lexicon.load(StringIO("""
    I : PN
    sleep : V
    """))
simple_parser = Parser(simple_grammar, simple_lexicon)
# Raw input with surrounding/repeated whitespace and lower-case words.
simple_sentence = ' i   sleep '
# Token list used to build agendas in the tests; presumably what tokenising
# simple_sentence should yield -- confirm against the tokenizer tests.
simple_tokens = ['I', 'SLEEP']
# Bracketed parse-tree string, presumably the expected parser output for
# simple_sentence -- confirm against the parser tests.
simple_parse = '[.S [.NP [.PN I]][.VP [.V SLEEP]]]'
# Arcs presumably expected in the chart after parsing the simple sentence,
# listed from the full-span S arc down to the terminal arcs -- TODO confirm.
simple_chart = [
    Arc(NonTerminal('S', 'NP', 'VP'), 0, 2, 2),
    Arc(NonTerminal('NP', 'PN'), 0, 1, 1),
    Arc(NonTerminal('VP', 'V'), 1, 2, 1),
    Arc(Terminal('PN', 'I'), 0, 1, 1),
    Arc(Terminal('V', 'SLEEP'), 1, 2, 1)]

# Larger grammar fixture: several alternatives for NP and VP, including a
# rule whose first child is its own parent (VP --> VP NP).
complex_grammar = Grammar()
complex_grammar.load(StringIO("""
    S --> NP VP
    NP --> DT N
    NP --> DT ADJ N
    NP --> PN
    VP --> V
    VP --> VP NP
    VP --> AUX VP
    """))