Ejemplo n.º 1
0
    def elim_direct(self, elim, elim_prods):
        """Remove direct left recursion on *elim* from *elim_prods*.

        Every recursive rule ``elim -> elim alpha`` is rewritten through a
        fresh nonterminal ``elim' -> alpha elim' | @`` (``@`` denotes
        epsilon in this grammar format), and every remaining rule
        ``elim -> beta`` becomes ``elim -> beta elim'``.

        Returns the replacement productions for *elim*; productions for
        the fresh nonterminal are added to the grammar directly.
        """
        left_recursive = [p for p in elim_prods if p.syms[0] == elim]
        others = [p for p in elim_prods if p.syms[0] != elim]

        # No leftmost self-reference: the productions are returned unchanged.
        if not left_recursive:
            return others

        fresh = self.grammar.get_alt_nonterminal(elim)

        # elim' -> alpha elim'  for every recursive rule  elim -> elim alpha
        for prod in left_recursive:
            self.grammar.add_production(fresh, list(prod.syms[1:]) + [fresh])
        # elim' -> epsilon
        self.grammar.add_production(fresh, '@')

        # elim -> beta elim'  for every non-recursive rule  elim -> beta
        return [Production(elim, list(p.syms) + [fresh]) for p in others]
Ejemplo n.º 2
0
def test1():
    """Run the parser over a hard-coded arithmetic expression grammar."""

    print("------ Test 1 ---------")

    # Local abbreviations for the symbol constructors.
    NT = Nonterminal
    TERM = Terminal

    # Grammar is built inline rather than read from a file:
    #   E -> E + T | T
    #   T -> T * P | P
    #   P -> ident
    E = NT('E')
    T = NT('T')
    P = NT('P')
    plus = TERM('+')
    mult = TERM('*')
    ident = TERM('ident')

    g = {
        E: [Production(rhs=[E, plus, T]),
            Production(rhs=[T])],
        T: [Production(rhs=[T, mult, P]),
            Production(rhs=[P])],
        P: [Production(rhs=[ident])],
    }

    p = Parser(g, E)

    # Token stream for:  a * b + c * d + e + f
    pairs = [('ident', 'a'), ('*', '*'), ('ident', 'b'), ('+', '+'),
             ('ident', 'c'), ('*', '*'), ('ident', 'd'), ('+', '+'),
             ('ident', 'e'), ('+', '+'), ('ident', 'f')]
    s = [Token(kind, text) for kind, text in pairs]

    forest = p.go(s)

    print("Parse combinations: {0}".format(Parser.num_combinations(forest)))

    ParseForestPrinter.print_forest(forest)
 def test_grammar_conversion_ndfa_fa_aaab(self):
     """NDFA->FA conversion for S -> aS | b (the language a*b)."""
     self.grammar.add_production(Production('S', 'aS'))
     self.grammar.add_production(Production('S', 'b'))
     fa = self.grammar.to_finite_automaton()
     # Words the automaton must accept.
     for word in ('b', 'ab', 'aab', 'aaaaaaaaaaaaab'):
         self.assertEqual(True, fa.recognize_sentence(word))
     # Words the automaton must reject.
     for word in ('', 'a', 'aa', 'aaaaaaaaaaaaa', 'ba', 'abb', 'abaaaaaaab'):
         self.assertEqual(False, fa.recognize_sentence(word))
Ejemplo n.º 4
0
    def dummy_state(self):
        """
        Return the initial dummy state (GAMMA -> . NP) used to seed the
        Earley chart at position [0, 0].
        """
        # BUG FIX: ("NP") is just the string "NP", not a 1-tuple, so the
        # RHS would behave as the character sequence ('N', 'P').  Sibling
        # methods (scanner/predictor) treat the RHS as a tuple of symbols,
        # so the singleton tuple ("NP",) is required here.
        gam = Production("⟐", ("NP",))  # This represents GAMMA -> NP
        dot = 0  # Dot position: before NP
        pos = [0, 0]  # Input span [start, end]

        return DottedRule(gam, dot, pos)
Ejemplo n.º 5
0
    def predictor(self, state):
        """
        Earley predictor: for every grammar rule expanding the category
        after the dot, add a fresh zero-progress state starting (and
        ending) at this chart position, unless it is already present.
        """
        category = state.nextcat()
        origin = state.position[1]
        column = self.chart[origin]
        for rule in self.grammar[category]:
            candidate = DottedRule(Production(category, rule.rhs),
                                   0, [origin, origin])
            if candidate not in column:
                column.append(candidate)
 def test_grammar_conversion_ndfa_fa_ccababba(self):
     """NDFA->FA conversion for S -> cS | cA, A -> aA | bA | a | b."""
     self.grammar.add_production(Production('S', 'cS'))
     self.grammar.add_production(Production('S', 'cA'))
     self.grammar.add_production(Production('A', 'aA'))
     self.grammar.add_production(Production('A', 'bA'))
     self.grammar.add_production(Production('A', 'a'))
     self.grammar.add_production(Production('A', 'b'))
     fa = self.grammar.to_finite_automaton()
     # Words the automaton must accept: c+ followed by (a|b)+.
     for word in ('ca', 'cb', 'ccccca', 'cccccb', 'cab', 'cba', 'cbababba',
                  'ccccababaaaabbbbbbabaabaabbb'):
         self.assertEqual(True, fa.recognize_sentence(word))
     # Words the automaton must reject.
     for word in ('', 'c', 'cccccc', 'a', 'b', 'babaaab', 'babababaabc',
                  'bababcabaab'):
         self.assertEqual(False, fa.recognize_sentence(word))
 def test_grammar_conversion_ndfa_fa_aabbccd(self):
     """NDFA->FA conversion for S -> aS | bB, B -> bB | cC, C -> cC | d."""
     self.grammar.add_production(Production('S', 'aS'))
     self.grammar.add_production(Production('S', 'bB'))
     self.grammar.add_production(Production('B', 'bB'))
     self.grammar.add_production(Production('B', 'cC'))
     self.grammar.add_production(Production('C', 'cC'))
     self.grammar.add_production(Production('C', 'd'))
     fa = self.grammar.to_finite_automaton()
     # Words the automaton must accept: a* b+ c+ d.
     for word in ('abcd', 'bcd', 'bbbcccd', 'aaabbbcccd', 'aaaabccccd',
                  'aaaabcd'):
         self.assertEqual(True, fa.recognize_sentence(word))
     # Words the automaton must reject.
     for word in ('', 'abc', 'acd', 'abd', 'aaaaabbbbbcccc', 'dabc', 'abdc',
                  'adbc', 'aadbbccd', 'dabcd', 'abcdd'):
         self.assertEqual(False, fa.recognize_sentence(word))
Ejemplo n.º 8
0
 def elim_indirect(self, elim, elim_idx, elim_prods):
     """Substitute earlier nonterminals appearing leftmost in elim's rules.

     For each nonterminal ``chk`` ordered before *elim* (index < *elim_idx*
     in ``self.nterms``), every production ``elim -> chk gamma`` is
     replaced by ``elim -> delta gamma`` for each rule ``chk -> delta``.
     Productions whose leftmost symbol is no earlier nonterminal are kept.
     """
     expanded = []
     was_replaced = [False] * len(elim_prods)
     for earlier in self.nterms[:elim_idx]:
         earlier_prods = self.grammar.prods[earlier]
         for i, prod in enumerate(elim_prods):
             if prod.syms[0] != earlier:
                 continue
             # elim -> earlier gamma  becomes  elim -> delta gamma
             # for every rule  earlier -> delta.
             for sub in earlier_prods:
                 expanded.append(Production(elim, sub.syms + prod.syms[1:]))
             was_replaced[i] = True
     # Carry over every production that was not substituted above.
     expanded.extend(p for i, p in enumerate(elim_prods) if not was_replaced[i])
     return expanded
Ejemplo n.º 9
0
    def scanner(self, state):
        """
        Earley scanner: when the category after the dot matches the tag
        at entry ``words[idx][1]``, advance the dot over the word and
        record the new state in the next chart column (creating the
        column if necessary).
        """
        idx = state.position[1]

        # Guard: never scan past the last word of the input.
        if idx == len(self.words):
            return

        entry = self.words[idx]  # presumably a (word, tag) pair — see predictor
        if state.nextcat() != entry[1]:
            return

        leaf = Production(state.nextcat(), (entry[0], ))
        advanced = DottedRule(leaf, state.progress + 1, [idx, idx + 1])

        # Grow the chart if column idx+1 does not exist yet.
        if len(self.chart) < idx + 2:
            self.chart.append([])
        if advanced not in self.chart[idx + 1]:
            self.chart[idx + 1].append(advanced)
Ejemplo n.º 10
0
def read_grammar_from_file(in_file):
    """Parse a grammar description file into a Grammar.

    Expected layout: line 1 holds the space-separated terminals, line 2
    the nonterminals, line 3 the start symbol; each following line is a
    rule ``lhs -> alt1 | alt2 | ...`` until a blank line (or EOF).
    """
    with open(in_file, "r") as handle:
        terminals = handle.readline().strip().split(' ')
        non_terminals = handle.readline().strip().split(' ')
        start_sym = handle.readline().strip()

        productions = []
        rule_line = handle.readline().strip()
        while rule_line:
            segments = rule_line.split('->')
            head = segments[0].replace(" ", "")
            # One Production per '|'-separated alternative on this line.
            for alternative in segments[1].split('|'):
                productions.append(Production(head, alternative.split()))
            rule_line = handle.readline().strip()
        return Grammar(start_sym, terminals, non_terminals, productions)
Ejemplo n.º 11
0
    def create_grammar_from_file(filename) -> Grammar:
        """Load a grammar from a JSON file and validate it.

        The file must contain the keys ``non_term``, ``term``, ``start``
        and ``productions`` (each production an object with ``l``/``r``).

        Raises:
            Exception: on missing fields, an unknown start symbol,
                overlapping terminal/non-terminal alphabets, or a
                production using undeclared symbols.
        """
        with open(filename, "r") as f:
            grammar_json = load(f)

        try:
            non_terms = grammar_json["non_term"]
            terms = grammar_json["term"]
            start = grammar_json["start"]
            productions = grammar_json["productions"]
        except KeyError as err:
            # Chain the KeyError so the missing field name stays visible.
            raise Exception("Incorrect fields in `{}`".format(filename)) from err

        if start not in non_terms:
            raise Exception("Incorrect start symbol in grammar")

        # A symbol may not be both a terminal and a non-terminal.
        intersect = set(terms) & set(non_terms)
        if intersect:
            raise Exception(
                "Terminals and non-terminals have the same symbols: {}".format(
                    intersect))

        # Hoisted out of the loop: the combined alphabet is invariant.
        symbols = set(non_terms + terms)

        grammar_productions = []
        for p in productions:
            left = p["l"]
            right = p["r"]
            # LHS must be a declared non-terminal; every RHS symbol must
            # belong to the combined alphabet.
            if left not in non_terms or not set(right).issubset(symbols):
                raise Exception("Incorrect production: {} -> {}".format(
                    left, "".join(right)))
            grammar_productions.append(Production(left, right))

        return Grammar(non_terminals=non_terms,
                       terminals=terms,
                       start_symbol=start,
                       productions=grammar_productions)
 def test_add_production(self):
     """Adding one production raises the production count to exactly one."""
     new_rule = Production("S", "aS")
     self.grammar.add_production(new_rule)
     self.assertEqual(1, self.grammar.productions_quantity())
 def test_create_production(self):
     """A new production exposes its left and right sides unchanged."""
     rule = Production('S', 'aS')
     self.assertEqual('S', rule.left())
     self.assertEqual('aS', rule.right())
Ejemplo n.º 14
0
def _fresh_symbol(base, taken):
    """Prime *base* with apostrophes until it collides with nothing in *taken*."""
    while base in taken:
        base += '\''
    return base


def covering_grammar(grammar, Wi=1.0, Wr=1.0, Wd=1.0):
    """Build a covering grammar over *grammar* that tolerates terminal errors.

    Weights: Wi for inserting a terminal, Wr for replacing one, Wd for
    deleting one.  Follows the construction's steps A1.1-A1.3 and returns
    a new ``Grammar``.
    """
    # PEP 8: identity comparison with None (was `!= None`).  Fall back to
    # the module-level default alphabet when the grammar declares none.
    alphabet = grammar.alphabet if grammar.alphabet is not None else default_alphabet

    productions = []

    nonterminals = list(grammar.nonterminals)

    # - A1.2: fresh symbols H (error tail) and I (a single inserted terminal).
    H = _fresh_symbol('H', nonterminals)
    nonterminals.append(H)

    I = _fresh_symbol('I', nonterminals)
    nonterminals.append(I)

    # One E_a per terminal a: derives a itself, any other terminal with
    # replacement weight Wr, a preceded by an error tail (H a), or
    # nothing at all with deletion weight Wd.
    E = {}

    for a in alphabet:

        Ea = _fresh_symbol(f'E_{a}', nonterminals)
        E[a] = Ea
        nonterminals.append(Ea)

        for b in alphabet:

            if a == b:
                productions.append(Production(Ea, [b]))
            else:
                productions.append(Production(Ea, [b], Wr))

        productions.append(Production(Ea, [H, a]))
        productions.append(Production(I, [a], Wi))
        productions.append(Production(Ea, [], Wd))

    # - A1.3: new start symbol allowing an optional trailing error tail H.
    start = _fresh_symbol('S', nonterminals)
    nonterminals.append(start)

    productions.append(Production(start, [grammar.start]))

    productions.append(Production(start, [grammar.start, H]))

    productions.append(Production(H, [H, I]))

    productions.append(Production(H, [I]))

    # - A1.1: rewrite each original production, replacing every terminal b
    #   on the right-hand side with its error nonterminal E_b.
    for production in grammar.productions:

        alpha_runs = []   # maximal runs of nonterminals between terminals
        beta_items = []   # the terminals themselves, in order

        curr_run = []

        for item in production.right:

            if item in alphabet:
                alpha_runs.append(curr_run)
                beta_items.append(item)
                curr_run = []
            else:
                curr_run.append(item)

        alpha_runs.append(curr_run)

        # Interleave:  run_0 E_b0 run_1 E_b1 ... run_n
        new_rhs = []
        for run, terminal in zip(alpha_runs, beta_items):
            new_rhs += run
            new_rhs.append(E[terminal])

        new_rhs += alpha_runs[-1]

        productions.append(Production(production.left, new_rhs))

    return Grammar(nonterminals, start, productions, alphabet)
Ejemplo n.º 15
0
            'take', 'touch', 'have', 'stroke', 'nibble')
# Terminal vocabularies: the concrete words each part-of-speech symbol
# can produce.
SpeakVerb.define('say', 'cry', 'shout', 'scream', 'laugh', 'whisper',
                 'mention', 'think', 'scribble')
Adverb.define('quickly', 'slowly', 'furiously', 'lovingly', 'unknowingly',
              'happily', 'angrily')
Noun.define('bird', 'dog', 'dinosaur', 'force', 'Masterball', 'alien', 'dude',
            'arrow', 'experience', 'demon', 'candy')
Adjective.define('large', 'tiny', 'crazy', 'psychopathic', 'blue', 'ancient',
                 'sad', 'angry', 'cheerful', 'lit', 'gold')
Name.define('Dio', 'Mr. Goose', 'Hackerman', 'Jojo', 'Luke')
Article.define('the', 'this', 'that', 'this one', 'that one', 'a', 'some')
ArticlePlural.define('the', 'these', 'those', 'many', 'some')
Preposition.define('of', 'from', 'in', 'by', 'with', 'without', 'within',
                   'inside', 'outside')

# Phrase structure: compose the terminals above into noun/prepositional
# phrases.  Many(Adjective, 0, 1) presumably allows zero or one adjective
# — TODO confirm against the Many implementation.
NamePhrase.define(Production(',', Name))
NounPhraseSingle.define(Production(Article, Many(Adjective, 0, 1), Noun))
NounPhrasePlural.define(
    Production(ArticlePlural, Many(Adjective, 0, 1),
               Noun.clone().transform(pluralize_all)))
NounPhrase.define(NounPhrasePlural, NounPhraseSingle)
PrepPhrase.define(Production(Preposition, NounPhrase))

# Subjects: pronouns, names, or noun phrases with an optional
# prepositional phrase.  set_distr arguments look like selection weights
# for the alternatives, in definition order — verify against set_distr.
Subject3rd.define(Production(NounPhraseSingle, maybe(PrepPhrase)), 'he', 'she',
                  'it', Name).set_distr(0.3, 0.1, 0.1, 0.2, 0.31)
SubjectPlural.define('you', 'they', 'we', 'y\'all',
                     Production(NounPhrasePlural,
                                maybe(PrepPhrase))).set_distr(
                                    0.15, 0.15, 0.15, 0.15, 0.41)
Subject.define(
    Production(