Example #1
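A minimal driver for an LR parser: parse the grammar, compute the goto sets, display the FIRST/FOLLOW sets, build the parsing table, and run the parser over the input. Grammar, GotoGenerator, Table, and LR_Parser are classes defined elsewhere in the same project; grammar and input are module-level globals.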
def main():
    # 'grammar' and 'input' are module-level globals set elsewhere
    # (note that 'input' shadows the Python builtin of the same name).
    global input, grammar

    g = Grammar(grammar)
    g.parse()
    gotos = GotoGenerator(g)
    gotos.generate()
    gotos.display()

    g.first_follow.display()

    parsing_table = Table(g, gotos)
    parsing_table.generate()

    lr_parser = LR_Parser(g, parsing_table, input)
    lr_parser.parse()
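A minimal sketch of how the two globals might be set up before calling main(). The grammar text and input string below are placeholders, and the grammar syntax expected by Grammar is an assumption, not taken from the original example:

grammar = """
E -> E + T | T
T -> T * F | F
F -> ( E ) | id
"""
input = "id + id * id"  # shadows the builtin input(), as in the example

if __name__ == '__main__':
    main()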
Example #2
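The same driver with a small command-line front end: when exactly two arguments are supplied, the grammar is read from the file named by the first argument and the second argument becomes the input string.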
def main(*argv):
  global input, grammar
  # argv[0] is expected to be the full argument list, e.g. main(sys.argv[1:]):
  # a grammar file path followed by the input string.
  if len(argv[0]) == 2:
    with open(argv[0][0], 'r') as content_file:
        grammar = content_file.read()

    input = argv[0][1]

  g = Grammar(grammar)
  g.parse()
  gotos = GotoGenerator(g)
  gotos.generate()
  gotos.display()

  g.first_follow.display()

  parsing_table = Table(g, gotos)
  parsing_table.generate()

  lr_parser = LR_Parser(g, parsing_table, input)
  lr_parser.parse()
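A sketch of how this entry point might be invoked, assuming the argument list is passed as a single parameter; the script name, grammar file, and input string are placeholders:

import sys

if __name__ == '__main__':
  # e.g.  python lr.py grammar.txt "id + id * id"
  main(sys.argv[1:])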
Example #3
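A complete parser class from a larger project. It loads an SLR(1) grammar from the <file_base>.grammar file next to the module, parses regions of a specification text (shared.spec_text) into ANode trees, groups parse errors by what the parser was expecting, and reports per-production usage counts. It depends on project modules (shared, tokenizer_for_pseudocode) and helpers (convert_grammar_string_to_productions, Production, ANode, LR_Parser, ParsingError, TooManyHeadsError) that are not shown here.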
import collections
import math
import os
import sys

# Project-local imports (shared, tokenizer_for_pseudocode, and the grammar
# and parse-tree helpers used below) are omitted here.

class Pseudocode_Parser:
    def __init__(self, file_base):
        self.file_base = file_base

        with open(
                f"{os.path.dirname(__file__)}/{file_base}.grammar",
                'r',
                encoding='utf-8') as grammar_file:
            grammar_string = grammar_file.read()

        self.productions = convert_grammar_string_to_productions(
            grammar_string)
        simple_prods = [(prod.lhs_s, prod.rhs_pieces)
                        for prod in self.productions]
        # LR
        self.lr_parser = LR_Parser('SLR(1)', simple_prods, 'silent')

        #   # Earley (attempt, doesn't work)
        #   self.eparser = Earley.Parser(simple_prods, '*EOI*')

        if shared.g_outdir:
            self.f_errors = shared.open_for_output(file_base + '_errors')
            self.f_ambig = shared.open_for_output(file_base + '_ambig')
            self.f_parsed = shared.open_for_output(file_base + '_parsed')
        else:
            # testing: send all output streams to stdout
            self.f_errors = sys.stdout
            self.f_ambig = sys.stdout
            self.f_parsed = sys.stdout

        self.error_count = 0
        self.group_errors_by_expectation = True

        if self.group_errors_by_expectation:
            self.error_posns = collections.defaultdict(list)

    def parse_and_handle_errors(self, start_posn, end_posn, goal):
        # Synthesize an 'entry' production and token that anchor the goal
        # symbol at the start of the region being parsed.
        entry_lhs = '{_' + goal.lower() + '_}'
        entry_rhs = ''
        entry_prod = Production(True, entry_lhs, entry_rhs)
        entry_token = (entry_prod, start_posn, start_posn, '')

        # Find the start of 'this line' (the line that contains start_posn).
        for posn in range(start_posn, -1, -1):
            if posn == 0 or shared.spec_text[posn - 1] == '\n':
                line_start_posn = posn
                break
        else:
            assert 0
        # And then find the end of this line's indentation
        for posn in range(line_start_posn, start_posn + 1):
            if shared.spec_text[posn] != ' ':
                line_indent_end_posn = posn
                break
        else:
            assert 0
        #
        this_line_indentation = line_indent_end_posn - line_start_posn

        token_generator = tokenizer_for_pseudocode.tokenize(
            shared.spec_text, start_posn, end_posn, True,
            this_line_indentation)

        tokens = [entry_token] + list(token_generator)

        def matcher_for_gparse(curr_tind, terminals):
            # Given a token index and the terminals the parser can shift,
            # return (terminal, next_index, node) triples for those that
            # match the current token; a node of None means "match, but
            # contribute no parse-tree node".
            assert curr_tind < len(tokens)
            (tok_prod, tok_s_posn, tok_e_posn, tok_text) = tokens[curr_tind]

            matching_terminals = []
            for terminal in terminals:

                assert isinstance(terminal, str)

                match_token = False

                if terminal.startswith('{') and terminal.endswith('}'):
                    if tok_prod.lhs_s == terminal:
                        if terminal in ['{nlai}', '{_indent_}', '{_outdent_}']:
                            match_token = None
                        else:
                            match_token = ANode(tok_prod, [tok_text],
                                                tok_s_posn, tok_e_posn)
                else:
                    if terminal == 'an?':
                        if tok_text in ['a', 'an']:
                            match_token = None
                    else:
                        if tok_text == terminal:
                            match_token = None

                if match_token is not False:
                    matching_terminals.append(
                        (terminal, curr_tind + 1, match_token))

            return matching_terminals

        def reducer(pi, reductands, s_tind, e_tind):
            # Build an ANode for production pi from the reduced children,
            # dropping whitespace and uninteresting helper nonterminals.
            prod = self.productions[pi]
            prod.n_reductions += 1
            assert len(reductands) == len(prod.rhs_pieces)

            if prod.lhs_s.startswith('{_'):
                # We're not interested in the details.
                return None

            node_children = []
            for red in reductands:
                if red is None:
                    # rhs_piece is a regex with no capturing group
                    # or is an uninteresting nonterminal
                    continue
                if red.prod.lhs_s == '{space}': continue
                node_children.append(red)

            (_, s_posn, _, _) = tokens[s_tind]
            (_, e_posn, _, _) = tokens[e_tind]
            node = ANode(prod, node_children, s_posn, e_posn)
            return node

        try:
            results = self.lr_parser.gparse(matcher_for_gparse, reducer, 0)

        except ParsingError as e:
            self.error_count += 1
            (_, tok_s_posn, _, _) = tokens[e.posn]
            if self.group_errors_by_expectation:
                self.error_posns[tuple(e.expecting)].append(tok_s_posn)
            else:
                print(
                    '\n' +
                    shared.source_line_with_caret_marking_column(tok_s_posn) +
                    '\n' + "Expecting: " + ' '.join(e.expecting),
                    file=self.f_errors)
            print('(Error)', file=self.f_parsed)
            return None

        except TooManyHeadsError as e:
            (_, tok_s_posn, _, _) = tokens[e.posn]
            print(shared.source_line_with_caret_marking_column(tok_s_posn))
            raise

        if len(results) != 1:
            # Ambiguous parse: log every candidate tree.
            print('-------------------------------', file=self.f_ambig)
            for result in results:
                result.printTree(self.f_ambig)

        result = results[0]

        result.set_parent_links()

        def count(node):
            if isinstance(node, str): return
            assert isinstance(node, ANode)
            if not hasattr(node.prod, 'n_delivered_instances'): return
            node.prod.n_delivered_instances += 1
            for child in node.children:
                count(child)

        count(result)

        [entry_node, goal_node] = result.children
        assert entry_node.prod is entry_prod
        assert goal_node.prod.lhs_s == '{' + goal + '}'

        goal_node.printTree(self.f_parsed)

        return goal_node

    def report(self):
        report_file_base = self.file_base + '_prod_counts'
        shared.stderr(f"generating new {report_file_base} ...")

        if self.group_errors_by_expectation:
            # This approach is better when I'm developing a grammar,
            # as it tends to group similar cases.

            def err(x):
                print(x, file=self.f_errors)

            err("%d parsing errors:" % self.error_count)
            err('')
            for (expecting, posns) in sorted(self.error_posns.items()):
                # err('')
                err('X' * 80)
                # err('')
                err("Expecting:")
                for e in expecting:
                    err("    %r" % e)
                for posn in posns:
                    err(
                        shared.source_line_with_caret_marking_column(
                            math.ceil(posn)))

        f = shared.open_for_output(report_file_base)
        for prod in self.productions:
            print("%5d %s" % (prod.n_delivered_instances, prod), file=f)