Esempio n. 1
0
def main(file: str):
    """Scan the text of *file* and print each token next to its name.

    The whole file is read into memory, handed to a fresh ``Compiler``'s
    scanner, and every token produced is printed together with the result of
    ``compiler.get_name`` for the token's ``code`` (``None`` is passed when
    the token has no ``code`` attribute). Afterwards the compiler's
    ``output_messages`` is invoked.
    """
    with open(file, 'r') as source:
        text = source.read()

    compiler = Compiler()
    for token in compiler.get_scanner(text):
        # Not every token carries a `code`; fall back to None for those.
        code = getattr(token, 'code', None)
        print(token, compiler.get_name(code))

    compiler.output_messages()
Esempio n. 2
0
class Parser:
    """Stack-based, table-driven parser that builds a tree of nodes.

    ``table`` maps ``(terminal_tag, non_terminal)`` pairs to the sequence of
    symbols the non-terminal expands to; ``T`` and ``NT`` are the enums of
    terminal and non-terminal symbols, and ``axiom`` is the start symbol.
    """

    def __init__(self, scanner_type: Callable, text: str, T: Enum, NT: Enum,
                 table: dict, axiom: Enum):
        """Create the compiler/scanner pair for *text* and the tree root.

        Args:
            scanner_type: passed straight to ``Compiler``.
            text: source text to scan and parse.
            T: enum of terminal symbols; must define ``END_OF_PROGRAM``.
            NT: enum of non-terminal symbols.
            table: parse table keyed by ``(terminal_tag, non_terminal)``.
            axiom: start symbol; becomes the symbol of the root node.
        """
        self.compiler = Compiler(scanner_type)
        self.scanner = self.compiler.get_scanner(text)
        self.text = text
        self.T = T
        self.NT = NT
        self.table = table
        self.axiom = axiom
        self.root = NonTerminalNode(self.axiom)

    def get_token_name(self, token: Token) -> str:
        """Return a printable name for *token*.

        Axiom and non-terminal tokens are resolved through the compiler by
        their ``code``; terminal tokens yield their ``terminal_str``; any
        other token yields an empty string.
        """
        if isinstance(token, (AxiomToken, NonTerminalToken)):
            return self.compiler.get_name(token.code)
        if isinstance(token, TerminalToken):
            return token.terminal_str
        return ""

    def token_repr(self, tok: Token):
        """Return ``"<tok> <name>"`` for debugging output."""
        return f"{tok} {self.get_token_name(tok)}"

    def print_tree(self):
        """Print the parse tree by delegating to the root node."""
        self.root.print_node()

    def print_scanner(self):
        """Print every token of ``self.text``.

        A separate scanner is requested from the compiler for this, rather
        than iterating ``self.scanner``.
        """
        for token in self.compiler.get_scanner(self.text):
            print(self.token_repr(token))

    def parse(self, debug=False):
        """Parse the token stream, filling the tree rooted at ``self.root``.

        Args:
            debug: when true, print the stack top (and the stack after each
                expansion) while parsing.

        Raises:
            ParseException: carrying the coordinates of the current token
                when the terminal on top of the stack does not match it,
                when the table has no entry for the (token tag, non-terminal)
                pair, or when tokens remain after the symbol stack has been
                reduced to the end-of-program marker.
            StopIteration: propagated from ``next_token`` if the scanner is
                exhausted while symbols are still expected.
        """
        end_marker = self.T.END_OF_PROGRAM
        stack = [end_marker, self.axiom]
        a = self.next_token()
        node_stack = [self.root]
        while True:
            x = stack.pop()
            if x == end_marker:
                # The derivation is complete; anything other than the
                # end-of-program token here is unconsumed trailing input.
                if a.tag != end_marker:
                    raise ParseException(a.coords)
                break

            node = node_stack.pop()
            if x in self.T:
                if debug:
                    print("STACK TOP T:", x.name)

                if x != a.tag:
                    raise ParseException(a.coords)
                node.set_values(a, self.get_token_name(a))
                a = self.next_token()
                continue

            if debug:
                print("STACK TOP NT:", x, ", CUR T:", self.token_repr(a))

            key = (a.tag, x)
            if key not in self.table:
                raise ParseException(a.coords)

            # Push the production right-to-left so that its first symbol ends
            # up on top of both stacks.
            # NOTE(review): children are therefore attached to `node` in
            # reverse production order; whether `add_children` compensates
            # for that is not visible here -- confirm.
            production = list(self.table[key])
            for symbol in reversed(production):
                if symbol in self.T:
                    new_node = TerminalNode()
                else:
                    new_node = NonTerminalNode(symbol)
                node.add_children(new_node)
                node_stack.append(new_node)
                stack.append(symbol)
            if debug:
                print("STACK:", stack)

    def next_token(self):
        """Fetch the next token and translate its tag into ``self.T``.

        The token's ``tag`` is overwritten in place with the member of
        ``self.T`` that has the same name.
        """
        a = next(self.scanner)
        a.tag = self.get_equivalent_enum(a.tag)
        return a

    def get_equivalent_enum(self, tag):
        """Return the member of ``self.T`` whose name equals ``tag.name``."""
        return getattr(self.T, tag.name)
Esempio n. 3
0
class Parser:
    """Recursive-descent parser for a grammar description, plus FIRST sets.

    ``parse`` reads rules of the form described by the comments above each
    ``parse_*`` method and stores them in ``grammar_dict`` keyed by the
    non-terminal's name; ``build_first_set`` then computes a FIRST set for
    every stored non-terminal.
    """
    text: str = attr.ib()
    T: Enum = attr.ib()
    sym: Token = None
    # NOTE(review): this dict is created once at class-definition time, so
    # unless the class decorator (not visible here) replaces it per instance,
    # every Parser writes its rules into the same shared dict -- confirm.
    grammar_dict: Dict[Enum, List[Alt]] = dict()
    compiler: Compiler
    scanner: Callable
    first_set: Dict[Enum, set]

    def __post_init__(self, scanner_type: Callable, text: str):
        """Create the compiler and the scanner over *text*.

        NOTE(review): ``__post_init__`` is the dataclasses hook name; attrs
        calls ``__attrs_post_init__`` instead. Whether this method is invoked
        automatically depends on the class decorator, which is not visible
        here -- confirm, otherwise ``compiler``/``scanner`` must be set by
        the caller.
        """
        self.compiler = Compiler(scanner_type)
        self.scanner = self.compiler.get_scanner(text)

    def parse_terminal(self, terminal: T):
        """Consume the current token if it carries tag *terminal*.

        Raises:
            ParseException: with the current token's coordinates and the
                expected tag when the tags differ.
        """
        sym = self.sym
        if not sym.has_tag(terminal):
            raise ParseException(sym.coords, terminal)
        self.next_token()

    def next_token(self):
        """Advance ``self.sym`` to the next token from the scanner."""
        self.sym = next(self.scanner)

    def first_set_for_chain(self, rhs: List[Alt]) -> set:
        """Return the union of the FIRST sets of all alternatives in *rhs*.

        An empty *rhs* yields ``{EPSILON}``.
        """
        if not rhs:
            return {EPSILON}
        res_set = set()
        for alt in rhs:
            res_set |= self.first_set_for_alt(alt)
        return res_set

    def first_set_for_alt(self, alt):
        """Return the FIRST set of a single alternative.

        The head term contributes its own FIRST set: the token name for a
        terminal, the current ``self.first_set`` entry for a non-terminal, or
        the FIRST set of the nested alternatives for a parenthesised group.
        When the head can be skipped -- its quantifier is something other
        than ``None``/``T.PLUS``, or (for non-terminals and groups) its FIRST
        set contains ``EPSILON`` -- ``EPSILON`` is dropped and the FIRST set
        of the remaining terms is merged in.

        Raises:
            TypeError: if the head term's symbol is of an unsupported type.
        """
        if not alt.list_of_term:
            return {EPSILON}
        head_term, *tail_term = alt.list_of_term
        sym = head_term.symbol

        if isinstance(sym, TerminalToken):
            first_set_term = {self.get_token_name(sym)}
            nullable = False
        elif isinstance(sym, NonTerminalToken):
            first_set_term = self.first_set[self.get_token_name(sym)].copy()
            nullable = EPSILON in first_set_term
        elif isinstance(sym, list):
            # Parenthesised group: parse_term stores the nested rhs (a list
            # of Alt) as the term's symbol.
            first_set_term = self.first_set_for_chain(sym)
            nullable = EPSILON in first_set_term
        else:
            raise TypeError(
                f"unsupported term symbol: {sym!r} ({type(sym).__name__})")

        quantifier = head_term.quantifier
        optional = not (quantifier is None or quantifier == T.PLUS)
        if optional or nullable:
            # The head may produce nothing, so whatever can start the rest
            # of the alternative can also start the alternative itself.
            first_set_term = first_set_term - {EPSILON}
            first_set_term |= self.first_set_for_alt(Alt(tail_term))
        return first_set_term

    def build_first_set(self):
        """Compute ``self.first_set`` for every rule and pretty-print it.

        Iterates over all rules until a full pass adds nothing new to any
        non-terminal's FIRST set.
        """
        self.first_set = {nt: set() for nt in self.grammar_dict}
        changed_flag = True
        while changed_flag:
            changed_flag = False
            for lhs, rhs in self.grammar_dict.items():
                first_rhs = self.first_set_for_chain(rhs)
                first_lhs = self.first_set[lhs]
                # Only a genuinely new element counts as progress; this keeps
                # termination independent of exact set equality.
                if not first_rhs <= first_lhs:
                    changed_flag = True
                    first_lhs.update(first_rhs)
        pprint.pprint(self.first_set)

    # (S) = [NT \= (rhs) \.]*.
    def parse(self):
        """Parse all rules and store each right-hand side in ``grammar_dict``.

        Rules are keyed by the name of their left-hand non-terminal; a later
        rule for the same name overwrites the earlier one.
        """
        self.next_token()
        while self.sym.has_tag(T.NON_TERMINAL):
            sym = self.sym
            self.parse_terminal(T.NON_TERMINAL)
            self.parse_terminal(T.ASSIGN)
            rhs = self.parse_rhs()
            self.grammar_dict[self.get_token_name(sym)] = rhs
            self.parse_terminal(T.DOT)

    # (rhs) = (alt) [\| (alt)]*.
    def parse_rhs(self) -> List[Alt]:
        """Parse one or more alternatives separated by ``T.OR``."""
        alt = self.parse_alt()
        rhs_list = [alt]
        while self.sym.has_tag(T.OR):
            self.parse_terminal(T.OR)
            alt = self.parse_alt()
            rhs_list.append(alt)
        return rhs_list

    # (alt) = [(term)(M)]*.
    def parse_alt(self) -> Alt:
        """Parse a (possibly empty) sequence of terms with their quantifiers."""
        alt_list = []
        while self.sym.has_one_of_tags(T.NON_TERMINAL, T.TERMINAL, T.LPAREN):
            term = self.parse_term()
            quantifier = self.parse_m()
            if quantifier is not None:
                term.quantifier = quantifier
            alt_list.append(term)
        return Alt(alt_list)

    # (term) = NT|T|\[(rhs)\]
    def parse_term(self) -> Term:
        """Parse a non-terminal, a terminal, or a parenthesised group.

        For a group the returned ``Term`` wraps the nested list of
        alternatives instead of a token.
        """
        sym = self.sym
        if sym.has_tag(T.NON_TERMINAL):
            self.parse_terminal(T.NON_TERMINAL)
            return Term(sym, None)
        if sym.has_tag(T.TERMINAL):
            self.parse_terminal(T.TERMINAL)
            return Term(sym, None)
        # Anything else must open a group; parse_terminal raises otherwise.
        self.parse_terminal(T.LPAREN)
        rhs = self.parse_rhs()
        self.parse_terminal(T.RPAREN)
        return Term(rhs, None)

    # (M) = [\+|\*|\?]?
    def parse_m(self) -> Optional[Enum]:
        """Consume an optional quantifier and return its tag, else ``None``."""
        sym = self.sym
        if sym.has_one_of_tags(T.PLUS, T.QUEST_MARK, T.MUL):
            self.parse_terminal(sym.tag)
            return sym.tag
        return None

    def get_token_name(self, token: Token) -> str:
        """Return a printable name for *token*.

        Non-terminal tokens are resolved through the compiler by their
        ``code``; terminal tokens yield their ``terminal_str``; any other
        token yields an empty string.
        """
        if isinstance(token, NonTerminalToken):
            return self.compiler.get_name(token.code)
        if isinstance(token, TerminalToken):
            return token.terminal_str
        return ""

    def token_repr(self, tok: Token):
        """Return ``"<tok> <name>"`` for debugging output."""
        return f"{tok} {self.get_token_name(tok)}"

    def print_scanner(self):
        """Print every token of ``self.text`` using a separate scanner."""
        for token in self.compiler.get_scanner(self.text):
            print(self.token_repr(token))