Beispiel #1
0
 def parse_map(nodes):
     keys = nodes[0::2]
     vals = nodes[1::2]
     for k in keys:
         if not isinstance(k, KeywordNode):
             raise ParserError(k, "must be a keyword node")
     for v in vals:
         if isinstance(v, KeywordNode):
             raise ParserError(v, "can't be a keyword node")
     if len(keys) != len(vals):
         raise ParserError(self, "the keys and values don't have same length")
     return zip(keys, vals)
Beispiel #2
0
 def check_dup(self):
     ret = set()
     for v in self.kv_map.values():
         if v in ret and IS(v, NameNode):
             raise ParserError(v, "duplicate variable")
         else:
             ret.add(v)
Beispiel #3
0
 def check_dup(self):
     ret = set()
     for e in self.elements:
         if e in ret and IS(e, NameNode):
             raise ParserError(e, "duplicate variable")
         else:
             ret.add(e)
Beispiel #4
0
    def no_match_error(self, token, top, line):
        if token.value():
            val = token.value().lower()
        else:
            val = ""

        raise ParserError(
            "Expected " + top.lower() + " before " + token.type().lower() +
            ": " + val, line)
Beispiel #5
0
    def parse_number(self, lexeme):
        # skip the sign
        lexeme = lexeme.lstrip('+-').lower()

        if lexeme.startswith('0x'):
            base = 16
        elif lexeme.startswith('0b'):
            base = 2
        elif lexeme.startswith('0'):
            base = 8
        else:
            base = 10
        try:
            return int(lexeme, base)
        except ValueError:
            raise ParserError(self, "Not a number")
Beispiel #6
0
    def parse_properties(self, lst):
        params = []
        properties = None
        for entry in lst:
            if isinstance(entry, NameToken):
                params.append(self.parse_tok(entry))
            if isinstance(entry, Pair):
                eles = entry.elements
                if len(eles) < 2:
                    raise ParserError(entry, "at least 2 elements")
                if not properties:
                    properties = SymTable()
                nodes = self.parse_lst(eles)
                name = nodes[0]
                ty = nodes[1]
                params.append(name)
                properties.put_type(name.id, ty)

                for k, v in Parser.parse_map(nodes[2:]):
                    properties.put(name.id, k.id, v)

        return params, properties
Beispiel #7
0
 def __init__(self, lexeme, fname, start, end, line, col):
     super(FloatNode, self).__init__(fname, start, end, line, col)
     try:
         self.value = float(lexeme)          # need fixed
     except ValueError:
         raise ParserError(self, "invlid literal for float")
Beispiel #8
0
 def error(self, error_code, token):
     raise ParserError(error_code=error_code,
                       token=token,
                       message=f'{error_code.value} -> {token}')
Beispiel #9
0
 def production_error(self, token, top):
     raise ParserError(
         "Expected type " + top.lower() + " but provided " +
         token.type().lower(), token.line())
Beispiel #10
0
    def parse_pair(self, p):
        open_delim = p.open_delim
        close_delim = p.close_delim
        fname = open_delim.fname
        start = open_delim.start
        end = close_delim.end
        line = open_delim.line
        col = open_delim.col

        if IS(p, ParenPair):
            if len(p.elements) == 0:
                raise ParserError(open_delim, 'Pair should not be empty.')

            kw_tok = p.elements[0]

            if isinstance(kw_tok, NameToken):
                kw = p.elements[0].lexeme
                # the template for error message
                msg_tpl = """
                 %s: bad syntax;
                expect %s parts after keyword, but given %s parts
                """
                if kw == SEQ_KW:
                    statements = []
                    for i in p.elements[1:]:
                        statements.append(self.parse_tok_or_pair(i))
                    return BlockNode(statements, fname, start, end, line, col)

                elif kw == ASSIGN_KW:
                    if len(p.elements) != 3:
                        err_msg = msg_tpl % (ASSIGN_KW, 2, len(p.elements)-1)
                        raise ParserError(open_delim, err_msg)
                    patt = self.parse_tok_or_pair(p.elements[1])
                    if IS(patt, VectorNode) or IS(patt, RecordLiteralNode):
                        patt.check_dup()
                    val = self.parse_tok_or_pair(p.elements[2])
                    return AssignNode(patt, val, fname, start, end, line, col)
                elif kw == DEFINE_KW:
                    if len(p.elements) != 3:
                        err_msg = msg_tpl % (DEFINE_KW, 2, len(p.elements)-1)
                        raise ParserError(open_delim, err_msg)
                    patt = self.parse_tok_or_pair(p.elements[1])
                    if IS(patt, VectorNode) or IS(patt, RecordLiteralNode):
                        patt.check_dup()
                    val = self.parse_tok_or_pair(p.elements[2])
                    return DefNode(patt, val, fname, start, end, line, col)
                elif kw == IF_KW:
                    if len(p.elements) != 4:
                        if len(p.elements) == 3:
                            msg = "if: missing an \"else\" expression"
                        else:
                            msg = """if: bad syntax;
                            has %d part after keyword""" % len(p.elements)-1
                        raise ParserError(open_delim, msg)
                    test = self.parse_tok_or_pair(p.elements[1])
                    conseq = self.parse_tok_or_pair(p.elements[2])
                    alt = self.parse_tok_or_pair(p.elements[3])
                    return IfNode(test, conseq, alt, fname, start,
                                  end, line, col)
                elif kw == FUN_KW:    # anonymous function
                    if len(p.elements) < 2:
                        err_msg = msg_tpl % (FUN_KW, "at least 1", len(p.elements)-1)
                        raise ParserError(open_delim, err_msg)
                    params_pair = p.elements[1]
                    if not isinstance(params_pair, Pair):
                        raise ParserError(open_delim, 'Formal Parameters should be a list')
                    param_names, properties = self.parse_properties(params_pair.elements)
                    # construct body
                    statements = self.parse_lst(p.elements[2:])
                    body = BlockNode(statements, fname, start, end, line, col)
                    return FunNode(param_names, properties, body,
                                   fname, start, end, line, col)
            # application(Call)
            fun = self.parse_tok_or_pair(p.elements[0])
            parsed_args = self.parse_lst(p.elements[1:])
            if len(parsed_args) == 0:
                loc = close_delim
            else:
                loc = p.elements[1]
            positional, keywords = Parser.parse_arguments(parsed_args)
            args = ArgumentNode(positional, keywords, loc.fname, loc.start,
                                end, loc.line, loc.col)
            return CallNode(fun, args, fname, start, end, line, col)
        # vector literal
        elif IS(p, SquarePair):
            eles = self.parse_lst(p.elements)
            return VectorNode(eles, fname, start, end, line, col)
        # record literal
        elif IS(p, CurlyPair):
            elements = self.parse_lst(p.elements)
            kv_map = dict(Parser.parse_map(elements))
            return RecordLiteralNode(kv_map, fname, start, end, line, col)
        else:
            raise ParserError(open_delim, 'unkown pair')
Beispiel #11
0
class Parser(object):
    def __init__(self, fname, text=None):
        self.lex = Lexer(fname, text)

    def parse_pair(self, p):
        open_delim = p.open_delim
        close_delim = p.close_delim
        fname = open_delim.fname
        start = open_delim.start
        end = close_delim.end
        line = open_delim.line
        col = open_delim.col

        if IS(p, ParenPair):
            if len(p.elements) == 0:
                raise ParserError(open_delim, 'Pair should not be empty.')

            kw_tok = p.elements[0]

            if isinstance(kw_tok, NameToken):
                kw = p.elements[0].lexeme
                # the template for error message
                msg_tpl = """
                 %s: bad syntax;
                expect %s parts after keyword, but given %s parts
                """
                if kw == SEQ_KW:
                    statements = []
                    for i in p.elements[1:]:
                        statements.append(self.parse_tok_or_pair(i))
                    return BlockNode(statements, fname, start, end, line, col)

                elif kw == ASSIGN_KW:
                    if len(p.elements) != 3:
                        err_msg = msg_tpl % (ASSIGN_KW, 2, len(p.elements)-1)
                        raise ParserError(open_delim, err_msg)
                    patt = self.parse_tok_or_pair(p.elements[1])
                    if IS(patt, VectorNode) or IS(patt, RecordLiteralNode):
                        patt.check_dup()
                    val = self.parse_tok_or_pair(p.elements[2])
                    return AssignNode(patt, val, fname, start, end, line, col)
                elif kw == DEFINE_KW:
                    if len(p.elements) != 3:
                        err_msg = msg_tpl % (DEFINE_KW, 2, len(p.elements)-1)
                        raise ParserError(open_delim, err_msg)
                    patt = self.parse_tok_or_pair(p.elements[1])
                    if IS(patt, VectorNode) or IS(patt, RecordLiteralNode):
                        patt.check_dup()
                    val = self.parse_tok_or_pair(p.elements[2])
                    return DefNode(patt, val, fname, start, end, line, col)
                elif kw == IF_KW:
                    if len(p.elements) != 4:
                        if len(p.elements) == 3:
                            msg = "if: missing an \"else\" expression"
                        else:
                            msg = """if: bad syntax;
                            has %d part after keyword""" % len(p.elements)-1
                        raise ParserError(open_delim, msg)
                    test = self.parse_tok_or_pair(p.elements[1])
                    conseq = self.parse_tok_or_pair(p.elements[2])
                    alt = self.parse_tok_or_pair(p.elements[3])
                    return IfNode(test, conseq, alt, fname, start,
                                  end, line, col)
                elif kw == FUN_KW:    # anonymous function
                    if len(p.elements) < 2:
                        err_msg = msg_tpl % (FUN_KW, "at least 1", len(p.elements)-1)
                        raise ParserError(open_delim, err_msg)
                    params_pair = p.elements[1]
                    if not isinstance(params_pair, Pair):
                        raise ParserError(open_delim, 'Formal Parameters should be a list')
                    param_names, properties = self.parse_properties(params_pair.elements)
                    # construct body
                    statements = self.parse_lst(p.elements[2:])
                    body = BlockNode(statements, fname, start, end, line, col)
                    return FunNode(param_names, properties, body,
                                   fname, start, end, line, col)
            # application(Call)
            fun = self.parse_tok_or_pair(p.elements[0])
            parsed_args = self.parse_lst(p.elements[1:])
            if len(parsed_args) == 0:
                loc = close_delim
            else:
                loc = p.elements[1]
            positional, keywords = Parser.parse_arguments(parsed_args)
            args = ArgumentNode(positional, keywords, loc.fname, loc.start,
                                end, loc.line, loc.col)
            return CallNode(fun, args, fname, start, end, line, col)
        # vector literal
        elif IS(p, SquarePair):
            eles = self.parse_lst(p.elements)
            return VectorNode(eles, fname, start, end, line, col)
        # record literal
        elif IS(p, CurlyPair):
            elements = self.parse_lst(p.elements)
            kv_map = dict(Parser.parse_map(elements))
            return RecordLiteralNode(kv_map, fname, start, end, line, col)
        else:
            raise ParserError(open_delim, 'unkown pair')

    def parse_tok(self, tok):
        lexeme = tok.lexeme
        fname = tok.fname
        start = tok.start
        end = tok.end
        line = tok.line
        col = tok.col

        if isinstance(tok, NumToken):
            if '.' in lexeme:
                return FloatNode(lexeme, fname, start, end, line, col)
            else:
                return IntNode(lexeme, fname, start, end, line, col)
        elif isinstance(tok, StrToken):
            return StrNode(lexeme, fname, start, end, line, col)
        elif isinstance(tok, NameToken):
            # vector subscript
            if VECTOR_SUB in lexeme:
                val, idx = lexeme.split(VECTOR_SUB, 1)
                val = NameNode(val, fname, start, end, line, col)
                idx = IntNode(idx, fname, start, end, line, col)
                return SubscriptNode(val, idx, fname, start, end, line, col)
            # record attribute access
            if RECORD_ATTR in lexeme:
                val, attr = lexeme.split(RECORD_ATTR, 1)
                val = NameNode(val, fname, start, end, line, col)
                attr = NameNode(attr, fname, start, end, line, col)
                return AttrNode(val, attr, fname, start, end, line, col)
            # boolean
            if lexeme in (TRUE_KW, FALSE_KW):
                return BoolNode(lexeme, fname, start, end, line, col)
            # keyword
            if lexeme.startswith(KEYWORD_PREFIX):
                return KeywordNode(lexeme, fname, start, end, line, col)
            else:
                return NameNode(lexeme, fname, start, end, line, col)

    def parse_tok_or_pair(self, pt):
        if isinstance(pt, Token):
            return self.parse_tok(pt)
        elif isinstance(pt, Pair):
            return self.parse_pair(pt)

    def parse_lst(self, lst):
        ret = []
        for i in lst:
            ret.append(self.parse_tok_or_pair(i))
        return ret

    def parse(self):
        first = self.next_pair_or_tok(0)
        pt = first
        statements = []
        while pt:
            statements.append(self.parse_tok_or_pair(pt))
            pt = self.next_pair_or_tok(0)
        last = statements[len(statements)-1]
        return BlockNode(statements, first.fname, first.start, last.end,
                         first.line, first.col)

    def parse_properties(self, lst):
        params = []
        properties = None
        for entry in lst:
            if isinstance(entry, NameToken):
                params.append(self.parse_tok(entry))
            if isinstance(entry, Pair):
                eles = entry.elements
                if len(eles) < 2:
                    raise ParserError(entry, "at least 2 elements")
                if not properties:
                    properties = SymTable()
                nodes = self.parse_lst(eles)
                name = nodes[0]
                ty = nodes[1]
                params.append(name)
                properties.put_type(name.id, ty)

                for k, v in Parser.parse_map(nodes[2:]):
                    properties.put(name.id, k.id, v)

        return params, properties

    @staticmethod
    def parse_map(nodes):
        keys = nodes[0::2]
        vals = nodes[1::2]
        for k in keys:
            if not isinstance(k, KeywordNode):
                raise ParserError(k, "must be a keyword node")
        for v in vals:
            if isinstance(v, KeywordNode):
                raise ParserError(v, "can't be a keyword node")
        if len(keys) != len(vals):
            raise ParserError(self, "the keys and values don't have same length")
        return zip(keys, vals)

    @staticmethod
    def parse_arguments(nodes):
        positional = []
        keywords = {}

        # get separate index of positional arguments and keyword arguments
        sep_idx = len(nodes)
        for i in range(len(nodes)):
            if isinstance(nodes[i], KeywordNode):
                sep_idx = i
                break

        positional_nodes = nodes[0:sep_idx]
        keywords_nodes = nodes[sep_idx:]
        positional.extend(positional_nodes)

        for k, v in Parser.parse_map(keywords_nodes):
            keywords[k] = v     # the key is KeywordNode instance
        return positional, keywords

    def next_pair_or_tok(self, deepth):
        try:
            tok = self.lex.next_token()
        except LexicalError, e:
            fatal(str(e))

        if not tok:
            return False

        if is_open(tok.lexeme):
            open_delim = tok
            elements = []
            while True:
                pt = self.next_pair_or_tok(deepth+1)
                if not pt:
                    raise ParserError(tok, "unbalanced delimeter")
                if isinstance(pt, DelimeterToken) and\
                   match_delimeter(open_delim.lexeme, pt.lexeme):
                    break
                elements.append(pt)
            close_delim = pt
            if open_delim.lexeme == PAREN_BEGIN:
                return ParenPair(open_delim, close_delim, elements)
            elif open_delim.lexeme == SQUARE_BEGIN:
                return SquarePair(open_delim, close_delim, elements)
            elif open_delim.lexeme == CURLY_BEGIN:
                return CurlyPair(open_delim, close_delim, elements)
            else:
                raise ParserError(open_delim, "unkown pair")
        elif is_close(tok.lexeme) and deepth == 0:
            raise ParserError(tok, "unbalanced delimeter")
        else:
            return tok
Beispiel #12
0
def error(message):
    e = ParserError(message=message)
    print(e.message)
    exit(1)