コード例 #1
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse(self, sanitized: bool = True) -> ast.Node:
        """Parse the whole token stream into a module node.

        :param sanitized: Whether the resulting AST should be run through the
            sanitizer before being returned.
        """
        nodes = []
        while True:
            # Ignore statement delimiters before the next declaration.
            self.consume_newlines()
            if self.peek().kind == TokenKind.eof:
                break
            nodes.append(self.parse_declaration())

        # The module's range spans from the first declaration to the last;
        # an empty module gets a default (empty) location.
        if nodes:
            span = SourceRange(start=nodes[0].source_range.start,
                               end=nodes[-1].source_range.end)
        else:
            span = SourceRange(start=SourceLocation())

        module = ast.Module(declarations=nodes, source_range=span)

        # Optionally sanitize the AST before handing it back.
        if sanitized:
            module = Sanitizer().visit(module)

        return module
コード例 #2
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_match_expression(self) -> ast.MatchExpression:
        """Parse a `match` expression: a subject followed by its cases."""
        match_token = self.consume(TokenKind.match)
        if match_token is None:
            raise self.unexpected_token(expected='match')

        # Parse the subject of the match.
        self.consume_newlines()
        subject = self.parse_expression()

        # Collect every `when`/`else` case that follows.
        cases = []
        self.consume_newlines()
        while self.peek().kind in {TokenKind.when, TokenKind.else_}:
            cases.append(self.parse_match_case())
            self.consume_newlines()

        # A match expression with no case at all is invalid.
        if not cases:
            raise exc.ParseError(
                message='match expressions should have at least one case',
                source_range=self.peek().source_range)

        span = SourceRange(start=match_token.source_range.start,
                           end=cases[-1].source_range.end)
        return ast.MatchExpression(subject=subject,
                                   cases=cases,
                                   source_range=span)
コード例 #3
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_binding(self) -> ast.Binding:
        """Parse a `let` binding with an optional type annotation."""
        let_token = self.consume(TokenKind.let)
        if let_token is None:
            raise self.unexpected_token(expected='let')

        # Parse the bound name.
        self.consume_newlines()
        name_token = self.consume(TokenKind.identifier)
        if name_token is None:
            raise self.expected_identifier()

        # Try to parse a `: T` annotation; rewind if there is none.
        saved_position = self.stream_position
        self.consume_newlines()
        if self.consume(TokenKind.colon) is not None:
            annotation = self.parse_type()
            end = annotation.source_range.end
        else:
            self.rewind_to(saved_position)
            annotation = None
            end = name_token.source_range.end

        span = SourceRange(start=let_token.source_range.start, end=end)
        return ast.Binding(name=name_token.value,
                           annotation=annotation,
                           source_range=span)
コード例 #4
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_function_type(self) -> ast.Node:
        """Parse a function type of the form `domain -> codomain`."""
        # Parse the domain.
        if self.peek().kind == TokenKind.lparen:
            # A leading parenthesis is parsed as any parenthesized type. Note
            # that this won't parse a function type declared with the form
            # `(a: T) -> (a: T)`.
            domain = self.parse_parenthesized(self.parse_type)
        else:
            # A non-parenthesized domain must not itself be a function type,
            # because the arrow operator is right associative: we don't want
            # to parse a (non-parenthesized) function type as a domain, so we
            # restrict it to object types and identifiers.
            domain = (self.attempt(self.parse_object_type)
                      or self.parse_identifier())

        # The arrow operator is mandatory.
        self.consume_newlines()
        if self.consume(TokenKind.arrow) is None:
            raise self.unexpected_token(expected='->')

        # Parse the codomain. Unlike the domain, it may be any type,
        # including another function type (right associativity).
        self.consume_newlines()
        if self.peek().kind == TokenKind.lparen:
            codomain = self.parse_parenthesized(self.parse_type)
        else:
            codomain = self.parse_type()

        span = SourceRange(start=domain.source_range.start,
                           end=codomain.source_range.end)
        return ast.FunctionType(domain=domain,
                                codomain=codomain,
                                source_range=span)
コード例 #5
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_union_type(self) -> ast.Node:
        """Parse a union type: one or more types separated by `|`.

        Returns the sole member directly when only a single type is parsed.
        """
        # If the current token is a left parenthesis, we can't already know
        # whether it encloses a single type of a union, or if it encloses the
        # union type itself. In other words, are we parsing `(T1 | T2)` or
        # `(T1) | T2`?
        if self.peek().kind == TokenKind.lparen:
            # If parsing a parenthesized union type succeeds, then we use the
            # result as the "first" type of the union, in case the next
            # consumable token is `|`. Otherwise, we retry parsing the
            # parenthesis as part of an individual type.
            union = self.attempt(
                lambda: self.parse_parenthesized(self.parse_union_type))
            types = [union] if union is not None else [self.parse_type()]
        else:
            types = [self.parse_type()]

        # Attempt to parse other types, separated by `|`.
        while True:
            backtrack = self.stream_position
            self.consume_newlines()
            if self.consume(TokenKind.or_) is None:
                self.rewind_to(backtrack)
                break
            self.consume_newlines()
            types.append(self.parse_type())

        if len(types) > 1:
            # Fix: the union's range must end at the *end* of the last
            # member, not at its start.
            return ast.UnionType(types=types,
                                 source_range=SourceRange(
                                     start=types[0].source_range.start,
                                     end=types[-1].source_range.end))
        else:
            return types[0]
コード例 #6
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_type_declaration(self) -> ast.TypeDeclaration:
        """Parse a type declaration: `type Name [placeholders] = body`."""
        type_token = self.consume(TokenKind.type)
        if type_token is None:
            raise self.unexpected_token(expected='type')

        # Parse the declared name.
        name_token = self.consume(TokenKind.identifier)
        if name_token is None:
            raise self.expected_identifier()

        # Parse the optional `[...]` placeholder list.
        self.consume_newlines()
        placeholders = []
        if self.consume(TokenKind.lbracket) is not None:
            placeholders = self.parse_sequence(TokenKind.rbracket,
                                               self.parse_placeholder)
            if self.consume(TokenKind.rbracket) is None:
                raise self.unexpected_token(expected=']')

        # The binding operator is mandatory.
        self.consume_newlines()
        if self.consume(TokenKind.bind) is None:
            raise self.unexpected_token(expected='=')

        # The body is a (possibly singleton) union type.
        body = self.parse_union_type()

        span = SourceRange(start=type_token.source_range.start,
                           end=body.source_range.end)
        return ast.TypeDeclaration(name=name_token.value,
                                   placeholders=placeholders,
                                   body=body,
                                   source_range=span)
コード例 #7
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_if_expression(self) -> ast.IfExpression:
        """Parse an `if ... then ... else ...` expression."""
        if_token = self.consume(TokenKind.if_)
        if if_token is None:
            raise self.unexpected_token(expected='if')

        # Parse the condition.
        self.consume_newlines()
        condition = self.parse_expression()

        # Parse the `then` keyword and its branch.
        self.consume_newlines()
        if self.consume(TokenKind.then) is None:
            raise self.unexpected_token(expected='then')
        self.consume_newlines()
        then = self.parse_expression()

        # Parse the `else` keyword and its branch.
        self.consume_newlines()
        if self.consume(TokenKind.else_) is None:
            raise self.unexpected_token(expected='else')
        self.consume_newlines()
        else_ = self.parse_expression()

        span = SourceRange(start=if_token.source_range.start,
                           end=else_.source_range.end)
        return ast.IfExpression(condition=condition,
                                then=then,
                                else_=else_,
                                source_range=span)
コード例 #8
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_match_case(self) -> ast.Node:
        """Parse a single `when` or `else` case of a match expression."""
        first_token = self.peek()

        if first_token.kind == TokenKind.when:
            # `when <pattern> then <body>`
            self.consume()
            self.consume_newlines()
            pattern = self.parse_expression()

            self.consume_newlines()
            if self.consume(TokenKind.then) is None:
                raise self.unexpected_token(expected='then')

            self.consume_newlines()
            body = self.parse_expression()

            span = SourceRange(start=first_token.source_range.start,
                               end=body.source_range.end)
            return ast.WhenCase(pattern=pattern,
                                body=body,
                                source_range=span)

        if first_token.kind == TokenKind.else_:
            # `else <body>`
            self.consume()
            self.consume_newlines()
            body = self.parse_expression()

            span = SourceRange(start=first_token.source_range.start,
                               end=body.source_range.end)
            return ast.ElseCase(body=body, source_range=span)

        raise self.unexpected_token(expected='when')
コード例 #9
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_closure_expression(self) -> ast.ClosureExpression:
        """Parse a closure expression: `domain [-> codomain] => body`.

        An underscore in place of the domain or codomain denotes `Nothing`.
        """
        start_token = self.peek()

        # Parse the domain definition.
        if start_token.kind == TokenKind.underscore:
            # Fix: the underscore used to be consumed twice, which skipped
            # the following token and attached the wrong source range to the
            # `Nothing` node. Consume it once, as in the codomain case below.
            domain = ast.Nothing(source_range=self.consume().source_range)
        else:
            # Attempt to parse an object property (i.e. the syntactic sugar
            # for singletons).
            prop = self.attempt(self.parse_object_type_property)
            if prop is not None:
                domain = ast.ObjectType(properties=[prop],
                                        source_range=prop.source_range)
            else:
                domain = self.parse_type()

        # Parse the optional codomain.
        self.consume_newlines()
        if self.consume(TokenKind.arrow) is not None:
            self.consume_newlines()
            if self.peek().kind == TokenKind.underscore:
                codomain = ast.Nothing(
                    source_range=self.consume().source_range)
            else:
                # Attempt to parse an object property (i.e. the syntactic
                # sugar for singletons).
                prop = self.attempt(self.parse_object_type_property)
                if prop is not None:
                    codomain = ast.ObjectType(properties=[prop],
                                              source_range=prop.source_range)
                else:
                    codomain = self.parse_type()
        else:
            codomain = None

        # Parse the bold arrow operator.
        self.consume_newlines()
        if self.consume(TokenKind.bold_arrow) is None:
            raise self.unexpected_token(expected='=>')

        # Parse the body of the function.
        body = self.parse_expression()

        return ast.ClosureExpression(domain=domain,
                                     codomain=codomain,
                                     body=body,
                                     source_range=SourceRange(
                                         start=start_token.source_range.start,
                                         end=body.source_range.end))
コード例 #10
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_function_declaration(self) -> ast.FunctionDeclaration:
        """Parse a `func` declaration whose signature is a function type."""
        func_token = self.consume(TokenKind.func)
        if func_token is None:
            raise self.unexpected_token(expected='func')

        # A function may be named by an identifier or an operator symbol.
        name_token = (self.consume(TokenKind.identifier)
                      or self.consume(TokenKind.operator))
        if name_token is None:
            raise self.expected_identifier()

        # Parse the optional `[...]` placeholder list.
        self.consume_newlines()
        placeholders = []
        if self.consume(TokenKind.lbracket) is not None:
            placeholders = self.parse_sequence(TokenKind.rbracket,
                                               self.parse_placeholder)
            if self.consume(TokenKind.rbracket) is None:
                raise self.unexpected_token(expected=']')

        # Parse the signature, unwrap any enclosing parentheses, and make
        # sure it actually denotes a function type.
        self.consume_newlines()
        function_type = self.parse_type()
        while isinstance(function_type, ast.ParenthesizedNode):
            function_type = function_type.node
        if not isinstance(function_type, ast.FunctionType):
            raise exc.ParseError(
                source_range=function_type.source_range,
                message=f"'{function_type}' is not a function signature")

        # The binding operator is mandatory.
        self.consume_newlines()
        if self.consume(TokenKind.bind) is None:
            raise self.unexpected_token(expected='=')

        # Parse the body of the function.
        body = self.parse_expression()

        return ast.FunctionDeclaration(
            name=name_token.value,
            placeholders=placeholders,
            domain=function_type.domain,
            codomain=function_type.codomain,
            body=body,
            source_range=SourceRange(start=func_token.source_range.start,
                                     end=body.source_range.end))
コード例 #11
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_list_literal(self) -> ast.ListLiteral:
        """Parse a list literal: `[item, ...]`."""
        lbracket = self.consume(TokenKind.lbracket)
        if lbracket is None:
            raise self.unexpected_token(expected='[')

        # Parse the comma-separated items up to the closing bracket.
        items = self.parse_sequence(TokenKind.rbracket, self.parse_expression)
        rbracket = self.consume(TokenKind.rbracket)
        if rbracket is None:
            raise self.unexpected_token(expected=']')

        span = SourceRange(start=lbracket.source_range.start,
                           end=rbracket.source_range.end)
        return ast.ListLiteral(items=items, source_range=span)
コード例 #12
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_parenthesized(self, parser: callable) -> ast.ParenthesizedNode:
        """Parse `( ... )` around whatever `parser` recognizes.

        :param parser: The parsing callable used for the enclosed node.
        """
        lparen = self.consume(TokenKind.lparen)
        if lparen is None:
            raise self.unexpected_token(expected='(')

        self.consume_newlines()
        node = parser()
        self.consume_newlines()

        rparen = self.consume(TokenKind.rparen)
        if rparen is None:
            raise exc.ImbalancedParenthesis(
                source_range=self.peek().source_range)

        span = SourceRange(start=lparen.source_range.start,
                           end=rparen.source_range.end)
        return ast.ParenthesizedNode(node=node, source_range=span)
コード例 #13
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_object_type(self) -> ast.Node:
        """Parse an object type: `{ name[: T], ... }`."""
        lbrace = self.consume(TokenKind.lbrace)
        if lbrace is None:
            raise self.unexpected_token(expected='{')

        # Parse the property list up to the closing brace.
        properties = self.parse_sequence(TokenKind.rbrace,
                                         self.parse_object_type_property)
        rbrace = self.consume(TokenKind.rbrace)
        if rbrace is None:
            raise self.unexpected_token(expected='}')

        span = SourceRange(start=lbrace.source_range.start,
                           end=rbrace.source_range.end)
        return ast.ObjectType(properties=properties, source_range=span)
コード例 #14
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_object_literal_property(self) -> ast.ObjectLiteralProperty:
        """Parse a single `key = value` item of an object literal."""
        key = self.parse_property_key()

        # The binding operator separates the key from its value.
        self.consume_newlines()
        if self.consume(TokenKind.bind) is None:
            raise self.unexpected_token(expected='=')

        # Parse the value of the item.
        value = self.parse_expression()

        span = SourceRange(start=key.source_range.start,
                           end=value.source_range.end)
        return ast.ObjectLiteralProperty(key=key,
                                         value=value,
                                         source_range=span)
コード例 #15
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_object_literal(self) -> ast.ObjectLiteral:
        """Parse an object literal: `{ key = value, ... }`."""
        lbrace = self.consume(TokenKind.lbrace)
        if lbrace is None:
            raise self.unexpected_token(expected='{')

        # Parse the property list up to the closing brace.
        properties = self.parse_sequence(TokenKind.rbrace,
                                         self.parse_object_literal_property)
        rbrace = self.consume(TokenKind.rbrace)
        if rbrace is None:
            raise self.unexpected_token(expected='}')

        span = SourceRange(start=lbrace.source_range.start,
                           end=rbrace.source_range.end)
        return ast.ObjectLiteral(properties=properties, source_range=span)
コード例 #16
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_identifier(self) -> ast.Node:
        """Parse an identifier with optional `[...]` specializers.

        A single unlabeled type inside the brackets is syntactic sugar for
        generic types with a single placeholder; it is stored under the
        implicit label `_0`.
        """
        identifier_token = self.consume(TokenKind.identifier)
        if identifier_token is None:
            raise self.expected_identifier()

        # Parse the optional specializers.
        backtrack = self.stream_position
        self.consume_newlines()
        if self.consume(TokenKind.lbracket):
            # We first try to parse a single annotation, with no label, so as
            # to handle the syntactic sugar consisting of omitting labels for
            # generic types with only a single placeholder.
            self.consume_newlines()
            sugar_backtrack = self.stream_position
            try:
                specializers = {'_0': self.parse_type()}
                self.consume_newlines()
                end_token = self.consume(TokenKind.rbracket)
                if end_token is None:
                    raise self.unexpected_token(expected=']')
            # Fix: a bare `except:` also swallows SystemExit and
            # KeyboardInterrupt; only parse failures should trigger the
            # fallback to labeled specializers.
            except Exception:
                self.rewind_to(sugar_backtrack)
                pairs = self.parse_sequence(TokenKind.rbracket,
                                            self.parse_specializer)
                specializers = {}
                for (name_token, value) in pairs:
                    if name_token.value in specializers:
                        raise exc.DuplicateKey(key=name_token)
                    specializers[name_token.value] = value
                end_token = self.consume(TokenKind.rbracket)
                if end_token is None:
                    raise self.unexpected_token(expected=']')
            end = end_token.source_range.end
        else:
            self.rewind_to(backtrack)
            specializers = None
            end = identifier_token.source_range.end

        return ast.Identifier(name=identifier_token.value,
                              specializers=specializers,
                              source_range=SourceRange(
                                  start=identifier_token.source_range.start,
                                  end=end))
コード例 #17
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_prefix_expression(self) -> ast.PrefixExpression:
        """Parse a prefix operator followed by its operand."""
        operator_token = self.consume()
        is_prefix = (operator_token is not None
                     and operator_token.value in self.prefix_operators)
        if not is_prefix:
            raise self.unexpected_token(expected='prefix operator')

        operator = ast.Identifier(name=operator_token.value,
                                  specializers=None,
                                  source_range=operator_token.source_range)

        # The operand is a full expression.
        operand = self.parse_expression()
        span = SourceRange(start=operator.source_range.start,
                           end=operand.source_range.end)
        return ast.PrefixExpression(operator=operator,
                                    operand=operand,
                                    source_range=span)
コード例 #18
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_object_type_property(self) -> ast.Node:
        """Parse a property of an object type: `name[: annotation]`."""
        name_token = self.consume()
        if (name_token is None) or (name_token.kind != TokenKind.identifier):
            raise self.expected_identifier()

        # Try to parse a `: T` annotation; rewind if there is none.
        saved_position = self.stream_position
        self.consume_newlines()
        if self.consume(TokenKind.colon) is not None:
            annotation = self.parse_type()
            end = annotation.source_range.end
        else:
            self.rewind_to(saved_position)
            annotation = None
            end = name_token.source_range.end

        span = SourceRange(start=name_token.source_range.start, end=end)
        return ast.ObjectTypeProperty(name=name_token.value,
                                      annotation=annotation,
                                      source_range=span)
コード例 #19
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_atom(self) -> ast.Node:
        """Parse an atomic expression together with its suffixes.

        After the atom itself, repeatedly folds call arguments, the
        no-argument call marker `_`, and postfix operators into the node.
        """
        start_token = self.peek()

        if start_token.kind == TokenKind.lparen:
            atom = self.parse_parenthesized(self.parse_expression)

        elif start_token.kind in scalar_literal_kinds:
            token = self.consume()
            atom = ast.ScalarLiteral(value=token.value,
                                     source_range=token.source_range)
        elif start_token.kind == TokenKind.argref:
            token = self.consume()
            atom = ast.ArgRef(source_range=token.source_range)
        elif start_token.kind == TokenKind.identifier:
            atom = self.parse_identifier()
        elif start_token.kind == TokenKind.lbracket:
            atom = self.parse_list_literal()
        elif start_token.kind == TokenKind.lbrace:
            atom = self.parse_object_literal()
        elif start_token.kind == TokenKind.if_:
            atom = self.parse_if_expression()
        elif start_token.kind == TokenKind.match:
            atom = self.parse_match_expression()
        elif (start_token.kind
              == TokenKind.operator) and (start_token.value
                                          in self.prefix_operators):
            atom = self.parse_prefix_expression()
        else:
            raise self.unexpected_token(expected='expression')

        # Parse the optional "suffix" of the expression.
        while True:
            backtrack = self.stream_position
            self.consume_newlines()

            # If we can parse an object, we interpret it as a call expression.
            try:
                argument = self.attempt(self.parse_object_literal)
                if argument is None:
                    value = self.parse_expression()

                    # Operators that can act as both an infix and a prefix or
                    # postfix operator introduce some ambiguity, as to how an
                    # expression like `a + b` should be parsed. The most
                    # intuitive way to interpret this expression is arguably
                    # to see `+` as an infix operator, but one may also see
                    # this as the application of `a` to the expression `+b`
                    # (i.e. `a { _0 = +b }`), or the application of `a+` to
                    # the expression `b` (i.e. `a+ { _0 = b }`). We choose to
                    # disambiguate this situation by prioritizing infix
                    # expressions.
                    if isinstance(value, ast.PrefixExpression):
                        if value.operator.name in self.infix_operators:
                            self.rewind_to(backtrack)
                            break

                    # If the value is the argument reference (i.e. `$`), we
                    # use it as an object literal so that calls of the form
                    # `f $` aren't reduced to `f { _0 = $ }`.
                    if isinstance(value, ast.ArgRef):
                        argument = value
                    else:
                        key = ast.ScalarLiteral(
                            value='_0',
                            source_range=SourceRange(
                                start=self.peek().source_range.start))

                        prop = ast.ObjectLiteralProperty(
                            key=key,
                            value=value,
                            source_range=SourceRange(
                                start=key.source_range.start,
                                end=value.source_range.end))
                        argument = ast.ObjectLiteral(
                            properties=[prop], source_range=prop.source_range)

                atom = ast.CallExpression(callee=atom,
                                          argument=argument,
                                          source_range=SourceRange(
                                              start=atom.source_range.start,
                                              end=argument.source_range.end))
                continue

            # Fix: a bare `except:` also swallows SystemExit and
            # KeyboardInterrupt; only parse failures should trigger the
            # fallback to the other suffix forms.
            except Exception:
                self.rewind_to(backtrack)
                self.consume_newlines()

            suffix_token = self.peek()

            # An underscore corresponds to a call to a function without any
            # argument.
            # Fix: the original compared the token object itself (not its
            # kind) to `TokenKind.underscore`, so this branch never matched.
            if suffix_token.kind == TokenKind.underscore:
                end_token = self.consume()
                atom = ast.CallExpression(callee=atom,
                                          argument=None,
                                          source_range=SourceRange(
                                              start=atom.source_range.start,
                                              end=end_token.source_range.end))
                continue

            # If we can parse a postfix operator, we interpret it as a
            # postfix expression.
            if suffix_token.kind == TokenKind.operator and (
                    suffix_token.value in self.postfix_operators):
                operator = self.consume()

                # Backtrack if the operator is also infix and the remainder
                # of the stream can be parsed as an expression.
                if operator.value in self.infix_operators:
                    if self.attempt(self.parse_expression) is not None:
                        self.rewind_to(backtrack)
                        break

                operator_identifier = ast.Identifier(
                    name=operator.value,
                    specializers=None,
                    source_range=operator.source_range)
                # Fix: the operand precedes a postfix operator, so the range
                # starts at the operand and ends at the operator (the
                # original had start and end inverted).
                atom = ast.PostfixExpression(
                    operator=operator_identifier,
                    operand=atom,
                    source_range=SourceRange(
                        start=atom.source_range.start,
                        end=operator_identifier.source_range.end))
                continue

            self.rewind_to(backtrack)
            break

        return atom
コード例 #20
0
ファイル: parser.py プロジェクト: mamba-lang/mamba
    def parse_expression(self) -> ast.Node:
        """Parse an expression, handling infix operator precedence.

        Builds a left-leaning chain of infix expressions and rebalances it on
        the fly according to each operator's precedence and associativity
        (looked up in `self.infix_operators`).
        """
        # Attempt to parse a binding.
        if self.peek().kind == TokenKind.let:
            return self.parse_binding()

        # Attempt to parse a term.
        left = self.attempt(self.parse_closure_expression) or self.parse_atom()

        # Attempt to parse the remainder of an infix expression.
        while True:
            backtrack = self.stream_position
            self.consume_newlines()
            operator = self.consume(TokenKind.operator)
            if operator is None:
                # No further operator: the expression ends here.
                self.rewind_to(backtrack)
                break
            if operator.value not in self.infix_operators:
                raise exc.UnknownOperator(operator=operator)

            # The infix operators `.`, `?` or `!` represent attribute retrieval expressions. Just
            # like object keys, an identifier with no specializers on the right operand of an
            # attribute retrieval expression is interpreted as a character string by default.
            # Hence, we need to treat this syntactic sugar case here.
            if operator.value in {'.', '?', '!'}:
                right = self.parse_property_key()
            else:
                right = self.parse_atom()

            operator_identifier = ast.Identifier(
                name=operator.value,
                specializers=None,
                source_range=operator.source_range)

            # If the left operand is an infix expression, we should check the precedence and
            # associativity of its operator against the current one.
            if isinstance(left, ast.InfixExpression):
                lprec = self.infix_operators[left.operator.name]['precedence']
                rprec = self.infix_operators[operator.value]['precedence']
                associativity = self.infix_operators[
                    left.operator.name]['associativity']

                # Rotate the tree to the right when the new operator binds
                # tighter than the previous one, or when the same operator
                # repeats and is right-associative.
                if ((lprec < rprec)
                        or ((left.operator.name == operator.value) and
                            (associativity == 'right'))):

                    # The previous right operand becomes the left operand of
                    # the new, deeper infix node.
                    new_right = ast.InfixExpression(
                        operator=operator_identifier,
                        left=left.right,
                        right=right,
                        source_range=SourceRange(
                            start=left.right.source_range.start,
                            end=right.source_range.end))
                    left = ast.InfixExpression(
                        operator=left.operator,
                        left=left.left,
                        right=new_right,
                        source_range=SourceRange(
                            start=left.left.source_range.start,
                            end=right.source_range.end))
                    continue

            # Default case: extend the chain to the left (left associativity).
            left = ast.InfixExpression(operator=operator_identifier,
                                       left=left,
                                       right=right,
                                       source_range=SourceRange(
                                           start=left.source_range.start,
                                           end=right.source_range.end))
            continue

        return left