def __init__(
        self,
        lexer: Optional[IPythonDiceLexer] = None,
        probability_distribution_factory: Optional[
            IProbabilityDistributionFactory] = None,
        probability_distribution_state_factory: Optional[
            IProbabilityDistributionStateFactory] = None,
    ):
        """Build the rply parser and store the collaborators.

        Each argument left as ``None`` is replaced by a freshly constructed
        default (``PythonDiceLexer``, ``ProbabilityDistributionFactory`` and
        ``ProbabilityDistributionStateFactory`` respectively).

        The parser's error handler raises ``ValueError`` describing the
        offending token.
        """
        # Resolve defaults in the same order as the parameters are declared.
        resolved_lexer = PythonDiceLexer() if lexer is None else lexer
        distribution_factory = (ProbabilityDistributionFactory()
                                if probability_distribution_factory is None
                                else probability_distribution_factory)
        state_factory = (ProbabilityDistributionStateFactory()
                         if probability_distribution_state_factory is None
                         else probability_distribution_state_factory)
        self._lexer = resolved_lexer

        token_names = [entry.get_token_name() for entry in LEXER_SYNTAX]
        generator = rply.ParserGenerator(token_names, precedence=PRECEDENCE)

        # Every parser expression registers its own production(s).
        for expression in PARSER_EXPRESSIONS:
            expression.add_production_function(generator,
                                               distribution_factory)

        #  pylint: disable=unused-variable
        @generator.error
        def error_handler(_: ProbabilityDistributionState,
                          token: rply.token.Token):
            raise ValueError(
                f"Ran into a {token.gettokentype()} ({token.getstr()}) where it wasn't expected, at position {token.getsourcepos()}."
            )

        self._parser = generator.build()
        self._probability_distribution_state_factory = state_factory
Beispiel #2
0
def get_parser():
    """Build and return the rply LR parser for the language grammar.

    All productions are registered on a local ``rply.ParserGenerator`` built
    from ``TOKENS``; production callbacks receive ``(state, p)`` where ``p``
    is the list of matched right-hand-side pieces.  Syntax problems detected
    inside callbacks are reported through ``errorhandler``, always using the
    ``raise errorhandler(...)`` form so control never continues past a
    reported error.

    Raises:
        Exception: if the generated LR table has shift/reduce conflicts.
    """
    # Precedence entries are listed from lowest to highest binding strength.
    pg = rply.ParserGenerator(TOKENS,
                              precedence=[
                                  ('left', ['AND']),
                                  ('left', ['OR']),
                                  ('left',
                                   ['EQ', 'LT', 'GT', 'IN', 'NE', 'LE', 'GE']),
                                  ('left', ['PLUS', 'MINUS']),
                                  ('left', ['TRUEDIV', 'STAR']),
                                  ('left', ['DOT']),
                                  ('left', ['LEFT_PAREN']),
                              ])
    pg.error(errorhandler)

    # ---- program / top-level body -------------------------------------

    @pg.production('program : body')
    def program_body(state, p):
        element_list = p[0].get_element_list()
        # Local (non-global) variable declarations are rejected at top level.
        for elem in element_list:
            if (isinstance(elem, ast.VarDeclaration)
                    or isinstance(elem, ast.VarDeclarationConstant)):
                raise errorhandler(state, elem,
                                   'var declarations in body disallowed')
        return ast.Program(element_list, srcpos=sr(element_list))

    @pg.production('body :')
    def body_empty(state, p):
        return ast.FunctionBody(None, None)

    @pg.production('body : body body_element')
    def body_body_element(state, p):
        return ast.FunctionBody(p[1], p[0])

    @pg.production('body_element : function')
    def body_function(state, p):
        return p[0]

    @pg.production('body_element : class_definition')
    def body_element_class_definition(state, p):
        return p[0]

    @pg.production('body_element : SEMICOLON')
    def body_element_semicolon(state, p):
        return None

    @pg.production('body_element : global_var_declaration')
    def body_element_var_decl(state, p):
        return p[0]

    # ---- imports --------------------------------------------------------

    @pg.production('body_element : IMPORT IDENTIFIER dot_identifier_list'
                   ' optional_import SEMICOLON')
    def body_element_import_statement(state, p):
        basename = p[1].getstr()
        if p[2] is None and p[3] is None:
            return ast.Import([basename], [], srcpos=sr(p))
        if p[3] is None:
            # No brace list: the last dotted component is the imported name.
            names = p[2].get_names()
            return ast.Import([basename] + names[:-1], [names[-1]],
                              srcpos=sr(p))
        if p[2] is None:
            return ast.Import([basename], p[3].get_names(), srcpos=sr(p))
        return ast.Import([basename] + p[2].get_names(),
                          p[3].get_names(),
                          srcpos=sr(p))

    @pg.production('optional_import :')
    def optional_import_empty(state, p):
        return None

    @pg.production('optional_import : '
                   'LEFT_CURLY_BRACE IDENTIFIER identifier_list '
                   'RIGHT_CURLY_BRACE')
    def optional_import_brace(state, p):
        return ast.IdentifierListPartial(p[1].getstr(), p[2])

    # ---- classes and functions -----------------------------------------

    @pg.production('class_definition : CLASS IDENTIFIER LEFT_CURLY_BRACE body '
                   'RIGHT_CURLY_BRACE')
    def class_definition(state, p):
        return ast.ClassDefinition(p[1].getstr(), p[3], srcpos=sr(p))

    @pg.production('class_definition : CLASS IDENTIFIER LEFT_PAREN IDENTIFIER '
                   'RIGHT_PAREN LEFT_CURLY_BRACE body RIGHT_CURLY_BRACE')
    def class_definition_inheritance(state, p):
        return ast.ClassDefinition(p[1].getstr(),
                                   p[6],
                                   p[3].getstr(),
                                   srcpos=sr(p))

    @pg.production('function : DEF IDENTIFIER arglist LEFT_CURLY_BRACE'
                   ' function_body RIGHT_CURLY_BRACE')
    def function_function_body(state, p):
        lineno = p[0].getsourcepos().lineno
        return ast.Function(p[1].getstr(),
                            p[2].get_vars(),
                            p[4].get_element_list(),
                            lineno,
                            srcpos=sr(p))

    @pg.production('function_body :')
    def function_body_empty(state, p):
        return ast.FunctionBody(None, None)

    @pg.production('function_body : function_body statement')
    def function_body_statement(state, p):
        return ast.FunctionBody(p[1], p[0])

    # ---- simple statements and declarations ----------------------------

    @pg.production('statement : expression SEMICOLON')
    def statement_expression(state, p):
        return ast.Statement(p[0], srcpos=sr(p))

    @pg.production('statement : SEMICOLON')
    def statement_empty(state, p):
        return None

    @pg.production('statement : var_declaration')
    def statement_var_decl(state, p):
        return p[0]

    @pg.production('global_var_declaration : LET IDENTIFIER type_decl '
                   'arg_decl SEMICOLON')
    @pg.production(
        'global_var_declaration : LET IDENTIFIER ASSIGN constant_val '
        'type_decl arg_decl SEMICOLON')
    def global_var_declaration(state, p):
        # Five pieces means the form without an initial value.
        if len(p) == 5:
            decls = [ast.Var(p[1].getstr(), p[2], None, srcpos=sr([p[1], p[2]]))] + \
                p[3].get_vars()
        else:
            decls = [ast.Var(p[1].getstr(), p[4], p[3], srcpos=sr([p[1], p[2], p[3]]))] + \
                p[5].get_vars()
        return ast.VarDeclarationConstant(decls, srcpos=sr(p))

    @pg.production('var_declaration : LET IDENTIFIER type_decl var_decl '
                   'SEMICOLON')
    @pg.production('var_declaration : LET IDENTIFIER ASSIGN expression '
                   'type_decl var_decl SEMICOLON')
    def var_declaration(state, p):
        # Five pieces means the form without an initial value.
        if len(p) == 5:
            decls = [ast.Var(p[1].getstr(), p[2], None, srcpos=sr([p[1], p[2]]))] + \
                p[3].get_vars()
        else:
            decls = [ast.Var(p[1].getstr(), p[4], p[3], srcpos=sr([p[1], p[2], p[3]]))] + \
                p[5].get_vars()
        return ast.VarDeclaration(decls, srcpos=sr(p))

    @pg.production('arg_decl : ')
    @pg.production('var_decl : ')
    def arg_decl_empty(state, p):
        return ast.VarDeclPartial(None, None, None, None, srcpos=(0, 0))

    @pg.production('arg_decl : COMMA IDENTIFIER type_decl arg_decl')
    @pg.production('arg_decl : COMMA IDENTIFIER ASSIGN constant_val type_decl '
                   'arg_decl')
    @pg.production('var_decl : COMMA IDENTIFIER type_decl var_decl')
    @pg.production('var_decl : COMMA IDENTIFIER ASSIGN expression type_decl '
                   'var_decl')
    def arg_decl_identifier(state, p):
        # Four pieces means the form without a default/initial value.
        if len(p) == 4:
            return ast.VarDeclPartial(p[1].getstr(),
                                      p[2],
                                      None,
                                      p[3],
                                      srcpos=sr(p))
        else:
            return ast.VarDeclPartial(p[1].getstr(),
                                      p[4],
                                      p[3],
                                      p[5],
                                      srcpos=sr(p))

    @pg.production('type_decl : ')
    def type_decl_empty(state, p):
        return ast.NoTypeDecl(srcpos=(0, 0))

    @pg.production('type_decl : COLON IDENTIFIER')
    def type_decl_non_empty(state, p):
        return ast.BaseTypeDecl(p[1].getstr(), srcpos=sr([p[1]]))

    # ---- assignments and control flow ----------------------------------

    @pg.production('statement : IDENTIFIER ASSIGN expression SEMICOLON')
    def statement_identifier_assign_expr(state, p):
        return ast.Assignment(p[0].getstr(), p[2], srcpos=sr(p))

    @pg.production(
        'statement : atom DOT IDENTIFIER ASSIGN expression SEMICOLON')
    def statement_setattr(state, p):
        return ast.Setattr(p[0], p[2].getstr(), p[4], srcpos=sr(p))

    @pg.production(
        'statement : atom LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET'
        ' ASSIGN expression SEMICOLON')
    def statement_setitem(state, p):
        return ast.Setitem(p[0], p[2], p[5], srcpos=sr(p))

    @pg.production('statement : RETURN expression SEMICOLON')
    def statement_return(state, p):
        return ast.Return(p[1], srcpos=sr(p))

    @pg.production(
        'statement : WHILE expression LEFT_CURLY_BRACE function_body'
        ' RIGHT_CURLY_BRACE')
    def statement_while_loop(state, p):
        return ast.While(p[1], p[3].get_element_list(), srcpos=sr(p))

    @pg.production('statement : FOR IDENTIFIER IN expression LEFT_CURLY_BRACE '
                   'function_body RIGHT_CURLY_BRACE')
    def statement_for_loop(state, p):
        return ast.For(p[1].getstr(),
                       p[3],
                       p[5].get_element_list(),
                       srcpos=sr(p))

    @pg.production('statement : IF expression LEFT_CURLY_BRACE function_body'
                   ' RIGHT_CURLY_BRACE optional_else_block')
    def statement_if_block(state, p):
        if p[5] is None:
            else_block = None
        else:
            else_block = p[5].get_element_list()
        return ast.If(p[1],
                      p[3].get_element_list(),
                      else_block,
                      srcpos=sr([p[0], p[1]]))

    @pg.production('optional_else_block : ')
    def optional_else_block_empty(state, p):
        return None

    @pg.production('optional_else_block : ELSE LEFT_CURLY_BRACE function_body '
                   'RIGHT_CURLY_BRACE')
    def optional_else_block(state, p):
        return p[2]

    # ---- exceptions -----------------------------------------------------

    @pg.production('statement : RAISE expression SEMICOLON')
    def statement_raise(state, p):
        return ast.Raise(p[1], srcpos=sr(p))

    @pg.production('statement : TRY LEFT_CURLY_BRACE function_body '
                   'RIGHT_CURLY_BRACE except_finally_clauses')
    def statement_try_except(state, p):
        if p[4] is None:
            # A bare ``try`` block with no clauses is an error.  The explicit
            # ``raise`` guarantees we never reach ``p[4].gather_list()`` with
            # ``p[4]`` being None.
            raise errorhandler(state, p[0])
        lst = p[4].gather_list()
        return ast.TryExcept(p[2].get_element_list(),
                             lst,
                             srcpos=sr([p[0], lst[-1]]))

    @pg.production('except_finally_clauses : ')
    def except_finally_clauses_empty(state, p):
        return None

    @pg.production(
        'except_finally_clauses : EXCEPT expression LEFT_CURLY_BRACE'
        ' function_body RIGHT_CURLY_BRACE except_finally_clauses')
    def except_finally_clauses_except(state, p):
        # We want the position information for the clause, not the list.
        return ast.ExceptClauseList([p[1]],
                                    None,
                                    p[3].get_element_list(),
                                    p[5],
                                    srcpos=sr(p[:-1]))

    @pg.production('except_finally_clauses : EXCEPT expression AS IDENTIFIER '
                   'LEFT_CURLY_BRACE function_body RIGHT_CURLY_BRACE '
                   'except_finally_clauses')
    def except_finally_clauses_except_as_identifier(state, p):
        # We want the position information for the clause, not the list.
        return ast.ExceptClauseList([p[1]],
                                    p[3].getstr(),
                                    p[5].get_element_list(),
                                    p[7],
                                    srcpos=sr(p[:-1]))

    @pg.production('except_finally_clauses : FINALLY LEFT_CURLY_BRACE '
                   'function_body RIGHT_CURLY_BRACE')
    def except_finally_clauses_finally(state, p):
        return ast.FinallyClause(p[2].get_element_list(), False, srcpos=sr(p))

    @pg.production('except_finally_clauses : ELSE LEFT_CURLY_BRACE '
                   'function_body RIGHT_CURLY_BRACE')
    def except_finally_clause_else(state, p):
        return ast.FinallyClause(p[2].get_element_list(), True, srcpos=sr(p))

    # ---- identifier lists and argument lists ---------------------------

    @pg.production('identifier_list : COMMA IDENTIFIER identifier_list')
    def identifier_list_arglist(state, p):
        return ast.IdentifierListPartial(p[1].getstr(), p[2])

    @pg.production('identifier_list :')
    def rest_of_identifier_list_empty(state, p):
        return None

    @pg.production('dot_identifier_list : DOT IDENTIFIER dot_identifier_list')
    def dot_identifier_list_arglist(state, p):
        return ast.IdentifierListPartial(p[1].getstr(), p[2])

    @pg.production('dot_identifier_list :')
    def rest_of_dot_identifier_list_empty(state, p):
        return None

    @pg.production('arglist : LEFT_PAREN RIGHT_PAREN')
    def arglist(state, p):
        return ast.ArgList([], srcpos=sr(p))

    @pg.production('arglist : LEFT_PAREN IDENTIFIER type_decl arg_decl'
                   ' RIGHT_PAREN')
    @pg.production('arglist : LEFT_PAREN IDENTIFIER ASSIGN constant_val '
                   'type_decl arg_decl RIGHT_PAREN')
    def arglist_non_empty(state, p):
        # Five pieces means the first argument has no default value.
        if len(p) == 5:
            decls = ([ast.Var(p[1].getstr(), p[2], None, p[1].getsrcpos())] +
                     p[3].get_vars())
        else:
            decls = ([ast.Var(p[1].getstr(), p[4], p[3], p[1].getsrcpos())] +
                     p[5].get_vars())
        return ast.ArgList(decls, srcpos=sr(p))

    # ---- literals -------------------------------------------------------

    @pg.production('constant_val : INTEGER')
    def constant_val_int(state, p):
        return ast.Number(int(p[0].getstr()), srcpos=sr(p))

    @pg.production('constant_val : string')
    def constant_val_str(state, p):
        return p[0]

    @pg.production('expression : INTEGER')
    def expression_number(state, p):
        return ast.Number(int(p[0].getstr()), srcpos=sr(p))

    @pg.production('expression : string')
    def expression_string(state, p):
        return p[0]

    @pg.production('string : ST_DQ_STRING stringcontent ST_ENDSTRING')
    @pg.production('string : ST_SQ_STRING stringcontent ST_ENDSTRING')
    @pg.production('string : ST_RAW_DQ_STRING rawstringcontent ST_ENDRAW')
    @pg.production('string : ST_RAW_SQ_STRING rawstringcontent ST_ENDRAW')
    def expression_string_expand(state, p):
        val = ''.join(p[1].get_strparts())
        # Decode only to validate; the decoded result is discarded.
        try:
            str_decode_utf_8(val, len(val), 'strict', final=True)
        except UnicodeDecodeError:
            raise errorhandler(state, p[1], msg="Unicode error")
        return ast.String(val, srcpos=sr(p))

    @pg.production('expression : ST_INTERP_STRING interpstr ST_ENDSTRING')
    def expression_interpstring(state, p):
        strings = p[1].get_strings()
        return ast.InterpString(strings, p[1].get_exprs(), srcpos=sr(p))

    @pg.production('expression : atom')
    def expression_atom(state, p):
        return p[0]

    @pg.production('expression : expression OR expression')
    def expression_or_expression(state, p):
        return ast.Or(p[0], p[2], srcpos=sr(p))

    @pg.production('expression : expression AND expression')
    def expression_and_expression(state, p):
        return ast.And(p[0], p[2], srcpos=sr(p))

    # NOTE(review): unlike expression_string_expand, the two interpstr
    # productions below let a UnicodeDecodeError propagate unchanged instead
    # of routing it through errorhandler -- confirm whether that is intended.
    @pg.production('interpstr : stringcontent')
    def interpstr_start(state, p):
        val = ''.join(p[0].get_strparts())
        str_decode_utf_8(val, len(val), 'strict', final=True)
        return ast.InterpStringContents([val], [])

    @pg.production(
        'interpstr : interpstr ST_INTERP expression RIGHT_CURLY_BRACE stringcontent'
    )
    def interpstr_part(state, p):
        val = ''.join(p[4].get_strparts())
        str_decode_utf_8(val, len(val), 'strict', final=True)
        return ast.InterpStringContents(p[0].get_strings() + [val],
                                        p[0].get_exprs() + [p[2]])

    # ---- string contents and escapes -----------------------------------

    @pg.production('stringcontent : ')
    def string_empty(state, p):
        return ast.StringContent([], srcpos=(0, 0))

    @pg.production('stringcontent : stringcontent ESC_QUOTE')
    def string_esc_quote(state, p):
        # Keep only the character following the backslash.
        return ast.StringContent(p[0].get_strparts() + [p[1].getstr()[1]],
                                 srcpos=sr(p))

    @pg.production('stringcontent : stringcontent ESC_ESC')
    def string_esc_esc(state, p):
        return ast.StringContent(p[0].get_strparts() + ['\\'], srcpos=sr(p))

    @pg.production('stringcontent : stringcontent ESC_SIMPLE')
    def string_esc_simple(state, p):
        # Map the character after the backslash to the character it denotes.
        part = {
            'a': '\a',
            'b': '\b',
            'f': '\f',
            'n': '\n',
            'r': '\r',
            't': '\t',
            'v': '\v',
            '0': '\0',
            '$': '$',
        }[p[1].getstr()[1]]
        return ast.StringContent(p[0].get_strparts() + [part], srcpos=sr(p))

    @pg.production('stringcontent : stringcontent ESC_HEX_8')
    @pg.production('stringcontent : stringcontent ESC_HEX_16')
    def string_esc_hex_fixed(state, p):
        s = p[1].getstr()
        # Skip the two-character escape prefix; the rest is passed on as hex.
        return ast.StringContent(p[0].get_strparts() +
                                 [hex_to_utf8(state, p[0], s[2:])],
                                 srcpos=sr(p))

    @pg.production('stringcontent : stringcontent ESC_HEX_ANY')
    def string_esc_hex_any(state, p):
        s = p[1].getstr()
        end = len(s) - 1
        assert end >= 0
        # Drop the three-character prefix and the final character.
        return ast.StringContent(p[0].get_strparts() +
                                 [hex_to_utf8(state, p[0], s[3:end])],
                                 srcpos=sr(p))

    @pg.production('stringcontent : stringcontent ESC_UNRECOGNISED')
    def string_esc_unrecognised(state, p):
        # Unrecognised escapes are kept verbatim.
        return ast.StringContent(p[0].get_strparts() + [p[1].getstr()],
                                 srcpos=sr(p))

    @pg.production('stringcontent : stringcontent CHAR')
    def string_char(state, p):
        return ast.StringContent(p[0].get_strparts() + [p[1].getstr()],
                                 srcpos=sr(p))

    @pg.production('rawstringcontent : ')
    def rawstring_empty(state, p):
        return ast.StringContent([], srcpos=(0, 0))

    @pg.production('rawstringcontent : rawstringcontent RAW_ESC')
    @pg.production('rawstringcontent : rawstringcontent RAW_CHAR')
    def rawstring_char(state, p):
        return ast.StringContent(p[0].get_strparts() + [p[1].getstr()],
                                 srcpos=sr(p))

    # ---- atoms ----------------------------------------------------------

    @pg.production('atom : TRUE')
    def atom_true(state, p):
        return ast.TrueNode(srcpos=sr(p))

    @pg.production('atom : NONE')
    def atom_none(state, p):
        return ast.NoneNode(srcpos=sr(p))

    @pg.production('atom : IDENTIFIER')
    def atom_identifier(state, p):
        return ast.Identifier(p[0].getstr(), srcpos=sr(p))

    @pg.production('atom : FALSE')
    def atom_false(state, p):
        return ast.FalseNode(srcpos=sr(p))

    @pg.production('atom : atom LEFT_PAREN call_args_list ' 'RIGHT_PAREN')
    def atom_call(state, p):
        arglist = p[2].get_element_list()
        rawargs = []
        namedargs = []
        now_named = False
        # Split positional from named arguments; once a named argument has
        # been seen, any further positional argument is an error.
        for arg in arglist:
            if isinstance(arg, ast.NamedArg):
                now_named = True
                namedargs.append(arg)
            else:
                if now_named:
                    raise errorhandler(state,
                                       p[2],
                                       msg="Named args before regular args")
                rawargs.append(arg)
        return ast.Call(p[0], rawargs[:], namedargs[:], srcpos=sr(p))

    @pg.production('call_args_list :')
    def call_args_list_empty(state, p):
        return ast.ExpressionListPartial(None, None)

    @pg.production('call_args_list : expression ASSIGN expression rest_of_args'
                   )
    def call_args_list_named_rest(state, p):
        ident = p[0]
        # The grammar accepts any expression on the left of ASSIGN; only a
        # plain identifier is a valid argument name.
        if not isinstance(ident, ast.Identifier):
            raise errorhandler(state,
                               p[0],
                               msg="Named argument is not an identifier")
        return ast.ExpressionListPartial(ast.NamedArg(ident.name, p[2]), p[3])

    @pg.production('call_args_list : expression rest_of_args')
    def call_args_list_rest(state, p):
        return ast.ExpressionListPartial(p[0], p[1])

    @pg.production('rest_of_args : COMMA expression ASSIGN expression'
                   ' rest_of_args')
    def rest_of_args_named_arg(state, p):
        ident = p[1]
        if not isinstance(ident, ast.Identifier):
            raise errorhandler(state,
                               p[1],
                               msg="Named argument is not an identifier")
        return ast.ExpressionListPartial(
            ast.NamedArg(ident.name, p[3], srcpos=sr([p[1], p[2], p[3]])),
            p[4])

    @pg.production('rest_of_args : COMMA expression rest_of_args')
    def rest_of_args_arg(state, p):
        return ast.ExpressionListPartial(p[1], p[2])

    @pg.production('rest_of_args :')
    def rest_of_args_empty(state, p):
        return ast.ExpressionListPartial(None, None)

    @pg.production('atom : LEFT_PAREN expression RIGHT_PAREN')
    def atom_paren_expression_paren(state, p):
        return p[1]

    @pg.production('atom : atom DOT IDENTIFIER')
    def atom_dot_identifier(state, p):
        return ast.Getattr(p[0], p[2].getstr(), srcpos=sr(p))

    @pg.production(
        'atom : LEFT_SQUARE_BRACKET expression_list RIGHT_SQUARE_BRACKET')
    def atom_list_literal(state, p):
        return ast.List(p[1].get_element_list(), srcpos=sr(p))

    @pg.production('atom : LEFT_CURLY_BRACE dict_pair_list RIGHT_CURLY_BRACE')
    def atom_dict_literal(state, p):
        return ast.Dict(p[1].get_element_list(), srcpos=sr(p))

    @pg.production(
        'atom : atom LEFT_SQUARE_BRACKET expression RIGHT_SQUARE_BRACKET')
    def atom_getitem(state, p):
        return ast.Getitem(p[0], p[2], srcpos=sr(p))

    # ---- binary operators ----------------------------------------------

    @pg.production('expression : expression PLUS expression')
    @pg.production('expression : expression MINUS expression')
    @pg.production('expression : expression STAR expression')
    @pg.production('expression : expression TRUEDIV expression')
    @pg.production('expression : expression LT expression')
    @pg.production('expression : expression GT expression')
    @pg.production('expression : expression LE expression')
    @pg.production('expression : expression GE expression')
    @pg.production('expression : expression EQ expression')
    @pg.production('expression : expression IN expression')
    @pg.production('expression : expression NE expression')
    def expression_binop_expression(state, p):
        # The operator token's text doubles as the operator name.
        return ast.BinOp(p[1].getstr(), p[0], p[2], sr([p[1]]), srcpos=sr(p))

    @pg.production('expression : expression NOT IN expression')
    def expression_not_in_expression(state, p):
        return ast.BinOp('not in', p[0], p[3], sr([p[1], p[2]]), srcpos=sr(p))

    # ---- list and dict literal contents --------------------------------

    @pg.production('expression_list : ')
    def expression_list_empty(state, p):
        return ast.ExpressionListPartial(None, None)

    @pg.production('expression_list : expression expression_sublist')
    def expression_list_expression(state, p):
        return ast.ExpressionListPartial(p[0], p[1])

    # The lone-COMMA alternative permits a trailing comma.
    @pg.production('expression_sublist : ')
    @pg.production('expression_sublist : COMMA')
    def expression_sublist_empty(state, p):
        return ast.ExpressionListPartial(None, None)

    @pg.production('expression_sublist : COMMA expression expression_sublist')
    def expression_sublist_expression(state, p):
        return ast.ExpressionListPartial(p[1], p[2])

    @pg.production('dict_pair_list : ')
    def dict_pair_list_empty(state, p):
        return ast.ExpressionListPartial(None, None)

    @pg.production(
        'dict_pair_list : expression COLON expression dict_pair_sublist')
    def dict_pair_list_expression(state, p):
        # Key and value are chained as two consecutive list entries.
        return ast.ExpressionListPartial(p[0],
                                         ast.ExpressionListPartial(p[2], p[3]))

    # The lone-COMMA alternative permits a trailing comma.
    @pg.production('dict_pair_sublist : ')
    @pg.production('dict_pair_sublist : COMMA')
    def dict_pair_sublist_empty(state, p):
        return ast.ExpressionListPartial(None, None)

    @pg.production(
        'dict_pair_sublist : COMMA expression COLON expression dict_pair_sublist'
    )
    def dict_pair_sublist_expression(state, p):
        return ast.ExpressionListPartial(p[1],
                                         ast.ExpressionListPartial(p[3], p[4]))

    res = pg.build()
    # Refuse to hand out a parser whose grammar is ambiguous.
    if res.lr_table.sr_conflicts:
        raise Exception("shift reduce conflicts")
    return res
Beispiel #3
0
    ('PAREN_BRACE_LEFT', r'\{'),
    ('PAREN_BRACE_RIGHT', r'\}'),
]

# Build the lexer: register every (name, pattern) pair found in ``tokens``.
lg = rply.LexerGenerator()

for token, rule in tokens:
    lg.add(token, rule)

# Ignore '#' and '//' comments (the lookahead leaves the line terminator
# unconsumed) as well as runs of whitespace.
lg.ignore(r'#.*(?=\r|\n|$)')
lg.ignore(r'//.*(?=\r|\n|$)')
lg.ignore(r'\s+')
lexer = lg.build()

# The parser accepts exactly the token names listed in ``tokens``; ASTERISK
# is declared left-associative.
pg = rply.ParserGenerator([token for token, _ in tokens],
                          precedence=[
                              ('left', ['ASTERISK']),
                          ])


@pg.production('expression : graphs')
def expression_paren(p):
    """Start rule: pass the ``graphs`` result through unchanged."""
    graphs = p[0]
    return graphs


@pg.production('graphs : graph SEMICOLON graphs')
def graphs_semicolon(p):
    """Graphs

    semicolon separated
    """
Beispiel #4
0
# Module-level lexer instance; lex() below iterates over its token stream.
LEXER = lexer()

def lex(src):
    """Yield the tokens of *src* from ``LEXER`` after light post-processing.

    * ``COMMENT`` tokens are dropped.
    * An ``ID`` whose text is in ``KEYWORDS`` is renamed to the upper-cased
      keyword.
    * A ``LITERAL`` loses its first and last character.

    Tokens are modified in place before being yielded.
    """
    for tok in LEXER.lex(src):
        kind = tok.name
        if kind == 'COMMENT':
            continue
        if kind == 'ID' and tok.value in KEYWORDS:
            tok.name = tok.value.upper()
        elif kind == 'LITERAL':
            tok.value = tok.value[1:-1]
        yield tok

# Parser generator over the fixed token names below plus one upper-cased
# token per entry in KEYWORDS (matching the renaming done in lex()).
# TILDE is declared left-associative.
pg = rply.ParserGenerator([
    'AMP', 'CNAME', 'COMBINE', 'COMMA', 'DOCUMENTATION', 'EQUAL', 'ID',
    'LBRACE', 'LBRACKET', 'LPAREN', 'LIST', 'LITERAL', 'MINUS', 'MIXED',
    'PLUS', 'PIPE', 'QID', 'QMARK', 'RBRACE', 'RBRACKET', 'RPAREN', 'STAR',
    'TILDE',
] + [s.upper() for s in KEYWORDS], precedence=[("left", ['TILDE'])])


class Node(object):
    """Simple tree node carrying a ``type``, a ``name`` and a list ``value``.

    ``value`` defaults to a fresh empty list whenever a falsy value is given;
    anything else must already be a list (checked with an assertion).
    """

    __slots__ = 'type', 'name', 'value'

    def __init__(self, type, name, value=None):
        self.type = type
        self.name = name
        self.value = value if value else []
        assert isinstance(self.value, list), self.value

    def __repr__(self):
        # Show only the slots that are set to something other than None.
        shown = []
        for slot in self.__slots__:
            attr = getattr(self, slot, None)
            if attr is not None:
                shown.append('%s=%r' % (slot, attr))
        return 'Node(%s)' % ', '.join(shown)
Beispiel #5
0
import rply

from .lexer import lg
from .utils.parser import build_call, get_filter_func, get_lookup_name


# Parser generator whose accepted tokens are the names of every rule
# registered on the lexer generator ``lg``.  Precedence entries are listed
# from lowest to highest binding strength.
pg = rply.ParserGenerator(
    [rule.name for rule in lg.rules],
    precedence=[
        ('left', ['COMMA']),
        ('right', ['ASSIGN']),
        ('left', ['PIPE']),
        ('left', ['AND', 'OR']),
        ('left', ['EQUAL', 'NEQUAL',
                  'LT', 'LTE', 'GT', 'GTE',
                  'IN', 'NOTIN',
                  'ISNOT', 'IS']),
        ('left', ['PLUS', 'MINUS']),
        ('left', ['MUL', 'DIV', 'MOD']),
        ('left', ['LSQB', 'RSQB']),
        ('left', ['DOT']),
        ('left', ['LPAREN', 'RPAREN']),
    ],
)

"""

arg     :   expr

arg_list    :   arg
            |   arg_list COMMA arg
Beispiel #6
0
# Parser generator for the full token set (listed alphabetically).
# Precedence entries are listed from lowest to highest binding strength;
# NOT and LBRA are right-associative, everything else left-associative.
pg = rply.ParserGenerator([
    'AMP',
    'AND',
    'ARROW',
    'AS',
    'ASGT',
    'BOOL',
    'BREAK',
    'CARET',
    'CLASS',
    'COLON',
    'COMMA',
    'CONTINUE',
    'DEDENT',
    'DEF',
    'DIV',
    'DOLLAR',
    'DOT',
    'ELIF',
    'ELSE',
    'EQ',
    'EXCEPT',
    'FOR',
    'FROM',
    'GE',
    'GT',
    'IADD',
    'IF',
    'IMPORT',
    'IN',
    'INDENT',
    'IS',
    'LBRA',
    'LE',
    'LPAR',
    'LT',
    'MINUS',
    'MUL',
    'MOD',
    'NAME',
    'NE',
    'NL',
    'NONE',
    'NOT',
    'NUM',
    'OR',
    'PASS',
    'PIPE',
    'PLUS',
    'QM',
    'RAISE',
    'RBRA',
    'RETURN',
    'RPAR',
    'STR',
    'TRAIT',
    'TRY',
    'WHILE',
    'YIELD',
],
                          precedence=[
                              ('left', ['COMMA']),
                              ('left', ['IF']),
                              ('left', ['OR']),
                              ('left', ['AND']),
                              ('right', ['NOT']),
                              ('left',
                               ['LT', 'LE', 'GT', 'GE', 'NE', 'EQ', 'IS']),
                              ('left', ['PIPE']),
                              ('left', ['CARET']),
                              ('left', ['AMP']),
                              ('left', ['PLUS', 'MINUS']),
                              ('left', ['MUL', 'DIV', 'MOD']),
                              ('left', ['AS']),
                              ('right', ['LBRA']),
                              ('left', ['DOT']),
                          ])
Beispiel #7
0
class Add(rply.token.BaseBox):
    """Box wrapping ``cdr``; evaluates to the sum of its first element.

    ``eval`` calls ``eval()`` on ``cdr[0]`` and sums whatever iterable that
    returns.
    """

    def __init__(self, cdr):
        self.cdr = cdr

    def eval(self):
        head = self.cdr[0]
        return sum(head.eval())


# Parser generator for a minimal grammar of numbers, parentheses and PLUS.
# NOTE: no precedence table is passed; the rules below are commented out.
pg = rply.ParserGenerator(
    # A list of all token names, accepted by the parser.
    ['NUMBER', 'OPEN', 'CLOSE',
     'PLUS'
    ],
    # A list of precedence rules with ascending precedence, to
    # disambiguate ambiguous production rules.
    # precedence=[
        # ('left', ['PLUS', 'MINUS']),
        # ('left', ['MUL', 'DIV'])
    # ]
)

@pg.production('expression : NUMBER')
def expression_number_____(p):
    """Turn a NUMBER token into a ``Number`` holding a list of ints.

    ``p`` is the list of pieces matched by the rule's right-hand side.  The
    token text is split on single spaces and each piece converted with
    ``int``.
    """
    pieces = p[0].getstr().split(" ")
    values = [int(piece) for piece in pieces]
    return Number(values)
Beispiel #8
0
from functools import reduce

import rply

from hindley_milner.src import utils
from hindley_milner.src import typ
from hindley_milner.src import syntax
from hindley_milner.src.parse import lexer

# Parser generator over every token exported by the lexer module.
# Precedence entries are listed from lowest to highest binding strength.
# NOTE(review): "application" is not an upper-case token name like the
# others; presumably it is a pseudo-token referenced via ``precedence=`` on
# a production elsewhere in this file -- confirm.
# ``cache_id`` names the cache rply uses for the generated table.
pg = rply.ParserGenerator(lexer.all_tokens,
                          precedence=[
                              ("left", ["FN", "ROCKET"]),
                              ("left", ["IF", "THEN", "ELSE"]),
                              ("left", ["INT_LIT", "BOOL_LIT", "IDENT"]),
                              ("left", ["VAL", "FUN"]),
                              ("left", ["LET", "EQ", "IN", "END"]),
                              ("left", ["LPAREN", "RPAREN"]),
                              ("left", ["application"]),
                          ],
                          cache_id="hindley-milner")


@pg.production("expr : IF expr THEN expr ELSE expr")
def if_expr(s):
    """Build a ``syntax.If`` from the condition and the two branches."""
    condition = s[1]
    then_branch = s[3]
    else_branch = s[5]
    return syntax.If(condition, then_branch, else_branch)


@pg.production("expr : LET decl IN expr END")
def let_expr(s):
    """Build a ``syntax.Let`` from a declaration mapping and a body.

    The declaration (``s[1]``) is indexed by ``"lhs"`` and ``"rhs"``.
    """
    binding = s[1]
    bound_name = binding["lhs"]
    bound_value = binding["rhs"]
    return syntax.Let(bound_name, bound_value, s[3])
Beispiel #9
0
def part_one():
    """Return the sum of ``evaluate_expression`` over every line in ``data``."""
    total = 0
    for line in data:
        total = total + evaluate_expression(line)
    return total


import rply

# Lexer for arithmetic expressions: integers, '+', '*', and parentheses.
# All patterns are raw strings so that regex escapes such as ``\s`` are not
# treated as (invalid) Python string escapes.
lg = rply.LexerGenerator()
lg.add("NUM", r"\d+")
lg.add("ADD", r"\+")
lg.add("MUL", r"\*")
lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
lg.ignore(r"\s+")
lexer = lg.build()
# Precedence is listed lowest first, so ADD binds tighter than MUL here.
pg = rply.ParserGenerator(["NUM", "ADD", "MUL", "LPAREN", "RPAREN"],
                          [("left", ["MUL"]), ("left", ["ADD"])])


@pg.production("expr : NUM")
def _(tokens):
    """Convert a NUM token's text to an int."""
    number_token = tokens[0]
    return int(number_token.value)


@pg.production("expr : expr ADD expr")
def _(tokens):
    """Add the already-evaluated left and right operands."""
    lhs = tokens[0]
    rhs = tokens[2]
    return lhs + rhs


@pg.production("expr : expr MUL expr")
def _(tokens):
    """Multiply the already-evaluated left and right operands."""
    lhs = tokens[0]
    rhs = tokens[2]
    return lhs * rhs
Beispiel #10
0
    'TRUE',
    'NOT',
    'EQUALS',
    'HAS',
    'ASSIGN',
    'COMMA',
    'VAR',
    'STR',
    'L',
    'R',
    'LE',
    'GE',
    'LT',
    'GT',
]
# Parser generator over the token names collected in `tokens`.
# NOTE(review): no precedence table is passed, yet the AND / OR / NOT rules
# below are ambiguous on their own (e.g. `a AND b OR c`); how rply resolves
# the resulting conflicts should be confirmed.
pg = rply.ParserGenerator(tokens)


@pg.production('expression : expression AND expression')
def expression_and(p):
    """Wrap the operands on either side of AND in an ``And`` node."""
    lhs, _and, rhs = p
    return And(lhs, rhs)


@pg.production('expression : expression OR expression')
def expression_or(p):
    """Wrap the operands on either side of OR in an ``Or`` node."""
    lhs, _or, rhs = p
    return Or(lhs, rhs)


@pg.production('expression : NOT expression')
def expression_not(p):
    """Wrap the operand that follows NOT in a ``Not`` node."""
    _not, operand = p
    return Not(operand)
Beispiel #11
0
import rply
from Math import *

# Parser generator over the token names the grammar uses. Names that are
# commented out inside the list are not registered with rply.
# Precedence is listed in ascending order: 'mul'/'div' bind tighter than
# 'add'/'sub', and both levels are left-associative. 'fac' and 'newline'
# have no precedence level assigned.
parser = rply.ParserGenerator(
    [
        'num',
        'add',
        'sub',
        #'equ',
        #'end',
        'fac',
        'mul',
        'div',
        'open-paren',
        'close-paren',
        'newline'
        #'open-braces',
        #'close-brace'
    ],
    precedence=[('left', ['add', 'sub']), ('left', ['mul', 'div'])])


@parser.production('expression : num')
def expression_number(p):
    """Turn a matched ``num`` token into a ``Number`` node.

    ``p`` holds the pieces matched by the rule's right-hand side -- here a
    single token whose text is converted with ``int``.
    """
    (num_token,) = p
    value = int(num_token.getstr())
    return Number(value)


@parser.production('expression : open-paren expression close-paren')
def expression_parens(p):
Beispiel #12
0
def get_parser():
    """Build and return the rply parser for the grammar defined below.

    Every production callback takes ``(state, p)``: ``p`` is the list of
    symbols matched by the rule's right-hand side, and ``state`` is accepted
    but not used by any callback in this function. ``errorhandler`` is
    registered as the parser's error callback.

    NOTE(review): the precedence table is empty while the binary PLUS and LT
    rules are ambiguous on their own; how rply resolves the resulting
    conflicts (and hence how ``a + b < c`` groups) should be confirmed.
    """
    pg = rply.ParserGenerator(TOKENS, precedence=[])
    pg.error(errorhandler)

    # --- program structure -------------------------------------------------
    @pg.production('program : body')
    def program_body(state, p):
        return p[0]

    @pg.production('body :')
    def body_empty(state, p):
        return ast.Program([])

    @pg.production('body : body body_element')
    def body_body_element(state, p):
        # Left-recursive accumulation: the new element is appended in place
        # to the list held by the node built so far, and that node is reused.
        p[0].get_element_list().append(p[1])
        return p[0]

    @pg.production('body_element : function')
    def body_function(state, p):
        return p[0]

    @pg.production('function : FUNCTION IDENTIFIER arglist LEFT_CURLY_BRACE'
                   ' function_body RIGHT_CURLY_BRACE')
    def function_function_body(state, p):
        # p[1] is the name token, p[2] the arglist node, p[4] the body node.
        return ast.Function(p[1].getstr(), p[2].get_element_list(),
                            p[4].get_element_list())

    @pg.production('function_body :')
    def function_body_empty(state, p):
        return ast.FunctionBody([])

    @pg.production('function_body : function_body statement')
    def function_body_statement(state, p):
        # Same in-place accumulation as 'body : body body_element'.
        p[0].get_element_list().append(p[1])
        return p[0]

    # --- statements --------------------------------------------------------
    @pg.production('statement : expression SEMICOLON')
    def statement_expression(state, p):
        return ast.Statement(p[0])

    @pg.production('statement : VAR IDENTIFIER var_decl SEMICOLON')
    def statement_var_decl(state, p):
        # First declared name followed by the names gathered by var_decl.
        return ast.VarDeclaration([p[1].getstr()] + p[2].get_names())

    @pg.production('var_decl : ')
    def var_decl_empty(state, p):
        return ast.VarDeclPartial([])

    @pg.production('var_decl : COMMA IDENTIFIER var_decl')
    def var_decl_identifier(state, p):
        # Right-recursive: prepend this name to the names that follow it.
        return ast.VarDeclPartial([p[1].getstr()] + p[2].get_names())

    @pg.production('statement : IDENTIFIER EQUALS expression SEMICOLON')
    def statement_identifier_equals_expr(state, p):
        return ast.Assignment(p[0].getstr(), p[2])

    @pg.production('statement : RETURN expression SEMICOLON')
    def statement_return(state, p):
        return ast.Return(p[1])

    @pg.production(
        'statement : WHILE expression LEFT_CURLY_BRACE function_body'
        ' RIGHT_CURLY_BRACE')
    def statement_while_loop(state, p):
        # p[1] is the loop condition, p[3] the function_body node.
        return ast.While(p[1], p[3].get_element_list())

    # Only an empty parameter list is matched by this grammar.
    @pg.production('arglist : LEFT_PAREN RIGHT_PAREN')
    def arglist(state, p):
        return ast.ArgList([])

    # --- expressions -------------------------------------------------------
    @pg.production('expression : INTEGER')
    def expression_number(state, p):
        return ast.Number(int(p[0].getstr()))

    @pg.production('expression : IDENTIFIER')
    def expression_identifier(state, p):
        return ast.Identifier(p[0].getstr())

    @pg.production('expression : expression LEFT_PAREN expression_list '
                   'RIGHT_PAREN')
    def expression_call(state, p):
        # p[0] is the callee expression, p[2] the collected arguments.
        return ast.Call(p[0], p[2].get_element_list())

    @pg.production('expression : LEFT_PAREN expression RIGHT_PAREN')
    def expression_paren_expression_paren(state, p):
        # Parentheses only group; the inner expression is returned as is.
        return p[1]

    @pg.production('expression : expression PLUS expression')
    def expression_plus_expression(state, p):
        return ast.BinOp('+', p[0], p[2])

    @pg.production('expression : expression LT expression')
    def expression_lt_expression(state, p):
        return ast.BinOp('<', p[0], p[2])

    # --- call argument lists -----------------------------------------------
    @pg.production('expression_list : ')
    def expression_list_empty(state, p):
        return ast.ExpressionListPartial([])

    @pg.production('expression_list : expression expression_sublist')
    def expression_list_expression(state, p):
        # First argument followed by the comma-separated remainder.
        return ast.ExpressionListPartial([p[0]] + p[1].get_element_list())

    @pg.production('expression_sublist : ')
    def expression_sublist_empty(state, p):
        return ast.ExpressionListPartial([])

    @pg.production('expression_sublist : COMMA expression expression_sublist')
    def expression_sublist_expression(state, p):
        # Right-recursive: prepend this argument to the ones that follow it.
        return ast.ExpressionListPartial([p[1]] + p[2].get_element_list())

    return pg.build()
Beispiel #13
0
 def spg(self):
     """Return the parser generator stored on ``self._spg``, creating it once.

     On first use the generator is built from the names of
     ``lexers.slg.rules`` with an empty precedence table and stored on the
     instance; later calls return the stored object.
     """
     cached = self._spg
     if cached is not None:
         return cached
     token_names = [rule.name for rule in lexers.slg.rules]
     self._spg = rply.ParserGenerator(token_names, precedence=[])
     return self._spg
Beispiel #14
0
def create_parser(lexer):
    """Build and return the rply parser for the expression grammar below.

    The token names come from ``lexer.rules``. Each production callback
    receives ``p``, the list of symbols matched by its rule, and returns an
    ``ast`` node; the operator nodes are created once up front and shared by
    every expression the parser produces.
    """
    token_names = [rule.name for rule in lexer.rules]
    # Levels are listed from loosest to tightest binding.
    precedence_levels = [
        ('right', ['IF', 'ELSE']),
        ('left', ['OR']),
        ('left', ['AND']),
        ('left', ['EQ']),
        ('left', ['CMP']),
        ('left', ['ADDSUB']),
        ('left', ['MULDIV']),
        ('right', ['NOT']),
    ]
    pg = rply.ParserGenerator(
        token_names,
        precedence=precedence_levels,
        cache_id='i18nspector-intexpr',
    )
    # Operator text -> shared operator node, one table per operator family.
    ast_bool = {'&&': ast.And(), '||': ast.Or()}
    ast_cmp = {
        '==': ast.Eq(),
        '!=': ast.NotEq(),
        '<': ast.Lt(),
        '<=': ast.LtE(),
        '>': ast.Gt(),
        '>=': ast.GtE(),
    }
    ast_arithmetic = {
        '+': ast.Add(),
        '-': ast.Sub(),
        '*': ast.Mult(),
        '/': ast.Div(),
        '%': ast.Mod(),
    }
    ast_not = ast.Not()

    # pylint: disable=unused-variable
    @pg.production('start : exp')
    def eval_start(p):
        """Wrap the whole expression in the top-level node."""
        return ast.Expr(p[0])

    @pg.production('exp : exp IF exp ELSE exp')
    def expr_ifelse(p):
        """Conditional: ``p[0] IF p[2] ELSE p[4]``."""
        first, branch, fallback = p[0], p[2], p[4]
        return ast.IfExp(first, branch, fallback)

    @pg.production('exp : exp OR exp')
    @pg.production('exp : exp AND exp')
    def expr_bool(p):
        """Boolean operation; the operator node is looked up by token text."""
        operator = ast_bool[p[1].getstr()]
        operands = [p[0], p[2]]
        return ast.BoolOp(operator, operands)

    @pg.production('exp : exp EQ exp')
    @pg.production('exp : exp CMP exp')
    def expr_cmp(p):
        """Single comparison between the two operands."""
        operator = ast_cmp[p[1].getstr()]
        return ast.Compare(p[0], [operator], [p[2]])

    @pg.production('exp : exp ADDSUB exp')
    @pg.production('exp : exp MULDIV exp')
    def expr_arithmetic(p):
        """Binary arithmetic; the operator node is looked up by token text."""
        operator = ast_arithmetic[p[1].getstr()]
        return ast.BinOp(p[0], operator, p[2])

    @pg.production('exp : NOT exp')
    def expr_not(p):
        """Logical negation of the operand that follows NOT."""
        return ast.UnaryOp(ast_not, p[1])

    @pg.production('exp : LPAR exp RPAR')
    def expr_par(p):
        """Parentheses only group; return the inner expression unchanged."""
        return p[1]

    @pg.production('exp : VAR')
    def expr_var(p):
        """Variable reference; the only accepted identifier is ``n``."""
        name = p[0].getstr()
        assert name == 'n'
        return ast.Name(name, ast.Load())

    @pg.production('exp : INT')
    def expr_int(p):
        """Integer literal."""
        value = int(p[0].getstr())
        return ast.Num(value)

    # pylint: enable=unused-variable
    # Build inside a private temporary directory to mitigate RPLY's
    # insecure use of /tmp (CVE-2014-1604, CVE-2014-1938).
    with misc.throwaway_tempdir('rply'):
        return pg.build()