Esempio n. 1
0
 def handle_newline(token):
     """Handle a NEWLINE token for an indentation-sensitive lexer.

     Compares the whitespace that follows the newline against the current
     indentation stack and queues INDENT or DEDENT tokens accordingly.
     Relies on the enclosing scope for ``indent_level`` (stack of indent
     widths), ``token_queue`` (pending tokens) and ``tab_len`` (spaces a
     tab counts for). Returns *token* unchanged.
     """
     text = token.getstr()
     # Everything after the last '\n' is the new line's leading whitespace.
     indent_str = text.rsplit('\n', 1)[1]
     indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
     if indent > indent_level[-1]:
         indent_level.append(indent)
         indent_token = Token('INDENT', indent_str)
         indent_token.source_pos = token.getsourcepos()
         token_queue.append(indent_token)
     else:
         # Pop every level deeper than the new indent, emitting one DEDENT
         # per closed block.
         while indent < indent_level[-1]:
             indent_level.pop()
             dedent_token = Token('DEDENT', indent_str)
             # Fix: give DEDENT tokens a source position too (the INDENT
             # branch already did), so parser errors can point at a line.
             dedent_token.source_pos = token.getsourcepos()
             token_queue.append(dedent_token)
     return token
Esempio n. 2
0
 def handle_newline(token):
     """Turn the indentation after a NEWLINE into INDENT/DEDENT tokens.

     Uses the enclosing scope's ``indent_level`` stack, ``token_queue``
     and ``tab_len``. The NEWLINE token itself is returned unchanged.
     """
     text = token.getstr()
     # The leading whitespace of the new logical line.
     indent_str = text.rsplit('\n', 1)[1]
     indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
     if indent > indent_level[-1]:
         indent_level.append(indent)
         indent_token = Token('INDENT', indent_str)
         indent_token.source_pos = token.getsourcepos()
         token_queue.append(indent_token)
     else:
         while indent < indent_level[-1]:
             indent_level.pop()
             dedent_token = Token('DEDENT', indent_str)
             # Fix: DEDENT tokens previously carried no source position,
             # unlike INDENT tokens; set it for consistent diagnostics.
             dedent_token.source_pos = token.getsourcepos()
             token_queue.append(dedent_token)
     return token
Esempio n. 3
0
def mod_lex(lexer, repl_mode=False):
    """Post-process the raw token stream from *lexer*.

    * Converts significant newlines into INDENT/DEDENT tokens.
    * Ignores newlines inside unbalanced (), {}, [] (implicit continuation).
    * Re-tags NAME tokens that match a keyword rule in ``klg``.
    * Expands '&name' into the token triple AMP, COMMA, NAME.

    In *repl_mode*, yields REPL_CONTINUE instead of closing open blocks,
    so the REPL can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}

    token_queue = []
    indent_level = [0]      # stack of active indentation widths
    ignore_newline = False  # True while inside unbalanced brackets
    paren_level = 0
    tab_len = 4             # a tab counts as this many spaces

    def handle_newline(token):
        # Queue INDENT/DEDENT tokens based on the whitespace that follows
        # the newline, updating the indent_level stack. Returns *token*.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.append(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                # Fix: DEDENTs previously lacked a source position, unlike
                # INDENTs; set it so parser errors can point at a line.
                dedent_token.source_pos = token.getsourcepos()
                token_queue.append(dedent_token)
        return token

    for token in lexer:
        # Drain tokens queued by newline handling first.
        # NOTE(review): list.pop() drains LIFO here while the final drain
        # below iterates FIFO — presumably harmless only because queued
        # DEDENT tokens are interchangeable; confirm against callers.
        while len(token_queue) > 0:
            queued_token = token_queue.pop()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)
            yield queued_token

        if token.name == 'NAME':
            # Re-tag names that are actually keywords.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        elif token.gettokentype() == 'NEWLINE':
            # Newlines inside brackets are ignored (implicit continuation).
            if not ignore_newline:
                yield handle_newline(token)
            continue

        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)

        # Expand '&name' into the three tokens AMP, COMMA, NAME.
        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            amp = Token('AMP', '&')
            amp.source_pos = token.getsourcepos()
            comma = Token('COMMA', ',')
            # Fix: the source position was assigned to `amp` twice; the
            # COMMA token was left without one.
            comma.source_pos = token.getsourcepos()
            name = Token('NAME', token.getstr()[1:])
            name.source_pos = token.getsourcepos()
            yield amp
            yield comma
            yield name
        else:
            yield token

    # End of input: in REPL mode signal that more input is expected while
    # an indent block or bracket pair is still open; otherwise close all
    # open blocks with DEDENTs and flush the queue.
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')

        for token in token_queue:
            yield token
Esempio n. 4
0
def mod_lex(lexer, repl_mode=False):
    """Post-process the raw token stream from *lexer*.

    * Converts significant newlines into INDENT/DEDENT tokens.
    * Ignores newlines inside unbalanced (), {}, [] (implicit continuation).
    * Re-tags NAME tokens that match a keyword rule in ``klg``.
    * Expands '&name' into the token triple AMP, COMMA, NAME.

    In *repl_mode*, yields REPL_CONTINUE instead of closing open blocks,
    so the REPL can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}

    token_queue = []
    indent_level = [0]      # stack of active indentation widths
    ignore_newline = False  # True while inside unbalanced brackets
    paren_level = 0
    tab_len = 4             # a tab counts as this many spaces

    def handle_newline(token):
        # Queue INDENT/DEDENT tokens based on the whitespace that follows
        # the newline, updating the indent_level stack. Returns *token*.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.append(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                # Fix: DEDENTs previously lacked a source position, unlike
                # INDENTs; set it so parser errors can point at a line.
                dedent_token.source_pos = token.getsourcepos()
                token_queue.append(dedent_token)
        return token

    for token in lexer:
        # Drain tokens queued by newline handling first.
        # NOTE(review): list.pop() drains LIFO here while the final drain
        # below iterates FIFO — presumably harmless only because queued
        # DEDENT tokens are interchangeable; confirm against callers.
        while len(token_queue) > 0:
            queued_token = token_queue.pop()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)
            yield queued_token

        if token.name == 'NAME':
            # Re-tag names that are actually keywords.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        elif token.gettokentype() == 'NEWLINE':
            # Newlines inside brackets are ignored (implicit continuation).
            if not ignore_newline:
                yield handle_newline(token)
            continue

        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)

        # Expand '&name' into the three tokens AMP, COMMA, NAME.
        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            amp = Token('AMP', '&')
            amp.source_pos = token.getsourcepos()
            comma = Token('COMMA', ',')
            # Fix: the source position was assigned to `amp` twice; the
            # COMMA token was left without one.
            comma.source_pos = token.getsourcepos()
            name = Token('NAME', token.getstr()[1:])
            name.source_pos = token.getsourcepos()
            yield amp
            yield comma
            yield name
        else:
            yield token

    # End of input: in REPL mode signal that more input is expected while
    # an indent block or bracket pair is still open; otherwise close all
    # open blocks with DEDENTs and flush the queue.
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')

        for token in token_queue:
            yield token
Esempio n. 5
0
def mod_lex(lexer, repl_mode=False):
    """Post-process the raw token stream from *lexer*.

    * Converts significant newlines into INDENT/DEDENT tokens.
    * Ignores newlines inside unbalanced (), {}, [], after an infix
      operator, or before a line starting with an infix operator or
      DOT_NAME (implicit line continuation).
    * Re-tags NAME tokens that match a keyword rule in ``klg``.
    * Expands '&name' into the token triple AMP, COMMA, NAME.

    In *repl_mode*, yields REPL_CONTINUE instead of closing open blocks,
    so the REPL can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}

    token_queue = Queue()
    indent_level = [0]      # stack of active indentation widths
    ignore_newline = False  # True while inside unbalanced brackets
    paren_level = 0
    tab_len = 4             # a tab counts as this many spaces

    def make_amp_tokens(token):
        # Expand an '&name' token into AMP, COMMA and NAME tokens, all
        # carrying the original token's source position.
        amp = Token('AMP', '&')
        amp.source_pos = token.getsourcepos()
        comma = Token('COMMA', ',')
        # Fix: the position was previously assigned to `amp` twice,
        # leaving the COMMA token without one.
        comma.source_pos = token.getsourcepos()
        name = Token('NAME', token.getstr()[1:])
        name.source_pos = token.getsourcepos()
        return amp, comma, name

    def handle_newline(token):
        # Queue INDENT/DEDENT tokens based on the whitespace that follows
        # the newline, updating the indent_level stack. Returns *token*.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.put(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                # Fix: DEDENTs previously lacked a source position, unlike
                # INDENTs; set it so parser errors can point at a line.
                dedent_token.source_pos = token.getsourcepos()
                token_queue.put(dedent_token)
        return token

    for token in lexer:
        # Drain tokens queued by lookahead / newline handling first.
        while not token_queue.empty():
            queued_token = token_queue.get()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)

            if queued_token.gettokentype() == 'NAME' and \
                    queued_token.getstr().startswith('&'):
                for part in make_amp_tokens(queued_token):
                    yield part
            else:
                yield queued_token

        if token.name == 'NAME':
            # Re-tag names that are actually keywords.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        if token.gettokentype() in INFIX_OPERATORS:
            # A newline right after an infix operator continues the line:
            # drop it; any other lookahead token is queued for later.
            ahead_token = next(lexer)
            if ahead_token.gettokentype() == 'NEWLINE':
                pass
            else:
                token_queue.put(ahead_token)
        elif token.gettokentype() == 'NEWLINE':
            # Peek one token ahead: a newline followed by an infix operator
            # or a DOT_NAME continues the current logical line.
            try:
                ahead_token = next(lexer)
                _set_keyword(ahead_token)
                ahead_token_type = ahead_token.gettokentype()
            except StopIteration:
                ahead_token = None
                ahead_token_type = None
            if not (ignore_newline or ((ahead_token is not None) and
                                       ((ahead_token_type in INFIX_OPERATORS)
                                        or ahead_token_type == 'DOT_NAME'))):
                yield handle_newline(token)
            if ahead_token is not None:
                token_queue.put(ahead_token)
            continue

        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)

        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            for part in make_amp_tokens(token):
                yield part
        else:
            yield token

    # End of input: in REPL mode signal that more input is expected while
    # an indent block or bracket pair is still open; otherwise flush the
    # queue and close every open block with a DEDENT.
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        while not token_queue.empty():
            yield token_queue.get()

        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')
Esempio n. 6
0
File: lexer.py Progetto: tlvu/mochi
def mod_lex(lexer, repl_mode=False):
    """Post-process the raw token stream from *lexer*.

    * Converts significant newlines into INDENT/DEDENT tokens.
    * Ignores newlines inside unbalanced (), {}, [], after an infix
      operator, or before a line starting with an infix operator or
      DOT_NAME (implicit line continuation).
    * Re-tags NAME tokens that match a keyword rule in ``klg``.
    * Expands '&name' into the token triple AMP, COMMA, NAME.

    In *repl_mode*, yields REPL_CONTINUE instead of closing open blocks,
    so the REPL can prompt for more input.
    """
    paren_openers = {'LPAREN', 'LBRACE', 'LBRACK'}
    paren_closers = {'RPAREN', 'RBRACE', 'RBRACK'}

    token_queue = Queue()
    indent_level = [0]      # stack of active indentation widths
    ignore_newline = False  # True while inside unbalanced brackets
    paren_level = 0
    tab_len = 4             # a tab counts as this many spaces

    def make_amp_tokens(token):
        # Expand an '&name' token into AMP, COMMA and NAME tokens, all
        # carrying the original token's source position.
        amp = Token('AMP', '&')
        amp.source_pos = token.getsourcepos()
        comma = Token('COMMA', ',')
        # Fix: the position was previously assigned to `amp` twice,
        # leaving the COMMA token without one.
        comma.source_pos = token.getsourcepos()
        name = Token('NAME', token.getstr()[1:])
        name.source_pos = token.getsourcepos()
        return amp, comma, name

    def handle_newline(token):
        # Queue INDENT/DEDENT tokens based on the whitespace that follows
        # the newline, updating the indent_level stack. Returns *token*.
        text = token.getstr()
        indent_str = text.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * tab_len
        if indent > indent_level[-1]:
            indent_level.append(indent)
            indent_token = Token('INDENT', indent_str)
            indent_token.source_pos = token.getsourcepos()
            token_queue.put(indent_token)
        else:
            while indent < indent_level[-1]:
                indent_level.pop()
                dedent_token = Token('DEDENT', indent_str)
                # Fix: DEDENTs previously lacked a source position, unlike
                # INDENTs; set it so parser errors can point at a line.
                dedent_token.source_pos = token.getsourcepos()
                token_queue.put(dedent_token)
        return token

    for token in lexer:
        # Drain tokens queued by lookahead / newline handling first.
        while not token_queue.empty():
            queued_token = token_queue.get()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1
            ignore_newline = (paren_level > 0)

            if queued_token.gettokentype() == 'NAME' and \
                    queued_token.getstr().startswith('&'):
                for part in make_amp_tokens(queued_token):
                    yield part
            else:
                yield queued_token

        if token.name == 'NAME':
            # Re-tag names that are actually keywords.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        if token.gettokentype() in INFIX_OPERATORS:
            # A newline right after an infix operator continues the line:
            # drop it; any other lookahead token is queued for later.
            ahead_token = next(lexer)
            if ahead_token.gettokentype() == 'NEWLINE':
                pass
            else:
                token_queue.put(ahead_token)
        elif token.gettokentype() == 'NEWLINE':
            # Peek one token ahead: a newline followed by an infix operator
            # or a DOT_NAME continues the current logical line.
            try:
                ahead_token = next(lexer)
                _set_keyword(ahead_token)
                ahead_token_type = ahead_token.gettokentype()
            except StopIteration:
                ahead_token = None
                ahead_token_type = None
            if not (ignore_newline or ((ahead_token is not None) and
                                       ((ahead_token_type in INFIX_OPERATORS)
                                        or ahead_token_type == 'DOT_NAME'))):
                yield handle_newline(token)
            if ahead_token is not None:
                token_queue.put(ahead_token)
            continue

        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1
        ignore_newline = (paren_level > 0)

        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            for part in make_amp_tokens(token):
                yield part
        else:
            yield token

    # End of input: in REPL mode signal that more input is expected while
    # an indent block or bracket pair is still open; otherwise flush the
    # queue and close every open block with a DEDENT.
    if repl_mode and len(indent_level) > 1:
        yield REPL_CONTINUE
    elif repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        while not token_queue.empty():
            yield token_queue.get()

        while len(indent_level) > 1:
            indent_level.pop()
            yield Token('DEDENT', '')
Esempio n. 7
0
def mod_lex(lexer, repl_mode=False):
    """Post-process the raw token stream from *lexer*.

    This variant has no indentation handling: NEWLINE tokens are dropped
    entirely, and a newline right after an infix operator is swallowed as
    a line continuation. NAME / NAME_LPAREN tokens matching a keyword
    rule in ``klg`` are re-tagged, and '&name' is expanded into the token
    triple AMP, COMMA, NAME.

    In *repl_mode*, yields REPL_CONTINUE while a bracket pair is still
    open, so the REPL can prompt for more input.
    """
    paren_openers = {
        'LPAREN', 'LBRACE', 'LBRACK', 'NAME_LPAREN', 'DOT_NAME_LPAREN'
    }
    paren_closers = {
        'RPAREN', 'RBRACE', 'RBRACK', 'NAME_RPAREN', 'DOT_NAME_RPAREN'
    }

    token_queue = Queue()
    paren_level = 0  # bracket nesting depth; > 0 means an open pair

    def make_amp_tokens(token):
        # Expand an '&name' token into AMP, COMMA and NAME tokens, all
        # carrying the original token's source position.
        amp = Token('AMP', '&')
        amp.source_pos = token.getsourcepos()
        comma = Token('COMMA', ',')
        # Fix: the position was previously assigned to `amp` twice,
        # leaving the COMMA token without one.
        comma.source_pos = token.getsourcepos()
        name = Token('NAME', token.getstr()[1:])
        name.source_pos = token.getsourcepos()
        return amp, comma, name

    for token in lexer:
        # Drain tokens queued by operator lookahead first.
        while not token_queue.empty():
            queued_token = token_queue.get()
            if queued_token.gettokentype() in paren_openers:
                paren_level += 1
            elif queued_token.gettokentype() in paren_closers:
                paren_level -= 1

            if queued_token.gettokentype() == 'NAME' and \
                    queued_token.getstr().startswith('&'):
                for part in make_amp_tokens(queued_token):
                    yield part
            else:
                yield queued_token

        if token.name in ('NAME', 'NAME_LPAREN'):
            # Re-tag names that are actually keywords. The original two
            # identical per-type loops are merged; a token matches at
            # most once.
            for rule in klg.rules:
                if rule.matches(token.value, 0):
                    token.name = rule.name
                    break
        if token.gettokentype() in INFIX_OPERATORS:
            # A newline right after an infix operator continues the line:
            # drop it; any other lookahead token is queued for later.
            ahead_token = next(lexer)
            _set_keyword(ahead_token)
            if ahead_token.gettokentype() == 'NEWLINE':
                pass
            else:
                token_queue.put(ahead_token)
        elif token.gettokentype() == 'NEWLINE':
            # Newlines carry no meaning in this variant.
            continue

        if token.gettokentype() in paren_openers:
            paren_level += 1
        elif token.gettokentype() in paren_closers:
            paren_level -= 1

        if token.gettokentype() == 'NAME' and token.getstr().startswith('&'):
            for part in make_amp_tokens(token):
                yield part
        else:
            yield token

    # End of input: in REPL mode ask for more input while brackets are
    # still open; otherwise flush whatever remains in the queue.
    if repl_mode and paren_level > 0:
        yield REPL_CONTINUE
    else:
        while not token_queue.empty():
            yield token_queue.get()