def regex_tokenizer(text, G, skip_whitespaces=True):
    tokens = []
    # Assumed completion of the original placeholder: the regex operators map to
    # the module-level terminals (pipe, star, opar, cpar, epsilon), mirroring the
    # fixed_tokens dicts used by the other regex tokenizers in this collection.
    fixed_tokens = {
        '|': Token('|', pipe),
        '*': Token('*', star),
        '(': Token('(', opar),
        ')': Token(')', cpar),
        'ε': Token('ε', epsilon),
    }
    skip = False
    for i, char in enumerate(text):
        if skip:
            skip = False
            continue
        if skip_whitespaces and char.isspace():
            continue
        # A backslash escapes the next character, which is emitted as a plain symbol.
        if char == '\\':
            try:
                tokens.append(Token(text[i + 1], symbol))
            except IndexError:
                tokens.append(Token('\\', symbol))
            skip = True
            continue
        try:
            tokens.append(fixed_tokens[char])
        except KeyError:
            tokens.append(Token(char, symbol))
    tokens.append(Token('$', G.EOF))
    return tokens
def __test_lexer_2(lexer):
    text = '5465 for 45foreach fore'
    tokens, errors = lexer(text)
    assert errors == []
    assert [t.token_type for t in tokens] == [
        'num', 'space', 'for', 'space', 'num', 'foreach', 'space', 'id', 'eof'
    ]
    assert [t.lex for t in tokens] == [
        '5465', ' ', 'for', ' ', '45', 'foreach', ' ', 'fore', '$'
    ]

    text = '4forense forforeach for4foreach foreach 4for'
    tokens, errors = lexer(text)
    assert errors == []
    assert [t.token_type for t in tokens] == [
        'num', 'id', 'space', 'id', 'space', 'id', 'space', 'foreach',
        'space', 'num', 'for', 'eof'
    ]
    assert [t.lex for t in tokens] == [
        '4', 'forense', ' ', 'forforeach', ' ', 'for4foreach', ' ',
        'foreach', ' ', '4', 'for', '$'
    ]

    text = "LAexer"
    tokens, errors = lexer(text)
    assert errors == ["L", "A"]
    assert tokens == [Token("exer", "id"), Token("$", "eof")]
def tokenize(input_program: str):
    tokenizer = lex.lex()
    tokenizer.input(input_program)
    toks = []
    for tok in tokenizer:
        toks.append(Token(tok.value, token_type[tok.type]))
    toks.append(Token('$', G.EOF))
    return toks
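The tokenize function above relies on PLY's lex module picking up token rules defined in the calling module. Below is a minimal, self-contained sketch of such a rules module; the token names (NUM, PLUS, TIMES) and the sample input are hypothetical, chosen only to show the shape of the API, not this project's actual rules.

import ply.lex as lex

tokens = ('NUM', 'PLUS', 'TIMES')   # hypothetical token set, for illustration only

t_PLUS = r'\+'
t_TIMES = r'\*'
t_ignore = ' \t'

def t_NUM(t):
    r'\d+'
    return t

def t_error(t):
    t.lexer.skip(1)   # skip characters no rule covers

lexer = lex.lex()
lexer.input('3 + 4 * 5')
print([(tok.type, tok.value) for tok in lexer])
# [('NUM', '3'), ('PLUS', '+'), ('NUM', '4'), ('TIMES', '*'), ('NUM', '5')]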
def tokenize_text(text):
    tokens = []
    for item in text.split():
        try:
            float(item)
            token = Token(item, G['num'])
        except ValueError:
            try:
                token = fixed_tokens[item]
            except KeyError:
                token = UnknownToken(item)
        tokens.append(token)
    eof = Token('$', G.EOF)
    tokens.append(eof)
    return tokens
def _regex_tokenizer(G, symbol, text, skip_whitespaces=False):
    tokens = []
    if len(text) == 1:
        tokens.append(Token(text[0], G[symbol]))
    else:
        for char in text:
            if skip_whitespaces and char.isspace():
                continue
            temp = G[char]
            if temp is not None:
                tokens.append(Token(char, temp))
            else:
                tokens.append(Token(char, G[symbol]))
    tokens.append(Token('$', G.EOF))
    return tokens
def regex_tokenizer(text, G, skip_whitespaces=True):
    tokens = []
    jump = False
    # print(text)
    for char in text:
        if skip_whitespaces and char.isspace():
            continue
        elif char == '\\' and not jump:
            jump = True
        elif char == '*' and not jump:
            tokens.append(Token('*', star))
        elif char == '(' and not jump:
            tokens.append(Token('(', opar))
        elif char == ')' and not jump:
            tokens.append(Token(')', cpar))
        elif char == '|' and not jump:
            tokens.append(Token('|', pipe))
        elif char == 'ε' and not jump:
            tokens.append(Token('ε', epsilon))
        else:
            tokens.append(Token(char, symbol))
            jump = False
    tokens.append(Token('$', G.EOF))
    return tokens
def tokenize_text(text):
    tokens = []
    for item in text.split():
        try:
            float(item)
            token = Token(item, num)
        except ValueError:
            try:
                token = fixed_tokens[item]
            except KeyError:
                raise Exception('Undefined token')
        tokens.append(token)
    eof = Token('$', G.EOF)
    tokens.append(eof)
    return tokens
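The split-and-classify pattern used by tokenize_text can be exercised on its own. The sketch below uses a namedtuple stand-in for cmp.utils.Token and hypothetical terminal names ('num', 'plus', 'star', 'unknown', 'eof'); it only illustrates how the float test and the fixed_tokens fallback interact.

from collections import namedtuple

Token = namedtuple('Token', ['lex', 'token_type'])   # stand-in for cmp.utils.Token

fixed_tokens = {'+': Token('+', 'plus'), '*': Token('*', 'star')}

def classify(text):
    tokens = []
    for item in text.split():
        try:
            float(item)                             # numeric literals become 'num'
            tokens.append(Token(item, 'num'))
        except ValueError:                          # otherwise fall back to the fixed table
            tokens.append(fixed_tokens.get(item, Token(item, 'unknown')))
    tokens.append(Token('$', 'eof'))
    return tokens

print(classify('5.9 + 4 * x'))
# [Token(lex='5.9', token_type='num'), Token(lex='+', token_type='plus'),
#  Token(lex='4', token_type='num'), Token(lex='*', token_type='star'),
#  Token(lex='x', token_type='unknown'), Token(lex='$', token_type='eof')]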
def __init__(self, idx, args, obj=None, at_type=None, token=Token("", "", (-1, -1))):
    self.obj = obj
    self.id = idx
    self.args = args
    self.at_type = at_type
    if token.location[0] == -1:
        self.token = idx
    else:
        self.token = token
def _fix_tokens(self, tokens, errors):
    """
    Discard tokens whose token_type is 'space' and transform the
    remaining lexer tokens into grammar tokens.
    """
    fix_tokens = []
    for x in tokens:
        if x.token_type != 'space':
            try:
                if x.token_type in self.token_parse:
                    tok = Token(self.token_parse[x.token_type](x.lex), x.token_type)
                else:
                    tok = Token(x.lex, x.token_type)
                fix_tokens.append(tok)
            except KeyError:
                errors.append(f'The grammar does not recognize the token {x}')
    return fix_tokens
def get_grammar_tokens(gram_def, errors: list):
    tokens = []
    for x in gram_lexer(gram_def):
        if x.token_type != 'space':
            try:
                tok = Token(x.lex, symbols[x.token_type])
                tokens.append(tok)
            except KeyError:
                errors.append(f'Unknown Token({x.lex},{x.token_type}) in gram_def')
    return tokens
def regex_tokenizer(text, G, skip_whitespaces=True):
    tokens = []
    fixed_tokens = {lex: Token(lex, G[lex]) for lex in '| * ( ) ε [ ] ? + -'.split()}
    open_pos = 0
    inside_squares = False
    set_literal = False
    for i, char in enumerate(text):
        if skip_whitespaces and char.isspace():
            continue
        if not set_literal and char == '\\':
            set_literal = True
            continue
        if set_literal:
            tokens.append(Token(char, G['symbol']))
            set_literal = False
            continue
        if not inside_squares:
            if char in (']', '-') or char not in fixed_tokens:
                tokens.append(Token(char, G['symbol']))
            else:
                tokens.append(fixed_tokens[char])
                open_pos = i
                inside_squares = char == '['
        else:
            if char == ']':
                if i - open_pos == 1:
                    tokens.append(Token(char, G['symbol']))
                else:
                    inside_squares = False
                    tokens.append(fixed_tokens[char])
            elif char == '-':
                if is_minus_a_symbol(G, text, tokens, i, open_pos):
                    tokens.append(Token(char, G['symbol']))
                else:
                    tokens.append(fixed_tokens[char])
            else:
                tokens.append(Token(char, G['symbol']))
    if inside_squares:
        raise Exception(f'Unterminated character set at position {open_pos}')
    tokens.append(Token('$', G.EOF))
    return tokens
def tokenize_cool_text(G, text, idx, num, print_tokens=False):
    fixed_tokens = {
        t.Name: Token(t.Name, t)
        for t in G.terminals if t not in {idx, num}
    }

    @tokenizer(G, fixed_tokens)
    def tokenize_text(token):
        lex = token.lex
        try:
            float(lex)
            return token.transform_to(num)
        except ValueError:
            # TODO: check for string literals
            # (do something like lex[0] == '"' and lex[-1] == '"')
            return token.transform_to(idx)

    tokens = tokenize_text(text)
    if print_tokens:
        pprint_tokens(tokens)
    return tokens
def regex_tokenizer(text, G, skip_whitespaces=True):
    tokens = []
    fixed_tokens = {
        '|': Token('|', pipe),
        '*': Token('*', star),
        '(': Token('(', opar),
        ')': Token(')', cpar),
        'ε': Token('ε', epsilon)
    }
    for char in text:
        if skip_whitespaces and char.isspace():
            continue
        try:
            char_token = fixed_tokens[char]
        except KeyError:
            char_token = Token(char, symbol)
        tokens.append(char_token)
    tokens.append(Token('$', G.EOF))
    return tokens
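For reference, here is how the operator-versus-symbol split behaves on a small input. Plain strings stand in for the grammar terminals (pipe, star, opar, cpar, epsilon, symbol), which in the original are module-level grammar objects; this is only an illustrative toy, not the project's tokenizer.

from collections import namedtuple

Token = namedtuple('Token', ['lex', 'token_type'])

# String stand-ins for the grammar terminals used above.
pipe, star, opar, cpar, epsilon, symbol, eof = '| * ( ) ε symbol $'.split()

fixed_tokens = {
    '|': Token('|', pipe), '*': Token('*', star),
    '(': Token('(', opar), ')': Token(')', cpar), 'ε': Token('ε', epsilon),
}

def toy_regex_tokenizer(text):
    tokens = [fixed_tokens.get(c, Token(c, symbol)) for c in text if not c.isspace()]
    tokens.append(Token('$', eof))
    return tokens

print([t.token_type for t in toy_regex_tokenizer('a*(a|b)')])
# ['symbol', '*', '(', 'symbol', '|', 'symbol', ')', '$']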
def manual_input_app():
    ################
    # Declarations #
    ################
    G = Grammar()
    parsers = {
        'LL(1)': LL1Parser,
        'SLR(1)': SLR1Parser,
        'LR(1)': LR1Parser,
        'LALR(1)': LALR1Parser
    }

    #################
    # Input options #
    #################
    options = ('terminal id', 'terminal id + value', 'terminal id + value + regex')
    option = st.sidebar.selectbox('Entrada de los terminales', options, index=2)

    ###################
    # Parser selector #
    ###################
    parser_type = st.sidebar.selectbox('Seleccione el algoritmo de Parsing',
                                       ('LL(1)', 'SLR(1)', 'LR(1)', 'LALR(1)'),
                                       index=1)

    #################################################
    # Start symbol, non-terminals & terminals input #
    #################################################
    start_symbol = st.sidebar.text_input('Simbolo inicial: ', value=AritmethicStartSymbol)
    input_nonterminals = st.sidebar.text_input('No Terminales :', value=AritmethicNonTerminalsLR)
    input_terminals = st.sidebar.text_input('Terminales :', value=AritmethicTerminals)
    terminals_id, terminals_regex = terminals_input_control(
        option, options, input_terminals)

    ###################
    # Get productions #
    ###################
    input_productions = st.text_area('Producciones :', value=AritmethicProductionsLR)
    nonterminals_variables = ', '.join(input_nonterminals.split())
    terminal_variables = ', '.join(terminals_id[term] for term in input_terminals.split())

    ###################################################
    # Build the instructions to be run through exec() #
    ###################################################
    inst1 = f'{start_symbol} = G.NonTerminal("{start_symbol}", True)'
    if len(input_nonterminals) == 1:
        inst2 = f'{nonterminals_variables} = G.NonTerminal("{input_nonterminals}")'
    else:
        inst2 = f'{nonterminals_variables} = G.NonTerminals("{input_nonterminals}")'
    inst3 = f'{terminal_variables} = G.Terminals("{input_terminals}")'

    ##########
    # exec() #
    ##########
    exec_instructions(G, inst1, inst2, inst3, input_productions)

    ###############
    # Lexer setup #
    ###############
    if terminals_regex:
        table = [(G[t], re) for t, re in terminals_regex.items()] + [
            ('space', ' *'),
        ]
        lexer = Lexer(table, G.EOF)
    else:
        lexer = tokenizer(G, {t.Name: Token(t.Name, t) for t in G.terminals})

    ################
    # Parser setup #
    ################
    ParserClass = parsers[parser_type]
    parser = ParserClass(G)

    ########
    # Save #
    ########
    gName = st.sidebar.text_input("Nombre del archivo")
    if st.sidebar.button("Salvar"):
        try:
            f = open(gName + '.json', 'x')
            s = G.to_json
            json.dump(s, f, indent=4)
            st.sidebar.success(f'Salvado {gName}.json')
        except FileExistsError:
            st.sidebar.error('Ya existe un archivo con ese nombre')

    ############################
    # Regular grammar checking #
    ############################
    re_grammar = RegularGrammar(G)
    if re_grammar.valid:
        st.sidebar.success("Esta Gramatica es Regular")
        dfa = re_grammar.dfa
        regex = re_grammar.regex
        if st.checkbox('Mostrar DFA de la Gramatica'):
            st.graphviz_chart(str(dfa.graph()))
        if st.checkbox('Mostrar la expresion regular'):
            st.latex(regex)

    #######################
    # Display the grammar #
    #######################
    if st.checkbox('Mostrar Gramatica'):
        show_grammar(G)

    ####################
    # Firsts & Follows #
    ####################
    if st.checkbox('Mostrar Firsts & Follows'):
        st.subheader("Firsts :")
        st.dataframe(set_to_dataframe(parser.G, parser.firsts))
        st.subheader("Follows :")
        st.dataframe(set_to_dataframe(parser.G, parser.follows))

    #################
    # Parsing table #
    #################
    if st.checkbox('Mostrar Tabla de Parsing'):
        if parser_type == 'LL(1)':
            st.subheader("Table :")
            st.dataframe(lltable_to_dataframe(parser.table))
        else:
            st.subheader("Action :")
            st.dataframe(lrtable_to_dataframe(parser.action))
            st.subheader("Goto :")
            st.dataframe(lrtable_to_dataframe(parser.goto))

    ################
    # LR automaton #
    ################
    if parser_type != 'LL(1)':
        if st.checkbox('Mostrar Automata LR'):
            st.graphviz_chart(str(parser.automaton.graph()))
        dtree = LRDerivationTree
    else:
        dtree = LLDerivationTree

    ########################
    # Grammar modification #
    ########################
    modify_grammar(G)

    ####################
    # Parsing conflict #
    ####################
    if parser.conflict is not None:
        deal_with_conflict(parser, parser_type)
    else:
        ###################
        # Analyze strings #
        ###################
        text = st.text_input('Introduzca una cadena para analizar', value='')
        if st.button('Analyze'):
            tokens = [t for t in lexer(text) if t.token_type != 'space']
            derivation = parser(tokens)
            st.graphviz_chart(str(dtree(derivation).graph()))
from cmp.utils import Token, tokenizer
from Grammar import get_grammar

G, idx, num, string, ocur, ccur, semi = get_grammar()

fixed_tokens = {
    t.Name: Token(t.Name, t)
    for t in G.terminals if t not in {idx, num, string}
}

@tokenizer(G, fixed_tokens)
def tokenize_text(token):
    lex = token.lex
    try:
        float(lex)
        return token.transform_to(num)
    except ValueError:
        return token.transform_to(idx)

def pprint_tokens(tokens):
    indent = 0
    pending = []
    ret_text = ''
    for token in tokens:
        pending.append(token)
        if token.token_type in {ocur, ccur, semi}:
            if token.token_type == ccur:
                indent -= 1
            # Assumed completion from here on (the original snippet was cut off):
            # flush the pending tokens as one indented line per '{', '}' or ';'.
            line = ' ' * indent + ' '.join(str(t.token_type) for t in pending)
            print(line)
            ret_text += line + '\n'
            pending.clear()
            if token.token_type == ocur:
                indent += 1
    return ret_text
def __init__(self, G):
    self.G = G
    self.fixed_tokens = {
        lex: Token(lex, G[lex]) for lex in '+ - * / ( )'.split()
    }
class PlusNode(UnaryNode):
    @staticmethod
    def operate(value):
        # A+  ==  A concatenated with A*
        return f'({value[0]})+', automata_concatenation(value[1], automata_closure(value[1]))


class QuestionNode(UnaryNode):
    @staticmethod
    def operate(value):
        # A?  ==  A | ε  (the one-state NFA below accepts only ε)
        epsilon = NFA(1, {0}, {}, 0)
        return f'({value[0]})?', automata_union(value[1], epsilon)


fixed_tokens = {
    '*': Token('*', star),
    '(': Token('(', opar),
    ')': Token(')', cpar),
    '|': Token('|', pipe),
    '?': Token('?', qtn),
    '+': Token('+', plus),
    '[': Token('[', obra),
    ']': Token(']', cbra),
    EPSILON: Token(EPSILON, epsilon),
}


def regex_tokenizer(text, G, skip_whitespaces=True):
    tokens = []
    # Assumed completion of the original placeholder: look each character up in
    # the module-level fixed_tokens above, emitting anything else as a plain
    # `symbol` terminal, as the other tokenizers in this collection do.
    for char in text:
        if skip_whitespaces and char.isspace():
            continue
        try:
            tokens.append(fixed_tokens[char])
        except KeyError:
            tokens.append(Token(char, symbol))
    tokens.append(Token('$', G.EOF))
    return tokens
def __call__(self, text):
    errors = []
    return [
        Token(lex, ttype) for lex, ttype in self._tokenize(text, errors)
    ], errors
def __unit_testing_regex_tokenizer():
    G, symbol = grammar_for_regex()

    tokens = _regex_tokenizer(G, symbol, "a*(a|b)*cd|ε")
    assert tokens == [
        Token("a", G[symbol]),
        Token("*", G["*"]),
        Token("(", G["("]),
        Token("a", G[symbol]),
        Token("|", G["|"]),
        Token("b", G[symbol]),
        Token(")", G[")"]),
        Token("*", G["*"]),
        Token("c", G[symbol]),
        Token("d", G[symbol]),
        Token("|", G["|"]),
        Token("ε", G["ε"]),
        Token("$", G.EOF)
    ], "regex tokenizer error in 'a*(a|b)*cd|ε'"

    tokens = _regex_tokenizer(G, symbol, "*")
    assert tokens == [Token("*", G[symbol]), Token("$", G.EOF)], \
        "regex tokenizer error in '*'"
def __call__(self, tokens, errors, finding_conflict=False):
    stack = [0]
    cursor = 0
    output = []
    tokens = [x for x in tokens]

    while cursor < len(tokens):
        state = stack[-1]
        lookahead = tokens[cursor]
        if self.verbose:
            print(stack, '<---||--->', tokens[cursor:])

        # Error detection: there must be an entry for (state, lookahead)
        try:
            action = self.action[state, lookahead.token_type]
            if isinstance(action, tuple):
                action, tag = action
            else:
                return None if not finding_conflict else (state, lookahead, output)
        except KeyError:
            # errors.append(f'Invalid transition ({state},{lookahead}) doesnt exist expected {[x[1] for x in self.action if x[0] == state]}')
            posibles = [x for x in self.action if x[0] == state]
            arg = f"{lookahead.lex[0]}" if lookahead.is_eof else lookahead.lex[0]
            errors.append(SyntacticCoolError(SYNTACTIC_ERROR, arg, token=lookahead))
            # errors.append(f"Invalid transition near '{lookahead.lex[0]}'. Expected: {', '.join([str(x[1]) for x in posibles])}. Line:{lookahead.lex[1] + 1} Column:{lookahead.lex[2] + 1}")
            if len(posibles) == 1 and not lookahead.is_eof:
                # Error recovery: insert the single expected token and keep parsing.
                tokens.insert(
                    cursor + 1,
                    Token((str(posibles[0][1]), lookahead.lex[1], lookahead.lex[2]),
                          posibles[0][1]))
                cursor += 1
                continue
            return None if not finding_conflict else (state, lookahead, output)

        if action == self.SHIFT:
            # Shift case: push the symbol and the destination state.
            stack.append(lookahead.token_type)
            stack.append(tag)
            cursor += 1
        elif action == self.REDUCE:
            # Reduce case: pop the production's right-hand side off the stack.
            for i in range(len(tag.Right)):
                stack.pop()
                top = stack.pop()
                if top != tag.Right[-(i + 1)]:
                    errors.append(
                        f"Productions reduce doesnt match: {top} != {tag.Right[-(i+1)]}")
            index = self.goto[stack[-1], tag.Left]
            stack.append(tag.Left)
            stack.append(index)
            output.append(tag)
        elif action == self.OK:
            # OK case: the input was accepted.
            return output if not finding_conflict else (state, lookahead, output)
        else:
            # Invalid case.
            errors.append(f"Invalid case: {action}")
            return None if not finding_conflict else (state, lookahead, output)

    if cursor == len(tokens):
        errors.append('EOF token missing')
    else:
        errors.append('No valid derivation tree can be built with the given tokens')
def unit_testing():
    G = Grammar()
    E = G.NonTerminal('E', True)
    T, F, X, Y = G.NonTerminals('T F X Y')
    plus, minus, star, div, opar, cpar, num = G.Terminals('+ - * / ( ) num')

    E %= T + X, lambda h, s: s[2], None, lambda h, s: s[1]
    X %= plus + T + X, lambda h, s: s[3], None, None, lambda h, s: s[2] + h[0]
    X %= minus + T + X, lambda h, s: s[3], None, None, lambda h, s: h[0] - s[2]
    X %= G.Epsilon, lambda h, s: h[0]
    T %= F + Y, lambda h, s: s[2], None, lambda h, s: s[1]
    Y %= star + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0] * s[2]
    Y %= div + F + Y, lambda h, s: s[3], None, None, lambda h, s: h[0] / s[2]
    Y %= G.Epsilon, lambda h, s: h[0]
    F %= num, lambda h, s: float(s[1]), None
    F %= opar + E + cpar, lambda h, s: s[2], None, None, None

    xcool = BasicXCool(G)
    tokens = [num, star, num, star, num, plus, num, star, num, plus, num, plus, num, G.EOF]

    M = _build_parsing_table(G, xcool.firsts, xcool.follows)
    assert M == xcool.table, "Test Error in build_parsing_table"
    print(" - buider table ;) ")

    ####################################################################
    parser = _buid_parsing_func(G, M)
    left_parse, error = parser(tokens)
    assert error == []
    assert left_parse == [
        Production(E, Sentence(T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, Sentence(star, F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, Sentence(plus, T, X)),
        Production(T, Sentence(F, Y)),
        Production(F, Sentence(num)),
        Production(Y, G.Epsilon),
        Production(X, G.Epsilon),
    ], "Test Error in parser_library.LL1.parser"
    print(" - buider func ;) ")

    ###################################################################
    fixed_tokens = {
        '+': Token('+', plus),
        '-': Token('-', minus),
        '*': Token('*', star),
        '/': Token('/', div),
        '(': Token('(', opar),
        ')': Token(')', cpar),
    }

    def tokenize_text(text):
        tokens = []
        for item in text.split():
            try:
                float(item)
                token = Token(item, num)
            except ValueError:
                try:
                    token = fixed_tokens[item]
                except KeyError:
                    raise Exception('Undefined token')
            tokens.append(token)
        eof = Token('$', G.EOF)
        tokens.append(eof)
        return tokens

    text = '5.9 + 4'
    tokens = [Token('5.9', num), Token('+', plus), Token('4', num), Token('$', G.EOF)]
    left_parse, error = parser(tokens)
    assert len(left_parse) == 9 and len(error) == 0, "Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == 9.9, "Test Error in eval parser"

    text = '1 - 1 - 1'
    tokens = tokenize_text(text)
    left_parse, error = parser(tokens)
    assert len(left_parse) == 13 and len(error) == 0, "Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == -1, "Test Error in eval parser"

    text = '1 - ( 1 - 1 )'
    tokens = tokenize_text(text)
    left_parse, error = parser(tokens)
    assert len(left_parse) == 18 and len(error) == 0, "Test Error in parser func"
    result = _evaluate_parse(left_parse, tokens)
    assert result == 1, "Test Error in eval parser"
    print(" - method eval ;) ")

    #############################################################
    return "LL1"
def __init__(self, declarations, context=None):
    super().__init__(Token("", "", (0, 0)))  # symbolic initial token
    self.declarations = declarations
    self.context = context
def __init__(self, idx, typex, init_exp=None, token=Token("", "", (0, 0))):
    self.id = idx
    self.type = typex
    self.init_exp = init_exp
    self.token = token
def __call__(self, text):
    return [Token(lex, ttype) for lex, ttype in self._tokenize(text)]
def tokenize_cool_text(grammar, idx, type_id, string, num, data, errors, printing=False):
    # The lexer starts with: lexpos = 0, lineno = 1, last_new_line = 0
    # lexpos: within token rule functions, this points to the first character
    # after the matched text.
    lexer = lex.lex(module=tokens_rules)
    lexer.last_new_line_pos = 0
    lexer.errors = errors

    # Give the lexer some input
    lexer.input(data)

    lessequal = grammar["<="]
    rarrow = grammar["=>"]
    larrow = grammar["<-"]
    fixed_tokens_names = {
        t.Name: (t.Name, t)
        for t in grammar.terminals
        if t not in {idx, type_id, string, num, lessequal, rarrow, larrow}
    }
    fixed_tokens_names["larrow"] = ("<-", larrow)
    fixed_tokens_names["rarrow"] = ("=>", rarrow)
    fixed_tokens_names["lessequal"] = ("<=", lessequal)

    tokens = []
    pos_data = []

    # Tokenize
    while True:
        tok = lexer.token()
        if not tok:
            # Append EOF
            if len(pos_data) > 0:
                last_lineno, last_col = pos_data[-1]
                col = last_col + len(tokens[-1].lex)
            else:
                # Empty program
                last_lineno = 0
                col = -1
            tokens.append(
                Token("$", grammar.EOF, (last_lineno, find_column(data, col))))
            break  # No more input
        else:
            try:
                tval, ttype = fixed_tokens_names[tok.type]
            except KeyError:
                tval = tok.value
                if tok.type == "string":
                    ttype = string
                elif tok.type == "id":
                    ttype = idx
                elif tok.type == "type_id":
                    ttype = type_id
                else:
                    ttype = num
            tokens.append(
                Token(tval, ttype, (tok.lineno, find_column(data, tok.lexpos))))

    if printing:
        pprint_tokens(tokens)
    return tokens
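find_column is called above but not defined in this snippet. A sketch in the spirit of the column-tracking recipe from the PLY documentation is shown below; the 1-based convention is an assumption and may need adjusting to match the project's error reporting.

def find_column(data, lexpos):
    # Column = offset from the last newline before lexpos (1-based here; assumed convention).
    line_start = data.rfind('\n', 0, lexpos) + 1
    return lexpos - line_start + 1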