def token(self):
    """Emit the next single-character token from self.str, or None at end."""
    pos = self.cur
    if pos >= len(self.str):
        return None

    ch = self.str[pos]

    # Direct single-character classifications.
    simple = {
        "\\": "BACKSLASH",
        "/": "DIVIDE",
        "[": "LSBRACKET",
        "]": "RSBRACKET",
        "*": "STAR",
        "\n": "LT",
        "\r": "LT",
    }

    t = LexToken()
    if ch in simple:
        t.type = simple[ch]
    elif re.match(r"[a-zA-Z0-9_$]+", ch) is not None:
        t.type = "ID_PART"
    else:
        t.type = "UCHAR"

    t.value = ch
    t.lineno = 0
    t.lexpos = pos

    self.cur = pos + 1

    print(t)
    return t
Beispiel #2
0
 def clone_token(old_token, new_type):
     """Return a copy of *old_token* whose type is replaced by *new_type*."""
     fresh = LexToken()
     fresh.type = new_type
     for attr in ("value", "lineno", "lexpos"):
         setattr(fresh, attr, getattr(old_token, attr))
     return fresh
Beispiel #3
0
 def _create_token(self, type):
     """Build an empty LexToken of the given *type* (value '', position 0)."""
     tok = LexToken()
     tok.type = type
     tok.value = ''
     tok.lineno = 0
     tok.lexpos = 0
     return tok
Beispiel #4
0
def new_dedent(amount, token):
    """Create a DEDENT token carrying *amount*, positioned at *token*."""
    dedent_tok = LexToken()
    dedent_tok.type = "DEDENT"
    dedent_tok.value = amount
    dedent_tok.lineno, dedent_tok.lexpos = token.lineno, token.lexpos
    return dedent_tok
Beispiel #5
0
def _new_token(type, token):
    """Return a LexToken of *type*, copying value and position from *token*."""
    result = LexToken()
    result.type = type
    for name in ("value", "lineno", "lexpos"):
        setattr(result, name, getattr(token, name))
    return result
Beispiel #6
0
 def _to_yacc(self, token_type, token_data):
     """Wrap (token_type, token_data) in a LexToken and feed it to the parser."""
     tok = LexToken()
     tok.type = token_type
     tok.value = (token_type, token_data)
     # Positions are not tracked yet (TODO: file offset).
     tok.lineno = 0
     tok.lexpos = 0
     self.__to_yacc(tok)
Beispiel #7
0
 def _lextoken( self, type_, value ) :
     """Construct a LexToken of *type_*/*value* at the lexer's current position."""
     t = LexToken()
     t.type, t.value = type_, value
     t.lineno, t.lexpos = self.lexer.lineno, self.lexer.lexpos
     return t
 def token(self, value, ty=None):
     """Build a throwaway LexToken.

     Parameters:
         value: token text; also reused as the type when *ty* is omitted.
         ty: explicit token type, or None to reuse *value*.
     Returns the new token; lineno/lexpos are -1 (position unknown).
     """
     t = LexToken()
     # Idiom fix: compare against the None singleton with `is not None`.
     t.type = ty if ty is not None else value
     t.value = value
     t.lineno = -1
     t.lexpos = -1
     return t
Beispiel #9
0
def p_error(p):
  """
  print(p.lexer.prev.lineno, p.lineno)
  if p.lexer.prev.lineno < p.lineno or p.type == "RBRACKET":
    yacc.errok()
    return
  """
  # Yacc error callback.  At EOF (p is None) try automatic semicolon
  # insertion once; otherwise delegate to handle_semi_error().
      
  if p == None:
    if not restricted() and glob.g_tried_semi == False:
      # Push a synthetic semicolon and let the parser retry.
      t = LexToken()
      t.type = "SEMI"
      t.value = ";"
      t.lexpos = -1
      t.lineno = -1
      glob.g_lexer.push(t)
      glob.g_tried_semi = True
      yacc.errok()
    else:
      sys.stderr.write(glob.g_file + ": error: unexpected end of file\n")
    return
  else:
    glob.g_error_pre = p
    if handle_semi_error(p):
      t = LexToken()
      t.type = "SEMI"
      t.value = ";"
      t.lexpos = p.lexpos
      t.lineno = p.lineno
      #glob.g_lexer.push(t)
      #glob.g_tried_semi = True
      
      yacc.errok()
      glob.g_error = False
      return
    else:      
      glob.g_error = True
      print_err(p)
      return
      
  # NOTE(review): everything below is unreachable — both branches above
  # return.  Kept as-is; delete or rewire if this reporting is wanted.
  if glob.g_error:
    print_err(glob.g_error_pre)
    
  glob.g_error_pre = p
  glob.g_error = True
  
  try:
    line = int(p.lineno)
  except:
    line = p.lineno(1)
  
  try:
    lexdata = p.lexer.lexer.lexdata
    sline = p.lexer.lexer.lexpos
  except:
    lexdata = p.lexer.lexdata
    sline = p.lexer.lexpos
  
  sline = lexdata[sline-40:sline+1]
Beispiel #10
0
 def createFunctionDefinition(self, def_token, var_token, params, val_node):
    """Build a LetNode binding *var_token* to a lambda over *params*/*val_node*."""
    lam_tok = LexToken()
    lam_tok.value = 'lambda'
    lam_tok.type = 'LAMBDA'
    lam_node = LambdaNode(lam_tok, [Node(None, None, nodes(params)), val_node])
    return LetNode(def_token, [VariableNode(var_token), lam_node])
Beispiel #11
0
def gen(code):
    """Yield a LexToken for each (value, type) item on each line of *code*.

    A trailing None marks the end of the stream.
    """
    for line_items in code:
        for item in line_items:
            tok = LexToken()
            tok.type, tok.value = item[1], item[0]
            yield tok
    yield None
Beispiel #12
0
 def newtok(tok, ttype=None):
     """Flush *tok* (via push) when the type changes and start a fresh token.

     Returns *tok* unchanged when no new token is needed.
     """
     same_type = tok.type == ttype
     keep = same_type or (ttype is None and tok.value == "")
     if not keep:
         if tok.type is not None:
             push(tok)
         tok = LexToken()
         tok.type = ttype
         tok.value = ""
     return tok
Beispiel #13
0
 def _new_token(self, type=None, value=None, lexpos=None, lineno=None) -> LexToken:
     """
     Creates a new lexer token with the given properties.
     :return: a new lexer token with the given properties.
     """
     token = LexToken()
     token.type = type
     token.value = value
     token.lexpos = lexpos
     token.lineno = lineno
     # Bug fix: the token was built but never returned, despite the
     # `-> LexToken` annotation and the docstring's :return: clause.
     return token
Beispiel #14
0
	def _lextoken_from_html(self, html_token):
		"""Translate one html tokenizer token dict into a ply LexToken."""
		token = LexToken()
		# Map the numeric tokenizer type onto our ply token names.
		token.type = {
			0 : 'HTML_DOCTYPE',
			1 : 'HTML_CHARS',
			2 : 'HTML_WS',
			3 : 'HTML_STARTTAG',
			4 : 'HTML_ENDTAG',
			5 : 'HTML_EMPTYTAG',
			6 : 'HTML_COMMENT',
			7 : 'HTML_PARSEERROR',
		}[html_token['type']]
		# TODO: fix lineno/lexpos
		token.lineno = self.lineno
		token.lexpos = self.lexpos
		
		token.value = {
			'self_closing' : html_token.get('selfClosing', False),
			'name' : html_token.get('name', None),
		}
		
		# A list/tuple 'data' payload carries tag attributes; anything else
		# is character data.
		if isinstance(html_token['data'], (list, tuple)):
			token.value['attrs'] = html_token['data']
			token.value['data'] = ''
			# NOTE(review): assumes 'name' is non-None for attribute-bearing
			# tokens — confirm against the tokenizer.
			if token.value['name'].lower() in voidElements:
				token.type = 'HTML_VOID_TAG'
		else:
			token.value['data'] = html_token['data']
		
		# NOTE(review): token.type holds our 'HTML_*' strings, but this
		# compares against tokenTypes['ParseError'] — confirm tokenTypes maps
		# to the same string values, otherwise this check never fires.
		if token.type == tokenTypes['ParseError']:
			raise SyntaxError("Got HTML Parse Error for token {}".format(html_token))
		
		return token
Beispiel #15
0
 def _new_token(self, new_type, new_value, lineno: int, lexpos: int):
     """Return a LexToken populated with the given type, value and position."""
     tok = LexToken()
     tok.type, tok.value = new_type, new_value
     tok.lineno, tok.lexpos = lineno, lexpos
     return tok
Beispiel #16
0
 def p_error(self, p):
     """Parser error callback: record bad tokens; synthesize an 'error' token at EOF."""
     # TODO
     if not p:
         # hack handle eof, don't know why ply behaves this way
         from ply.lex import LexToken
         eof_tok = LexToken()
         eof_tok.value = self.lexer.lexdata[self.lexer.lexpos:]
         eof_tok.lineno = self.lexer.lineno
         eof_tok.type = 'error'
         eof_tok.lexpos = self.lexer.lexpos
         self._parser.errok()
         return eof_tok
     self._errors.append(p)
     pass # self._parser.errok()
Beispiel #17
0
 def t_ANY_STRING(self, t: LexToken) -> LexToken:
     # Use the first non-empty capture group among the known string groups
     # as the token's value.  (No docstring: ply reads rule docstrings as
     # the token regex.)
     match = t.lexer.lexmatch
     for group_name in self.string_group_enc:
         candidate = match.group(group_name)
         if candidate:
             t.value = candidate
             break
     return t
Beispiel #18
0
def handle_semi_error(p):
  """Try automatic semicolon insertion after a parse error.

  Returns True when a synthetic SEMI token was pushed into the lexer and
  parsing may resume; False when this is a real error.
  NOTE(review): depends on glob.* flags and a custom lexer peek/push API.
  """
  tok = p.lexer.peek()
  if len(p.lexer.peeks) > 1:
    prev = p.lexer.peeks[-2]
  else:
    prev = p.lexer.prev
  cur = p.lexer.cur
  
  # Fall back to the peeked token when prev/cur are missing.
  if prev == None:
    prev = tok
  if cur == None:
    cur = tok
    
  # Tokens may arrive wrapped in single-element lists; unwrap them.
  if type(prev) == list: prev = prev[0]
  if type(cur) == list: cur = cur[0]
  if type(tok) == list: tok = tok[0]
  
  # Insertion is plausible at end of input, across a line break, or
  # around a closing bracket.
  ret = tok == None or cur == None or prev.lineno < tok.lineno
  ret = ret or tok.type == "RBRACKET" or prev.type == "RBRACKET" 
  ret = ret or cur.type == "RBRACKET"
  
  # Restricted productions veto the insertion.
  p2 = restricted()
  if p2 != None and not (prev.type in ["RSBRACKET", "RPAREN"] and restrict_prev() == None):
    ret = False
    p = p2
    glob.g_line = p.lineno
    glob.g_lexpos = p.lexpos
    
  if ret and not glob.g_tried_semi:
    # Push a synthetic semicolon (plus the current token) back into the
    # stream and tell yacc to continue.
    t = LexToken()
    t.type = "SEMI"
    t.value = ";"
    t.lineno = cur.lineno
    t.lexpos = cur.lexpos
    
    p.lexer.push(p.lexer.cur)
    p.lexer.push(t)
    
    yacc.errok()
    glob.g_error = False
    glob.g_tried_semi = True
  else:
    ret = False
    glob.g_error = True
    glob.g_error_pre = p
    
  return ret
Beispiel #19
0
	def _parse_chars(self, data):
		"""Scan *data* for an embedded JS region and queue the resulting tokens.

		Appends to self.next_tokens: an optional HTML_CHARS token for text
		before the JS start marker, the start-marker token itself, then the
		tokens produced by a nested JSLexer up to the matching terminator.
		Returns the unconsumed tail of *data*, or None when no JS start
		marker is present.
		"""
		m = js_start_rx.match(data)
		
		if m is None:
			return None
		
		pretext = m.group(1)
		start_type = m.group(2)
		
		# Rewind lexpos to the beginning of *data*; it is re-advanced below.
		self.lexpos -= len(data)
		
		if len(pretext):
			pretext_tok = LexToken()
			pretext_tok.type = 'HTML_CHARS'
			pretext_tok.value = pretext
			pretext_tok.lineno = self.lineno - pretext.count("\n")
			pretext_tok.lexpos = self.lexpos
			self.next_tokens.append(pretext_tok)
			self.lexpos += len(pretext)
		
		start_tok = LexToken()
		start_tok.type = self.tbtype[start_type]
		start_tok.value = start_type
		start_tok.lineno = self.lineno
		start_tok.lexpos = self.lexpos
		self.next_tokens.append(start_tok)
		self.lexpos += len(start_type)
		
		# Lex the remainder with a nested JS lexer until its terminator.
		js_lexer = JSLexer()
		js_lexer.input(data[m.end(2):])
		for t in js_lexer:
			t.lineno += self.lineno - 1
			t.lexpos = self.lexpos
			# NOTE(review): this adds the JS lexer's *absolute* lexpos on
			# every iteration — confirm a per-token delta was not intended.
			self.lexpos += js_lexer.lexer.lexpos
			
			if t.type in ('EXPRESSION_TERMINATOR', 'ESCAPED_TERMINATOR', 'JS_TERMINATOR'):
				if t.type != self.ttype[start_type]:
					raise SyntaxError("Expected {} but got {} in char data `{}`".format(self.ttype[start_type], t.type, data))
				self.next_tokens.append(t)
				break
			
			self.next_tokens.append(t)
		remaining_text = data[m.end(2) + js_lexer.lexer.lexpos:]
		self.lexpos += len(remaining_text)
		return remaining_text
Beispiel #20
0
    def gen_token(value, type, line, lexpos):
        """Build a LexToken from explicit fields.

        NOTE(review): assigns ``t.line`` where every other helper here sets
        ``t.lineno`` — confirm downstream readers expect ``line``.
        NOTE(review): references ``self`` from the enclosing scope, so this
        must be a closure defined inside a method.
        """
        t = LexToken()
        t.value = value
        t.type = type
        t.line = line
        t.lexpos = lexpos
        t.lexer = self

        return t
Beispiel #21
0
 def _gen_token(self, type, value='', lnum=None, position=0, lexpos=None):
     """Build a LexToken; line/lex positions fall back to the wrapped lexer."""
     tok = LexToken()
     tok.lexer = self.lex
     tok.type = type
     tok.value = value
     tok.line_position = position
     # Use the live lexer position when not supplied explicitly.
     tok.lineno = lnum if lnum is not None else self.lex.lineno
     tok.lexpos = lexpos if lexpos is not None else self.lex.lexpos
     return tok
Beispiel #22
0
 def t_DECIMAL_ID(token: LexToken) -> LexToken:
     r'([1-9][0-9]+|[0-9])\.[1-9][0-9]*'
     # Split "a.b" into a pair of ints, e.g. "10.2" -> (10, 2).
     token.value = tuple(map(int, token.value.split('.')))
     # Continue lexing in the 'v0' state.
     token.lexer.begin('v0')
     return token
def t_mls_newline(t: lex.LexToken) -> lex.LexToken:  # noqa: N802
    r"\n+"
    # Track line numbers and remember where the new line starts so column
    # offsets can be derived later.
    t.lexer.lineno += len(t.value)
    t.lexer.linestart = t.lexer.lexpos
    # Re-tag the newline run as an MLS token.
    t.type = "MLS"
    return t
Beispiel #24
0
#             dentstack += [tokens[i].value]
#         elif tokens[i].value < dentstack[-1]:
#             newtok.type = 'DEDENT'
#             dentstack += [tokens[i].value]
#         else:
#             newtok.type = 'NEWLINE'
#     
#         fixedtokens += [newtok]
#     else:
#         fixedtokens += [tokens[i]]

for i in range(len(tokens)-1):
    # NOTE(review): Python 2 print statement — this script predates py3.
    print ">>>", tokens[i].type, tokens[i].value
    fixedtokens += [tokens[i]]
    if tokens[i].type == 'NEWLINE':
        # After each NEWLINE, compare its indentation value against the top
        # of the indent stack and emit a synthetic INDENT/DEDENT token.
        newtok = LexToken()
        newtok.value = None
        newtok.lineno = tokens[i].lineno
        newtok.lexpos = tokens[i].lexpos
        if tokens[i].value > dentstack[-1]:
            newtok.type = 'INDENT'
            dentstack += [tokens[i].value]
        elif tokens[i].value < dentstack[-1]:
            newtok.type = 'DEDENT'
            dentstack = dentstack[:-1]
        
        # Equal indentation assigns no type; only emit when one was set.
        if 'type' in newtok.__dict__:
            fixedtokens += [newtok]

fixedtokens += [tokens[-1]]
dedent = LexToken()
Beispiel #25
0
 def t_time_array(self, t: LexToken):
     r'.*? \[.*\]'
     # Keep only the bracketed array portion of the matched text,
     # e.g. "cmd [1,2]" -> "[1,2]".
     t.type = 'CMDARRAY'
     t.value = re.findall(r'\[.*\]', t.value)[0]
     # print(t)
     return t
Beispiel #26
0
def _new_token(type, value, pos):
    """Return a LexToken of *type*/*value* located at pos=(lineno, lexpos)."""
    tok = LexToken()
    tok.type = type
    tok.value = value
    lineno, lexpos = pos
    tok.lineno = lineno
    tok.lexpos = lexpos
    return tok
Beispiel #27
0
 def t_c7_DEPREL(token: LexToken) -> LexToken:
     r'[^\n\t ]+'
     # Underscore denotes an empty field; otherwise keep the raw text.
     token.value = None if token.value == '_' else token.value
     token.lexer.begin('v7')
     return token
Beispiel #28
0
 def t_c4_XPOS(token: LexToken) -> LexToken:
     r'[^\n\t ]+'
     # Underscore denotes an empty field; otherwise keep the raw text.
     token.value = None if token.value == '_' else token.value
     token.lexer.begin('v4')
     return token
    def parse(self):
        """Hand-rolled scanner over self.lexdata for $-template syntax.

        Produces LBK ("${") / RBK ("$}") delimiters, SPECIAL ("$"), WORD,
        STAR/NOT/OR markers, top-level COMMAs inside a set, and CODE for
        everything else.  Tokens are appended to self.tokens.
        NOTE(review): the local list ``toks`` is built but never returned —
        confirm callers read self.tokens instead.
        """
        i = 0
        data = self.lexdata
        states = self.bracketstates
        prev = self.get_prev
        next = self.get_next

        toks = []
        tok = LexToken()
        tok.type = None
        tok.value = ""
        stack = []

        # True when the char before i1 is a single (unescaped) backslash.
        def escape(i1):
            if i1 == None: i1 = i
            return prev(i1) == "\\" and prev(i1, 2) != "\\"

        # Advance (or rewind) i by |off| chars, keeping lineno/lexpos in sync.
        def inc_i(i, off=1):
            for j in range(abs(off)):
                if i < 0 or i >= len(data): break

                if data[i] in ["\n", "\r"]:
                    self.lineno += 1
                self.lexpos += 1

                if off < 0:
                    i -= 1
                else:
                    i += 1

            return i

        # Stamp position info on *tok* and emit it.
        def push(tok):
            if tok.type == None:
                traceback.print_stack()
                print("ERROR: None token!")
                return

            tok.lineno = self.lineno
            tok.lexpos = self.lexpos
            tok.lexer = self
            toks.append(tok)
            self.tokens.append(tok)
            #print(tok)

        # Flush the current token when the type changes; start a fresh one.
        def newtok(tok, ttype=None):
            if tok.type != ttype and (ttype != None or tok.value != ""):
                if tok.type != None:
                    push(tok)
                tok = LexToken()
                tok.type = ttype
                tok.value = ""
            return tok

        in_set = 0
        while i < len(data):
            cp = prev(i)
            cc = data[i]
            cn = next(i)

            handled = False
            if not escape(i):
                # "${" opens a set; bracket depth counters get a new frame.
                if cc == "$":
                    tok = newtok(tok)

                    if cn == "{":
                        tok.type = "LBK"
                        tok.value = "${"
                        i = inc_i(i)
                        in_set += 1
                        for k in states.keys():
                            states[k].append(0)
                    else:
                        tok.type = "SPECIAL"
                        tok.value = "$"

                    handled = True
                elif cc == "}" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "RBK"
                    tok.value = "$}"
                    i = inc_i(i)
                    in_set -= 1

                    for k in states.keys():
                        states[k].pop(-1)

                    handled = True
                elif cp == "*" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "STAR"
                    tok.value = "*"

                    i = inc_i(i)
                    handled = True
                elif cp == "^" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "NOT"
                    tok.value = "^"

                    i = inc_i(i)
                    handled = True
                elif cp == "|" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "OR"
                    tok.value = "|"

                    i = inc_i(i)
                    handled = True
                elif cc == "," and in_set:
                    # Only commas at bracket depth zero split the set.
                    k = 0

                    for t in self.bracketstates.keys():
                        s = self.bracketstates[t]

                        if s[-1] < 0:
                            #print(t, prev(i, 2), cp, cc, cn, "end")
                            pass

                        k += s[-1]

                    #print(k)
                    if k == 0:
                        tok = newtok(tok)
                        tok.type = "COMMA"
                        tok.value = ","
                        handled = True

                # Track per-kind bracket nesting inside a set.
                if not handled and in_set > 0:
                    if cc in self.bracketstates:
                        states[cc][-1] += 1
                    elif cc in self.bracket_endchars:
                        states[self.bracket_endchars[cc]][-1] -= 1

            def is_word_char(cc):
                return re_word_pat.match(cc) != None

            if not handled:
                cp = prev(i)
                # "$word": greedily consume the longest run of word chars.
                if cp == "$" and tok.type not in ["WORD", "CODE"
                                                  ] and is_word_char(cc):
                    tok = newtok(tok)
                    tok.type = "WORD"
                    tok.value = cc

                    while i < len(data) and re_word_pat.match(
                            tok.value).span() == (0, len(tok.value)):
                        i = inc_i(i)
                        cc = data[i]
                        tok.value += cc

                    i = inc_i(i, -1)
                    tok.value = tok.value[:-1]
                    tok = newtok(tok)
                else:
                    # Anything unrecognized accumulates into a CODE token.
                    tok = newtok(tok, "CODE")
                    tok.value += cc

            i = inc_i(i)

        # Flush whatever token was still being accumulated.
        if tok.type != None:
            push(tok)
Beispiel #30
0
def p_error(p):
  """
  print(p.lexer.prev.lineno, p.lineno)
  if p.lexer.prev.lineno < p.lineno or p.type == "RBRACKET":
    yacc.errok()
    return
  """
  # Yacc error callback (debug variant).  At EOF (p is None) try automatic
  # semicolon insertion once; otherwise delegate to handle_semi_error().
  if glob.g_production_debug:
    if p == None:
      print("in p_error")
    else:
      print("in p_error", p.type, p.value)
      
  if p == None:
    if not restricted() and glob.g_tried_semi == False:
      # Push a synthetic semicolon and let the parser retry.
      t = LexToken()
      t.type = "SEMI"
      t.value = ";"
      t.lexpos = -1
      t.lineno = -1
      glob.g_lexer.push(t)
      glob.g_tried_semi = True
      yacc.errok()
    else:
      sys.stderr.write(glob.g_file + ": error: unexpected end of file\n")
    return
  else:
    glob.g_error_pre = p
    if handle_semi_error(p):
      t = LexToken()
      t.type = "SEMI"
      t.value = ";"
      t.lexpos = p.lexpos
      t.lineno = p.lineno
      #glob.g_lexer.push(t)
      #glob.g_tried_semi = True
      
      yacc.errok()
      glob.g_error = False
      if glob.g_production_debug or glob.g_semi_debug:
        linestr, colstr = err_find_line(p.lexer, p.lexpos);
        lineno = p.lineno if type(p.lineno) == int else p.lineno(0)
        
        sys.stdout.write("handled semicolon error : %d\n" % lineno)
        sys.stdout.write(linestr+"\n")
        sys.stdout.write(colstr+"\n")
      return
    else:      
      glob.g_error = True
      print_err(p)
      return
      
  # NOTE(review): everything below is unreachable — both branches above
  # return.  Kept as-is; delete or rewire if this reporting is wanted.
  if glob.g_error:
    print_err(glob.g_error_pre)
    
  glob.g_error_pre = p
  glob.g_error = True
  
  try:
    line = int(p.lineno)
  except:
    line = p.lineno(1)
  
  try:
    lexdata = p.lexer.lexer.lexdata
    sline = p.lexer.lexer.lexpos
  except:
    lexdata = p.lexer.lexdata
    sline = p.lexer.lexpos
  
  sline = lexdata[sline-40:sline+1]
Beispiel #31
0
def handle_semi_error(p):
  """Try automatic semicolon insertion after a parse error (debug variant).

  Returns True when a synthetic SEMI token was pushed into the lexer and
  parsing may resume; False when this is a real error.
  NOTE(review): depends on glob.* flags and a custom lexer peek/push API.
  """
  if glob.g_production_debug:
    print("in handle_semi_error")
    
  tok = p.lexer.peek()
  if len(p.lexer.peeks) > 1:
    prev = p.lexer.peeks[-2]
  else:
    prev = p.lexer.prev
  cur = p.lexer.cur
  
  # Fall back to the peeked token when prev/cur are missing.
  if prev == None:
    prev = tok
  if cur == None:
    cur = tok
    
  #print("p", prev)
  #print("c", cur)
  #print("t", tok)
  # Tokens may arrive wrapped in single-element lists; unwrap them.
  if type(prev) == list: prev = prev[0]
  if type(cur) == list: cur = cur[0]
  if type(tok) == list: tok = tok[0]
  
  if p != None and type(p) != LexToken:
    print(list(p))
  
  # Insertion is plausible at end of input, across a line break, or
  # around a closing bracket.
  ret = tok == None or cur == None or prev.lineno < tok.lineno
  ret = ret or tok.type == "RBRACKET" or prev.type == "RBRACKET" 
  ret = ret or cur.type == "RBRACKET"
  
  # Restricted productions veto the insertion.
  p2 = restricted()
  if p2 != None and not (prev.type in ["RSBRACKET", "RPAREN"] and restrict_prev() == None):
    ret = False
    p = p2
    print(prev.type, cur.type, p2, restrict_prev())
    print("didn't handle semi error")
    glob.g_line = p.lineno
    glob.g_lexpos = p.lexpos
    #print_err(p)
    
  if ret and not glob.g_tried_semi:
    #"""
    t = LexToken()
    t.type = "SEMI"
    t.value = ";"
    t.lineno = cur.lineno
    t.lexpos = cur.lexpos
    #"""
    
    # Re-queue the current token behind the synthetic semicolon.
    p.lexer.push(p.lexer.cur)
    p.lexer.push(t)
    
    yacc.errok()
    glob.g_error = False
    glob.g_tried_semi = True
  else:
    ret = False
    glob.g_error = True
    glob.g_error_pre = p
    #for l in prodname_log[-5:-1]:
    #  print(l)
      
    #print("a real error occurred 2!?")
    #print_err(p)
    
  return ret
Beispiel #32
0
def t_NUMBER(t: lex.LexToken):
    r'\d+'
    # Convert the matched digit run to an int.
    t.value = int(t.value)
    return t
def t_FLOAT(t: lex.LexToken) -> lex.LexToken:  # noqa: N802
    r"[-]?[0-9]*[.][0-9]+"
    # Convert the (optionally signed) decimal literal to a float.
    t.value = float(t.value)
    return t
Beispiel #34
0
 def t_NEWLINE(self, t: LexToken) -> LexToken:
     # Count every swallowed newline into the lexer's line counter, then
     # collapse the token's value to a single newline character.
     # (No docstring on purpose: ply would treat it as the token regex.)
     newline_run = t.value
     t.lexer.lineno += len(newline_run)
     t.value = newline_run[0]
     return t
    def token(self):
        """Emit one single-character token from self.str, or None when exhausted."""
        idx = self.cur
        source = self.str
        if idx >= len(source):
            return None

        ch = source[idx]

        # Direct single-character classifications.
        mapping = {
            "\\": "BACKSLASH",
            "/": "DIVIDE",
            "[": "LSBRACKET",
            "]": "RSBRACKET",
            "*": "STAR",
            "\n": "LT",
            "\r": "LT",
        }

        t = LexToken()
        if ch in mapping:
            t.type = mapping[ch]
        elif re.match(r"[a-zA-Z0-9_$]+", ch) is not None:
            t.type = "ID_PART"
        else:
            t.type = "UCHAR"

        t.value = ch
        t.lineno = 0
        t.lexpos = idx

        self.cur = idx + 1

        print(t)
        return t
Beispiel #36
0
 def t_INTEGER_ID(token: LexToken) -> LexToken:
     r'[1-9][0-9]*'
     # Convert the matched digits to an int, then continue in state 'v0'.
     token.value = int(token.value)
     token.lexer.begin('v0')
     return token
Beispiel #37
0
 def t_c3_UPOS(token: LexToken) -> LexToken:
     # pylint: disable=missing-docstring
     # Underscore denotes an empty field; otherwise keep the raw text.
     token.value = None if token.value == '_' else token.value
     token.lexer.begin('v3')
     return token
Beispiel #38
0
 def t_ANY_NUMBER(self, t: LexToken) -> LexToken:
     # Coerce the matched digits to an int in place and pass the token on.
     # (No docstring: ply reads rule docstrings as the token regex.)
     number = int(t.value)
     t.value = number
     return t
Beispiel #39
0
 def t_c6_HEAD(token: LexToken) -> LexToken:
     r'([1-9][0-9]+|[0-9])|_'
     # Underscore denotes an empty field; otherwise the head index as int.
     token.value = None if token.value == '_' else int(token.value)
     token.lexer.begin('v6')
     return token
Beispiel #40
0
 def t_CONSTANT(self, token_: LexToken) -> LexToken:
     # Reclassify the token as a keyword when its text is in self.keywords;
     # otherwise it stays a CONSTANT.  (No docstring: ply regex hazard.)
     lookup = self.keywords
     token_.type = lookup.get(token_.value, "CONSTANT")
     return token_
Beispiel #41
0
 def t_c9_MISC(token: LexToken) -> LexToken:
     r'[^\n\t ]+'
     # Underscore denotes an empty field; otherwise keep the raw text.
     token.value = None if token.value == '_' else token.value
     token.lexer.begin('v9')
     return token
 def lexToken(self, typ, val, line, lexpos=0):
     """Construct a LexToken from the given type, value and position."""
     lt = LexToken()
     lt.type = typ
     lt.value = val
     lt.lineno = line
     lt.lexpos = lexpos
     return lt
Beispiel #43
0
 def t_ANY_IRI(self, t: LexToken) -> LexToken:
     # Repack the value as (prefix_name, prefix_ind, iri_value) taken from
     # the named groups of the regex match that produced this token.
     # (No docstring: ply reads rule docstrings as the token regex.)
     m = t.lexer.lexmatch
     t.value = (m.group('prefix_name'), m.group('prefix_ind'), m.group('iri_value'))
     return t
Beispiel #44
0
    def parse(self):
        """Hand-rolled scanner over self.lexdata for $-template syntax.

        Produces LBK ("${") / RBK ("$}") delimiters, SPECIAL ("$"), WORD,
        STAR/NOT/OR markers, top-level COMMAs inside a set, and CODE for
        everything else.  Tokens are appended to self.tokens.
        NOTE(review): the local list ``toks`` is built but never returned —
        confirm callers read self.tokens instead.
        """
        i = 0
        data = self.lexdata
        states = self.bracketstates
        prev = self.get_prev
        next = self.get_next

        toks = []
        tok = LexToken()
        tok.type = None
        tok.value = ""
        stack = []

        # True when the char before i1 is a single (unescaped) backslash.
        def escape(i1):
            if i1 == None:
                i1 = i
            return prev(i1) == "\\" and prev(i1, 2) != "\\"

        # Advance (or rewind) i by |off| chars, keeping lineno/lexpos in sync.
        def inc_i(i, off=1):
            for j in range(abs(off)):
                if i < 0 or i >= len(data):
                    break

                if data[i] in ["\n", "\r"]:
                    self.lineno += 1
                self.lexpos += 1

                if off < 0:
                    i -= 1
                else:
                    i += 1

            return i

        # Stamp position info on *tok* and emit it.
        def push(tok):
            if tok.type == None:
                traceback.print_stack()
                print("ERROR: None token!")
                return

            tok.lineno = self.lineno
            tok.lexpos = self.lexpos
            tok.lexer = self
            toks.append(tok)
            self.tokens.append(tok)
            # print(tok)

        # Flush the current token when the type changes; start a fresh one.
        def newtok(tok, ttype=None):
            if tok.type != ttype and (ttype != None or tok.value != ""):
                if tok.type != None:
                    push(tok)
                tok = LexToken()
                tok.type = ttype
                tok.value = ""
            return tok

        in_set = 0
        while i < len(data):
            cp = prev(i)
            cc = data[i]
            cn = next(i)

            handled = False
            if not escape(i):
                # "${" opens a set; bracket depth counters get a new frame.
                if cc == "$":
                    tok = newtok(tok)

                    if cn == "{":
                        tok.type = "LBK"
                        tok.value = "${"
                        i = inc_i(i)
                        in_set += 1
                        for k in states.keys():
                            states[k].append(0)
                    else:
                        tok.type = "SPECIAL"
                        tok.value = "$"

                    handled = True
                elif cc == "}" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "RBK"
                    tok.value = "$}"
                    i = inc_i(i)
                    in_set -= 1

                    for k in states.keys():
                        states[k].pop(-1)

                    handled = True
                elif cp == "*" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "STAR"
                    tok.value = "*"

                    i = inc_i(i)
                    handled = True
                elif cp == "^" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "NOT"
                    tok.value = "^"

                    i = inc_i(i)
                    handled = True
                elif cp == "|" and cn == "$":
                    tok = newtok(tok)
                    tok.type = "OR"
                    tok.value = "|"

                    i = inc_i(i)
                    handled = True
                elif cc == "," and in_set:
                    # Only commas at bracket depth zero split the set.
                    k = 0

                    for t in self.bracketstates.keys():
                        s = self.bracketstates[t]

                        if s[-1] < 0:
                            # print(t, prev(i, 2), cp, cc, cn, "end")
                            pass

                        k += s[-1]

                    # print(k)
                    if k == 0:
                        tok = newtok(tok)
                        tok.type = "COMMA"
                        tok.value = ","
                        handled = True

                # Track per-kind bracket nesting inside a set.
                if not handled and in_set > 0:
                    if cc in self.bracketstates:
                        states[cc][-1] += 1
                    elif cc in self.bracket_endchars:
                        states[self.bracket_endchars[cc]][-1] -= 1

            def is_word_char(cc):
                return re_word_pat.match(cc) != None

            if not handled:
                cp = prev(i)
                # "$word": greedily consume the longest run of word chars.
                if cp == "$" and tok.type not in ["WORD", "CODE"] and is_word_char(cc):
                    tok = newtok(tok)
                    tok.type = "WORD"
                    tok.value = cc

                    while i < len(data) and re_word_pat.match(tok.value).span() == (0, len(tok.value)):
                        i = inc_i(i)
                        cc = data[i]
                        tok.value += cc

                    i = inc_i(i, -1)
                    tok.value = tok.value[:-1]
                    tok = newtok(tok)
                else:
                    # Anything unrecognized accumulates into a CODE token.
                    tok = newtok(tok, "CODE")
                    tok.value += cc

            i = inc_i(i)

        # Flush whatever token was still being accumulated.
        if tok.type != None:
            push(tok)
Beispiel #45
0
def t_IDENTIFIER(t: LexToken):
    r'[A-Za-z_][A-Za-z_0-9]*'
    # Promote identifiers that are reserved words to their keyword type.
    t.type = reserved.get(t.value, 'IDENTIFIER')
    return t
Beispiel #46
0
    def token(self):
        """Return the next token from the line-oriented input, or None at EOF.

        Tokens queued by a previous call (the CONST half of a prop line) are
        drained first.  Each line is classified as a literal, a ':cap', or a
        'prop:value' pair; unused caps and props are skipped wholesale.
        """
        if self.tokens:
            return self.tokens.pop(0)

        token = None
        while not token:
            if self.lineno >= self.line_count:
                return None

            line = self.lines[self.lineno]
            line_size = len(line)
            line = line.strip()

            if line in literals:
                # Literal lines become a token whose type is the line itself.
                token = LexToken()
                token.type = line
                token.value = line
                token.lineno = self.lineno
                token.lexpos = self.lexpos
            elif line.startswith('0x'):
                # Hex lines are skipped entirely.
                pass
            elif line.startswith(':'):
                cap = line[1:]
                # NOTE(review): `cap > ''` is a non-empty check done via
                # string comparison — confirm that's intentional.
                skip = cap not in self.used_tokens and cap > ''

                if skip:
                    # Skip this cap and every more-indented line under it.
                    line = self.lines[self.lineno]
                    skip_break = line[:len(line) - len(line.lstrip())] + '.'
                    while skip:
                        line = self.lines[self.lineno]
                        skip = not line.startswith(skip_break)
                        self.lexpos += len(line) + 1
                        self.lineno += 1
                    continue

                token = LexToken()
                token.type = 'CAP'
                token.value = cap
                token.lineno = self.lineno
                token.lexpos = self.lexpos
            else:
                prop, _, value = line.partition(':')
                skip = prop not in self.used_tokens and prop > '' and value > ''

                # The CONST half is queued so PROP is returned first and the
                # value follows on the next call.
                if value and not skip:
                    token = LexToken()
                    token.type = 'CONST'
                    token.value = value
                    token.lineno = self.lineno
                    token.lexpos = self.lexpos + line_size - len(value)
                    self.tokens.append(token)
                if prop and not skip:
                    token = LexToken()
                    token.type = 'PROP'
                    token.value = prop
                    token.lineno = self.lineno
                    token.lexpos = self.lexpos

            self.lexpos += line_size + 1
            self.lineno += 1

        return token
Beispiel #47
0
def t_string_END(t: LexToken):
    r'"'
    # Closing quote: the literal spans from code_start up to (excluding) it.
    t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos - 1]
    t.type = "STRING_LITERAL"
    # Leave the exclusive string state.
    t.lexer.begin('INITIAL')
    return t
 def lexToken(self, typ, val, line, lexpos=0):
     """Helper: build a LexToken with the supplied type/value/position."""
     lt = LexToken()
     lt.type = typ
     lt.value = val
     lt.lineno = line
     lt.lexpos = lexpos
     return lt
Beispiel #49
0
def t_DOUBLE_LITERAL(t: LexToken):
    r'[0-9]+\.[0-9]'
    # NOTE(review): the pattern allows only a single digit after the dot —
    # confirm "1.25" is meant to lex as 1.2 followed by 5.
    t.value = float(t.value)
    return t
def t_INT(t: lex.LexToken) -> lex.LexToken:  # noqa: N802
    r"[-]?[0-9]+"
    # Convert the (optionally signed) digit run to an int.
    t.value = int(t.value)
    return t
Beispiel #51
0
 def t_IDENTIFIER(self, token_: LexToken) -> LexToken:
     # Keywords win over plain identifiers; fall back to IDENTIFIER.
     # (No docstring: ply reads rule docstrings as the token regex.)
     kw_type = self.keywords.get(token_.value, "IDENTIFIER")
     token_.type = kw_type
     return token_
Beispiel #52
0
def t_INT(t: lex.LexToken) -> lex.LexToken:
    # Coerce the matched digits into an int before handing the token back.
    # (No docstring: ply reads rule docstrings as the token regex.)
    parsed = int(t.value)
    t.value = parsed
    return t
Beispiel #53
0
def indent_generator(toks):
    """Post process the given stream of tokens to generate INDENT/DEDENT
    tokens.
    
    Note
    ----
    Each generated token's value is the total amount of spaces from the
    beginning of the line.
    
    The way indentation tokens are generated is similar to how it works in
    python."""
    # Indentation stack; index 0 is the innermost (current) level.
    stack = [0]

    # Dummy token to track the token just before the current one
    former = LexToken()
    former.type = "NEWLINE"
    former.value = "dummy"
    former.lineno = 0
    former.lexpos = -1

    def generate_dedent(stck, tok):
        # Pop the innermost level and emit a DEDENT positioned at *tok*.
        amount = stck.pop(0)
        return new_dedent(amount, tok)

    for token in toks:
        if former.type == "NEWLINE":
            # Start of a logical line: measure the indentation from the WS
            # token's width (no WS token means column zero).
            if token.type == "WS":
                indent = len(token.value)
            else:
                indent = 0

            if indent == stack[0]:
                former = token
                if indent > 0:
                    # Same level: swallow the WS token and pass through the
                    # token that follows it.
                    token = six.advance_iterator(toks)
                    former = token
                    yield token
                else:
                    yield former
            elif indent > stack[0]:
                stack.insert(0, indent)
                ind = new_indent(indent, token)
                former = ind
                yield ind
            elif indent < stack[0]:
                if not indent in stack:
                    raise ValueError("Wrong indent at line %d" % token.lineno)
                # Emit one DEDENT per closed level.
                while stack[0] > indent:
                    former = generate_dedent(stack, token)
                    yield former
                if stack[0] > 0:
                    former = six.advance_iterator(toks)
                    yield former
                else:
                    former = token
                    yield token
        else:
            former = token
            yield token

    # Generate additional DEDENT so that the number of INDENT/DEDENT always
    # match
    # NOTE(review): `token` is unbound here when *toks* was empty — confirm
    # callers never pass an empty stream.
    while len(stack) > 1:
        former = generate_dedent(stack, token)
        yield former
Beispiel #54
0
 def t_NEWLINE(self, token_: LexToken) -> LexToken:
     r"""\n"""
     # Advance the lexer's line counter and normalize the type to NEWLINE.
     token_.lexer.lineno += 1
     token_.type = 'NEWLINE'
     return token_
Beispiel #55
0
 def t_COMMENT(token: LexToken) -> LexToken:
     r'[#][^\n]*'
     # Strip the leading '#' and surrounding whitespace from the comment.
     token.value = token.value[1:].strip()
     return token
Beispiel #56
0
 def t_RANGE_ID(token: LexToken) -> LexToken:
     r'[1-9][0-9]*-[1-9][0-9]*'
     # "a-b" -> (a, b) as ints, then continue in state 'v0'.
     token.value = tuple(map(int, token.value.split('-')))
     token.lexer.begin('v0')
     return token
Beispiel #57
0
def _new_token(type, value, pos):
    """Build a LexToken of *type*/*value* at pos=(lineno, lexpos)."""
    made = LexToken()
    made.type, made.value = type, value
    made.lineno, made.lexpos = pos[0], pos[1]
    return made
Beispiel #58
0
def t_STR_end(t: lex.LexToken) -> lex.LexToken:
    # Recover the raw string contents captured since string_start, retag the
    # token as STRING, and leave the string lexer state.
    # (No docstring: ply reads rule docstrings as the token regex.)
    lexer = t.lexer
    t.value = lexer.lexdata[lexer.string_start: lexer.lexpos]
    t.type = 'STRING'
    lexer.pop_state()
    return t