def t_ID(t: lex.LexToken) -> lex.LexToken:
    if t.value[0] in ('t', 'f') and t.value.lower() in ('true', 'false'):
        t.type = 'BOOL'
    else:
        t.type = reserved.get(t.value.lower(), 'ID')
    return t
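# A self-contained sketch of how a rule like t_ID plugs into PLY, assuming a
# hypothetical `reserved` keyword table. Note that PLY function rules need a
# regex docstring, which the snippet above leaves to its surrounding module.
import ply.lex as lex

reserved = {'if': 'IF', 'while': 'WHILE'}
tokens = ['ID', 'BOOL'] + list(reserved.values())
t_ignore = ' \t'

def t_ID(t):
    r"""[A-Za-z_][A-Za-z_0-9]*"""
    if t.value.lower() in ('true', 'false'):
        t.type = 'BOOL'
    else:
        t.type = reserved.get(t.value.lower(), 'ID')
    return t

def t_error(t):
    t.lexer.skip(1)

lexer = lex.lex()
lexer.input("true while foo")
for tok in lexer:
    print(tok.type, tok.value)   # BOOL true / WHILE while / ID foo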
def _lextoken_from_html(self, html_token):
    token = LexToken()
    token.type = {
        0: 'HTML_DOCTYPE',
        1: 'HTML_CHARS',
        2: 'HTML_WS',
        3: 'HTML_STARTTAG',
        4: 'HTML_ENDTAG',
        5: 'HTML_EMPTYTAG',
        6: 'HTML_COMMENT',
        7: 'HTML_PARSEERROR',
    }[html_token['type']]
    # TODO: fix lineno/lexpos
    token.lineno = self.lineno
    token.lexpos = self.lexpos
    token.value = {
        'self_closing': html_token.get('selfClosing', False),
        'name': html_token.get('name', None),
    }
    if isinstance(html_token['data'], (list, tuple)):
        # Tag tokens carry their attributes as a list of pairs.
        token.value['attrs'] = html_token['data']
        token.value['data'] = ''
        if token.value['name'].lower() in voidElements:
            token.type = 'HTML_VOID_TAG'
    else:
        token.value['data'] = html_token['data']
    # A parse-error token is fatal; compare against the mapped string type
    # (the integer tokenTypes['ParseError'] can never equal token.type here).
    if token.type == 'HTML_PARSEERROR':
        raise SyntaxError("Got HTML Parse Error for token {}".format(html_token))
    return token
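# For context, a hypothetical html5lib-style token in the shape the converter
# above expects: 'type' is one of html5lib's small tokenTypes integers
# (3 == StartTag) and 'data' is an attribute list for tags, or plain text for
# character tokens.
html_token = {
    'type': 3,                  # StartTag
    'name': 'br',
    'data': [('class', 'x')],   # becomes token.value['attrs']
    'selfClosing': False,
}
# Because 'br' is in voidElements, _lextoken_from_html would return a token
# of type 'HTML_VOID_TAG' with value
# {'self_closing': False, 'name': 'br', 'attrs': [('class', 'x')], 'data': ''}.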
def p_error(p):
    """
    print(p.lexer.prev.lineno, p.lineno)
    if p.lexer.prev.lineno < p.lineno or p.type == "RBRACKET":
        yacc.errok()
        return
    """
    if p is None:
        if not restricted() and not glob.g_tried_semi:
            # Insert a synthetic semicolon and retry before giving up at EOF.
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = -1
            t.lineno = -1
            glob.g_lexer.push(t)
            glob.g_tried_semi = True
            yacc.errok()
        else:
            sys.stderr.write(glob.g_file + ": error: unexpected end of file\n")
        return
    else:
        glob.g_error_pre = p
        if handle_semi_error(p):
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = p.lexpos
            t.lineno = p.lineno
            #glob.g_lexer.push(t)
            #glob.g_tried_semi = True
            yacc.errok()
            glob.g_error = False
            return
        else:
            glob.g_error = True
            print_err(p)
            return

    # Unreachable: every branch above returns.
    if glob.g_error:
        print_err(glob.g_error_pre)
    glob.g_error_pre = p
    glob.g_error = True
    try:
        line = int(p.lineno)
    except:
        line = p.lineno(1)
    try:
        lexdata = p.lexer.lexer.lexdata
        sline = p.lexer.lexer.lexpos
    except:
        lexdata = p.lexer.lexdata
        sline = p.lexer.lexpos
    sline = lexdata[sline - 40:sline + 1]
def t_ID(t: LexToken):
    r"""[_a-zA-Z][_a-zA-Z0-9]*"""
    # upper-case the token because MyLang is case insensitive
    t.value = t.value.upper()
    # handle special condition
    if t.value == "TRUE" or t.value == "FALSE":
        t.type = "BOOL"
        t.value = str2bool(t.value)
    elif t.value in constants.reserved:
        t.type = t.value
    return t
def t_helpline_line(self, t: LexToken):
    r'[ \t]*.+\n'
    if t.lexer.help_indent is None:
        # The first help line fixes the indentation for the whole block.
        t.lexer.help_indent = re.findall('^[ \t]*', t.value)[0]
    elif not t.value.startswith(t.lexer.help_indent):
        # A dedented line ends the help block: rewind it and pop the state.
        t.lexer.skip(-len(t.value))
        t.lexer.pop_state()
        t.type = 'CONFIG_HELP_END'
        return t
    t.type = "CONFIG_HELP_LINE"
    t.lexer.lineno += 1
    return t
def t_ID(t: lex.LexToken) -> lex.LexToken:  # noqa: N802
    r"[a-zA-Z_][a-zA-Z_0-9-]*"
    t.type = reserved.get(t.value, "ID")  # Check for reserved words
    if t.value[0].isupper():
        t.type = "CID"
    lexer = t.lexer
    end = lexer.lexpos - lexer.linestart + 1
    (s, e) = lexer.lexmatch.span()
    start = end - (e - s)
    t.value = LocatableString(
        t.value,
        Range(lexer.inmfile, lexer.lineno, start, lexer.lineno, end),
        lexer.lexpos,
        lexer.namespace,
    )
    return t
def new_dedent(amount, token):
    tok = LexToken()
    tok.type = "DEDENT"
    tok.value = amount
    tok.lineno = token.lineno
    tok.lexpos = token.lexpos
    return tok
def clone_token(old_token, new_type):
    token = LexToken()
    token.type = new_type
    token.value = old_token.value
    token.lineno = old_token.lineno
    token.lexpos = old_token.lexpos
    return token
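# Usage sketch for the two helpers above (tok stands for any LexToken coming
# out of a running lexer; the surrounding parser code is hypothetical):
#
#   dedent = new_dedent(4, tok)        # DEDENT carrying the indent width,
#                                      # positioned at the triggering token
#   semi = clone_token(tok, "SEMI")    # same value/position, new type
#
# Building fresh LexTokens instead of retyping tok in place keeps the
# original token intact for error reporting.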
def str2tok(tok_string):
    t = LexToken()
    t.type, t.value, lineno, lexpos = re.fullmatch(
        pattern=r'LexToken\(([A-Z_]+),\'([^\']+)\',([0-9]+),([0-9]+)\)',
        string=tok_string,
    ).groups()
    # The position groups are captured as strings; convert them back to ints.
    t.lineno, t.lexpos = int(lineno), int(lexpos)
    return t
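# Round-trip check for str2tok: PLY's LexToken.__repr__ prints exactly
# LexToken(type,'value',lineno,lexpos), which the regex above parses back.
# Note the pattern only accepts uppercase type names and non-empty,
# quote-free values.
from ply.lex import LexToken
import re

src = LexToken()
src.type, src.value, src.lineno, src.lexpos = 'ID', 'foo', 3, 17
t = str2tok(repr(src))   # repr(src) == "LexToken(ID,'foo',3,17)"
assert (t.type, t.value, t.lineno, t.lexpos) == ('ID', 'foo', 3, 17)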
def t_IDENTIFIER(self, t: LexToken) -> LexToken:
    r"[a-zA-Z_][a-zA-Z0-9_]*"
    if t.value in self.keywords:  # May match keywords.
        t.type = t.value.upper()
    return t
def _new_token(type, token):
    tok = LexToken()
    tok.type = type
    tok.value = token.value
    tok.lineno = token.lineno
    tok.lexpos = token.lexpos
    return tok
def _create_token(self, type):
    token = LexToken()
    token.type = type
    token.value = ''
    token.lineno = 0
    token.lexpos = 0
    return token
def emit_autoend(self):
    tok = LexToken()
    tok.type = "AUTO_END"
    tok.value = ""
    tok.lineno = self.lineno
    tok.lexpos = self.lexpos
    return tok
def _to_yacc(self, token_type, token_data):
    token = LexToken()
    token.type = token_type
    token.value = (token_type, token_data)
    token.lineno = 0  # TODO: file offset
    token.lexpos = 0
    self.__to_yacc(token)
def _lextoken(self, type_, value):
    tok = LexToken()
    tok.type = type_
    tok.value = value
    tok.lineno = self.lexer.lineno
    tok.lexpos = self.lexer.lexpos
    return tok
def token(self, value, ty=None):
    t = LexToken()
    t.type = ty if ty is not None else value
    t.value = value
    t.lineno = -1
    t.lexpos = -1
    return t
def make_tok(type_, value, lineno, lexpos):
    token = LexToken()
    token.type = type_
    token.value = value
    token.lineno = lineno
    token.lexpos = lexpos
    return token
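# The small factories above all follow the same recipe: PLY only requires an
# object exposing .type, .value, .lineno and .lexpos, so a bare LexToken
# filled in by hand is enough to feed a parser. For example:
tok = make_tok('NUMBER', 42, 1, 0)
print(tok)   # LexToken(NUMBER,42,1,0)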
def t_provides_item(self, t: LexToken):
    r'[^ \n]+'
    t.type = 'PROVIDES_ITEM'
    version = re.findall(r'([a-zA-Z-_\d.]+)([>=<]+)([\d.]+)', t.value)
    if len(version):
        # version syntax
        t.value = version[0]
    return t
def correct_tag_name(original):
    token = LexToken()
    token.type = original.type
    token.value = original.value[:-1]
    token.lineno = original.lineno
    token.lexpos = original.lexpos
    token.lexer = original.lexer
    return token
def p_class0(self, p):
    """
    class : CLASS TYPEID '{' feature_list '}' ';'
    """
    tok = LexToken()
    tok.type = "OBJECTID"
    tok.value = "object"
    tok.lineno = -1  # synthesized token, not present in the source
    tok.lexpos = -1
    p[0] = class_(p[2], tok, p[4], self.filename)
def createFunctionDefinition(self, def_token, var_token, params, val_node):
    lamToken = LexToken()
    lamToken.value = 'lambda'
    lamToken.type = 'LAMBDA'
    return LetNode(def_token, [
        VariableNode(var_token),
        LambdaNode(lamToken, [Node(None, None, nodes(params)), val_node]),
    ])
def remove_first_and_last_char(original):
    token = LexToken()
    token.type = original.type
    token.value = original.value[1:-1]
    token.lineno = original.lineno
    token.lexpos = original.lexpos
    token.lexer = original.lexer
    return token
def t_config_help(self, t: LexToken):
    r'(help|\-{3}help\-{3})\n'
    t.lexer.help_indent = None
    t.lexer.push_state('helpline')
    t.type = "CONFIG_HELP"
    t.value = t.value[0:-1]
    t.lexer.lineno += 1
    return t
def gen(code):
    for line in code:
        for item in line:
            t = LexToken()
            t.type = item[1]
            t.value = item[0]
            yield t
    yield None
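# Sketch of feeding a hand-rolled stream like gen() to a PLY parser:
# yacc.parse() accepts any object exposing a .token() method, so a thin
# adapter over the generator suffices (GenLexer is a hypothetical name).
# Note gen()'s tokens carry no lineno/lexpos, so position-based error
# reporting will be limited.
class GenLexer:
    def __init__(self, gen):
        self._gen = gen

    def token(self):
        # PLY treats a None return as end of input; gen() already yields a
        # trailing None, and generator exhaustion maps to None as well.
        return next(self._gen, None)

# parser.parse(lexer=GenLexer(gen(code)))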
def new_tok(lexpos, tok_type, lineno, value):
    # Create a token for return
    tok = LexToken()
    tok.value = value
    tok.lineno = lineno
    tok.lexpos = lexpos
    tok.type = tok_type
    return tok
def t_ANY_COMMENT(self, token_: LexToken) -> LexToken:
    r"""[\%]|[\n]"""
    if token_.lexer.current_state() == 'string':
        token_.type = 'NEWLINE'
        token_.lexer.begin('INITIAL')
    else:
        token_.lexer.begin('string')
    return token_
def newtok(tok, ttype=None):
    if tok.type != ttype and (ttype is not None or tok.value != ""):
        if tok.type is not None:
            push(tok)
        tok = LexToken()
        tok.type = ttype
        tok.value = ""
    return tok
def make_ws(tok):
    lt = LexToken()
    lt.type = "CPP_WS"
    lt.value = "\n"
    lt.lexer = lexer
    lt.lineno = tok.lineno
    lt.lexpos = tok.lexpos
    return lt
def _parse_chars(self, data):
    m = js_start_rx.match(data)
    if m is None:
        return None
    pretext = m.group(1)
    start_type = m.group(2)
    self.lexpos -= len(data)
    if len(pretext):
        pretext_tok = LexToken()
        pretext_tok.type = 'HTML_CHARS'
        pretext_tok.value = pretext
        pretext_tok.lineno = self.lineno - pretext.count("\n")
        pretext_tok.lexpos = self.lexpos
        self.next_tokens.append(pretext_tok)
        self.lexpos += len(pretext)
    start_tok = LexToken()
    start_tok.type = self.tbtype[start_type]
    start_tok.value = start_type
    start_tok.lineno = self.lineno
    start_tok.lexpos = self.lexpos
    self.next_tokens.append(start_tok)
    self.lexpos += len(start_type)
    js_lexer = JSLexer()
    js_lexer.input(data[m.end(2):])
    for t in js_lexer:
        t.lineno += self.lineno - 1
        t.lexpos = self.lexpos
        self.lexpos += js_lexer.lexer.lexpos
        if t.type in ('EXPRESSION_TERMINATOR', 'ESCAPED_TERMINATOR', 'JS_TERMINATOR'):
            if t.type != self.ttype[start_type]:
                raise SyntaxError("Expected {} but got {} in char data `{}`".format(
                    self.ttype[start_type], t.type, data))
            self.next_tokens.append(t)
            break
        self.next_tokens.append(t)
    remaining_text = data[m.end(2) + js_lexer.lexer.lexpos:]
    self.lexpos += len(remaining_text)
    return remaining_text
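# Hypothetical shape of the js_start_rx pattern assumed by _parse_chars above:
# group(1) captures any literal text before the delimiter, and group(2) the
# opening delimiter whose token type is then looked up in self.tbtype. The
# delimiter strings here are illustrative, not the original grammar's.
import re
js_start_rx = re.compile(r'(.*?)(\{\{|\{%)', re.S)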
def p_expr4(self, p):
    """
    expr : OBJECTID '(' expr_arg_list ')'
    """
    tok = LexToken()
    tok.type = "OBJECTID"
    tok.value = "self"
    tok.lineno = -1  # synthesized token, not present in the source
    tok.lexpos = -1
    p[0] = dispatch(object_(tok), p[1], p[3])
    p[0].lineno = p.lineno(1)
def p_expr5(self, p):
    """
    expr : OBJECTID '(' ')'
    """
    tok = LexToken()
    tok.type = "OBJECTID"
    tok.value = "self"
    tok.lineno = -1  # synthesized token, not present in the source
    tok.lexpos = -1
    p[0] = dispatch(object_(tok), p[1], nil_Expressions())
    # Set the line number on the result node, as in p_expr4 (assigning to
    # p.lineno itself would clobber the production's lineno method).
    p[0].lineno = p.lineno(1)
def gen_token(value, type, line, lexpos):
    t = LexToken()
    t.value = value
    t.type = type
    t.lineno = line  # PLY expects .lineno rather than .line
    t.lexpos = lexpos
    t.lexer = self
    return t
def _new_token(self, type=None, value=None, lexpos=None, lineno=None) -> LexToken:
    """
    Creates a new lexer token with the given properties.

    :return: a new lexer token with the given properties.
    """
    token = LexToken()
    token.type = type
    token.value = value
    token.lexpos = lexpos
    token.lineno = lineno
    return token
def _new_token(self, new_type, new_value, lineno: int, lexpos: int):
    """
    Creates a new token with the given data.

    :return: new token with the given data.
    """
    token = LexToken()
    token.type = new_type
    token.value = new_value
    token.lineno = lineno
    token.lexpos = lexpos
    return token
def to_tokens(self, token_list):
    result = []
    for values in token_list:
        token = LexToken()
        token.type = values[0]
        token.value = values[1]
        token.lineno = values[2]
        token.lexpos = values[3]
        token.lexer = self.lexer
        result.append(token)
    return result
def token(self):
    t = LexToken()
    c = self.cur
    if c >= len(self.str):
        return None
    c = self.str[c]
    if c == "\\":
        t.type = "BACKSLASH"
    elif c == "/":
        t.type = "DIVIDE"
    elif c == "[":
        t.type = "LSBRACKET"
    elif c == "]":
        t.type = "RSBRACKET"
    elif c == "*":
        t.type = "STAR"
    elif c == "\n" or c == "\r":
        t.type = "LT"
    elif re.match(r"[a-zA-Z0-9_$]+", c) is not None:
        t.type = "ID_PART"
    else:
        t.type = "UCHAR"
    t.value = c
    t.lineno = 0
    t.lexpos = self.cur
    self.cur += 1
    print(t)  # debug output
    return t
def _gen_token(self, type, value='', lnum=None, position=0, lexpos=None):
    """
    Generates a LexToken with the parameters given.
    """
    tok = LexToken()
    tok.lexer = self.lex
    tok.type = type
    tok.value = value
    tok.line_position = position  # I think this will work...
    tok.lineno = self.lex.lineno if lnum is None else lnum
    tok.lexpos = self.lex.lexpos if lexpos is None else lexpos
    return tok
def p_error(self, p):
    # TODO
    if p:
        self._errors.append(p)
        # self._parser.errok()
    else:
        # PLY reports errors at end of input by calling p_error(None), so
        # synthesize an 'error' token covering the rest of the input.
        from ply.lex import LexToken
        tok = LexToken()
        tok.value = self.lexer.lexdata[self.lexer.lexpos:]
        tok.lineno = self.lexer.lineno
        tok.type = 'error'
        tok.lexpos = self.lexer.lexpos
        self._parser.errok()
        return tok
def handle_semi_error(p):
    tok = p.lexer.peek()
    if len(p.lexer.peeks) > 1:
        prev = p.lexer.peeks[-2]
    else:
        prev = p.lexer.prev
    cur = p.lexer.cur
    if prev is None:
        prev = tok
    if cur is None:
        cur = tok
    if isinstance(prev, list):
        prev = prev[0]
    if isinstance(cur, list):
        cur = cur[0]
    if isinstance(tok, list):
        tok = tok[0]
    ret = tok is None or cur is None or prev.lineno < tok.lineno
    ret = ret or tok.type == "RBRACKET" or prev.type == "RBRACKET"
    ret = ret or cur.type == "RBRACKET"
    p2 = restricted()
    if p2 is not None and not (prev.type in ["RSBRACKET", "RPAREN"] and restrict_prev() is None):
        ret = False
        p = p2
    glob.g_line = p.lineno
    glob.g_lexpos = p.lexpos
    if ret and not glob.g_tried_semi:
        t = LexToken()
        t.type = "SEMI"
        t.value = ";"
        t.lineno = cur.lineno
        t.lexpos = cur.lexpos
        p.lexer.push(p.lexer.cur)
        p.lexer.push(t)
        yacc.errok()
        glob.g_error = False
        glob.g_tried_semi = True
    else:
        ret = False
        glob.g_error = True
        glob.g_error_pre = p
    return ret
def _new_token(type, value, pos):
    o = LexToken()
    o.type = type
    o.value = value
    o.lineno, o.lexpos = pos
    return o
def t_ANY_BRACES_OPEN(self, t: LexToken):
    t.type = t.value
    if self.brace_level == 0:
        t.lexer.begin('inbraces')
    self.brace_level += 1
    return t
def t_ANY_BRACES_CLOSE(self, t: LexToken):
    t.type = t.value
    self.brace_level -= 1
    if self.brace_level == 0:
        t.lexer.begin('INITIAL')
    return t
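# The two rules above form a small state machine: the first unbalanced '{'
# enters the 'inbraces' lexer state and its matching '}' returns to INITIAL,
# with self.brace_level counting nesting in between. A sketch of the PLY
# states declaration they assume (the state name is taken from the rules;
# the inclusive/exclusive choice is an assumption):
states = (
    ('inbraces', 'inclusive'),
)
# with self.brace_level initialized to 0 when the lexer wrapper is created.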
def t_ANY_NAME(self, t: LexToken) -> LexToken:
    t.type = 'NAME' if not keyword.isKeyword(t.value) else t.value
    return t
def lexToken(self, typ, val, line, lexpos=0):
    # Method helper to construct a LexToken
    lt = LexToken()
    lt.type, lt.value, lt.lineno, lt.lexpos = typ, val, line, lexpos
    return lt
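# Usage sketch for the helper above inside a lexer test case; PLY's LexToken
# does not define __eq__, so tests compare field tuples instead of tokens
# (the assertEqual-style test harness is hypothetical):
#
#   expected = self.lexToken('NUMBER', 42, 1)
#   actual = self.lexer.token()
#   self.assertEqual((actual.type, actual.value, actual.lineno),
#                    (expected.type, expected.value, expected.lineno))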
#             newtok.type = 'NEWLINE'
#
#             fixedtokens += [newtok]
#     else:
#         fixedtokens += [tokens[i]]
for i in range(len(tokens) - 1):
    print(">>>", tokens[i].type, tokens[i].value)
    fixedtokens += [tokens[i]]
    if tokens[i].type == 'NEWLINE':
        newtok = LexToken()
        newtok.value = None
        newtok.lineno = tokens[i].lineno
        newtok.lexpos = tokens[i].lexpos
        if tokens[i].value > dentstack[-1]:
            newtok.type = 'INDENT'
            dentstack += [tokens[i].value]
        elif tokens[i].value < dentstack[-1]:
            newtok.type = 'DEDENT'
            dentstack = dentstack[:-1]
        # Only emit the token if an INDENT/DEDENT type was actually assigned.
        if 'type' in newtok.__dict__:
            fixedtokens += [newtok]
fixedtokens += [tokens[-1]]
dedent = LexToken()
dedent.value = None
dedent.lineno = 0
dedent.lexpos = 0
dedent.type = 'DEDENT'
def indent_generator(toks):
    """Post process the given stream of tokens to generate INDENT/DEDENT
    tokens.

    Note
    ----
    Each generated token's value is the total amount of spaces from the
    beginning of the line.

    The way indentation tokens are generated is similar to how it works in
    python."""
    stack = [0]

    # Dummy token to track the token just before the current one
    former = LexToken()
    former.type = "NEWLINE"
    former.value = "dummy"
    former.lineno = 0
    former.lexpos = -1

    def generate_dedent(stck, tok):
        amount = stck.pop(0)
        return new_dedent(amount, tok)

    for token in toks:
        if former.type == "NEWLINE":
            if token.type == "WS":
                indent = len(token.value)
            else:
                indent = 0
            if indent == stack[0]:
                former = token
                if indent > 0:
                    token = six.advance_iterator(toks)
                    former = token
                    yield token
                else:
                    yield former
            elif indent > stack[0]:
                stack.insert(0, indent)
                ind = new_indent(indent, token)
                former = ind
                yield ind
            elif indent < stack[0]:
                if indent not in stack:
                    raise ValueError("Wrong indent at line %d" % token.lineno)
                while stack[0] > indent:
                    former = generate_dedent(stack, token)
                    yield former
                if stack[0] > 0:
                    former = six.advance_iterator(toks)
                    yield former
                else:
                    former = token
                    yield token
        else:
            former = token
            yield token

    # Generate additional DEDENT so that the number of INDENT/DEDENT always
    # match
    while len(stack) > 1:
        former = generate_dedent(stack, token)
        yield former
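# Hedged usage sketch for indent_generator: feed it a lexer's token stream
# whose rules emit NEWLINE tokens and leading-whitespace WS tokens; the
# generator consumes the WS tokens and interleaves synthesized INDENT/DEDENT
# tokens (built via the new_indent/new_dedent helpers above) so the parser
# sees Python-like block structure:
#
#   lexer.input(source)
#   for tok in indent_generator(iter(lexer.token, None)):
#       process(tok)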
def handle_semi_error(p):
    if glob.g_production_debug:
        print("in handle_semi_error")
    tok = p.lexer.peek()
    if len(p.lexer.peeks) > 1:
        prev = p.lexer.peeks[-2]
    else:
        prev = p.lexer.prev
    cur = p.lexer.cur
    if prev is None:
        prev = tok
    if cur is None:
        cur = tok
    #print("p", prev)
    #print("c", cur)
    #print("t", tok)
    if isinstance(prev, list):
        prev = prev[0]
    if isinstance(cur, list):
        cur = cur[0]
    if isinstance(tok, list):
        tok = tok[0]
    if p is not None and type(p) != LexToken:
        print(list(p))
    ret = tok is None or cur is None or prev.lineno < tok.lineno
    ret = ret or tok.type == "RBRACKET" or prev.type == "RBRACKET"
    ret = ret or cur.type == "RBRACKET"
    p2 = restricted()
    if p2 is not None and not (prev.type in ["RSBRACKET", "RPAREN"] and restrict_prev() is None):
        ret = False
        p = p2
        print(prev.type, cur.type, p2, restrict_prev())
        print("didn't handle semi error")
    glob.g_line = p.lineno
    glob.g_lexpos = p.lexpos
    #print_err(p)
    if ret and not glob.g_tried_semi:
        t = LexToken()
        t.type = "SEMI"
        t.value = ";"
        t.lineno = cur.lineno
        t.lexpos = cur.lexpos
        p.lexer.push(p.lexer.cur)
        p.lexer.push(t)
        yacc.errok()
        glob.g_error = False
        glob.g_tried_semi = True
    else:
        ret = False
        glob.g_error = True
        glob.g_error_pre = p
        #for l in prodname_log[-5:-1]:
        #    print(l)
        #print("a real error occurred 2!?")
        #print_err(p)
    return ret
def p_error(p):
    """
    print(p.lexer.prev.lineno, p.lineno)
    if p.lexer.prev.lineno < p.lineno or p.type == "RBRACKET":
        yacc.errok()
        return
    """
    if glob.g_production_debug:
        if p is None:
            print("in p_error")
        else:
            print("in p_error", p.type, p.value)
    if p is None:
        if not restricted() and not glob.g_tried_semi:
            # Insert a synthetic semicolon and retry before giving up at EOF.
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = -1
            t.lineno = -1
            glob.g_lexer.push(t)
            glob.g_tried_semi = True
            yacc.errok()
        else:
            sys.stderr.write(glob.g_file + ": error: unexpected end of file\n")
        return
    else:
        glob.g_error_pre = p
        if handle_semi_error(p):
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = p.lexpos
            t.lineno = p.lineno
            #glob.g_lexer.push(t)
            #glob.g_tried_semi = True
            yacc.errok()
            glob.g_error = False
            if glob.g_production_debug or glob.g_semi_debug:
                linestr, colstr = err_find_line(p.lexer, p.lexpos)
                lineno = p.lineno if type(p.lineno) == int else p.lineno(0)
                sys.stdout.write("handled semicolon error : %d\n" % lineno)
                sys.stdout.write(linestr + "\n")
                sys.stdout.write(colstr + "\n")
            return
        else:
            glob.g_error = True
            print_err(p)
            return

    # Unreachable: every branch above returns.
    if glob.g_error:
        print_err(glob.g_error_pre)
    glob.g_error_pre = p
    glob.g_error = True
    try:
        line = int(p.lineno)
    except:
        line = p.lineno(1)
    try:
        lexdata = p.lexer.lexer.lexdata
        sline = p.lexer.lexer.lexpos
    except:
        lexdata = p.lexer.lexdata
        sline = p.lexer.lexpos
    sline = lexdata[sline - 40:sline + 1]
def parse(self):
    i = 0
    data = self.lexdata
    states = self.bracketstates
    prev = self.get_prev
    next = self.get_next
    toks = []
    tok = LexToken()
    tok.type = None
    tok.value = ""
    stack = []

    def escape(i1):
        if i1 is None:
            i1 = i
        return prev(i1) == "\\" and prev(i1, 2) != "\\"

    def inc_i(i, off=1):
        for j in range(abs(off)):
            if i < 0 or i >= len(data):
                break
            if data[i] in ["\n", "\r"]:
                self.lineno += 1
            self.lexpos += 1
            if off < 0:
                i -= 1
            else:
                i += 1
        return i

    def push(tok):
        if tok.type is None:
            traceback.print_stack()
            print("ERROR: None token!")
            return
        tok.lineno = self.lineno
        tok.lexpos = self.lexpos
        tok.lexer = self
        toks.append(tok)
        self.tokens.append(tok)
        # print(tok)

    def newtok(tok, ttype=None):
        if tok.type != ttype and (ttype is not None or tok.value != ""):
            if tok.type is not None:
                push(tok)
            tok = LexToken()
            tok.type = ttype
            tok.value = ""
        return tok

    in_set = 0
    while i < len(data):
        cp = prev(i)
        cc = data[i]
        cn = next(i)
        handled = False
        if not escape(i):
            if cc == "$":
                tok = newtok(tok)
                if cn == "{":
                    tok.type = "LBK"
                    tok.value = "${"
                    i = inc_i(i)
                    in_set += 1
                    for k in states.keys():
                        states[k].append(0)
                else:
                    tok.type = "SPECIAL"
                    tok.value = "$"
                handled = True
            elif cc == "}" and cn == "$":
                tok = newtok(tok)
                tok.type = "RBK"
                tok.value = "$}"
                i = inc_i(i)
                in_set -= 1
                for k in states.keys():
                    states[k].pop(-1)
                handled = True
            elif cp == "*" and cn == "$":
                tok = newtok(tok)
                tok.type = "STAR"
                tok.value = "*"
                i = inc_i(i)
                handled = True
            elif cp == "^" and cn == "$":
                tok = newtok(tok)
                tok.type = "NOT"
                tok.value = "^"
                i = inc_i(i)
                handled = True
            elif cp == "|" and cn == "$":
                tok = newtok(tok)
                tok.type = "OR"
                tok.value = "|"
                i = inc_i(i)
                handled = True
            elif cc == "," and in_set:
                k = 0
                for t in self.bracketstates.keys():
                    s = self.bracketstates[t]
                    if s[-1] < 0:
                        # print(t, prev(i, 2), cp, cc, cn, "end")
                        pass
                    k += s[-1]
                # print(k)
                if k == 0:
                    tok = newtok(tok)
                    tok.type = "COMMA"
                    tok.value = ","
                    handled = True
        if not handled and in_set > 0:
            if cc in self.bracketstates:
                states[cc][-1] += 1
            elif cc in self.bracket_endchars:
                states[self.bracket_endchars[cc]][-1] -= 1

        def is_word_char(cc):
            return re_word_pat.match(cc) is not None

        if not handled:
            cp = prev(i)
            if cp == "$" and tok.type not in ["WORD", "CODE"] and is_word_char(cc):
                tok = newtok(tok)
                tok.type = "WORD"
                tok.value = cc
                while i < len(data) and re_word_pat.match(tok.value).span() == (0, len(tok.value)):
                    i = inc_i(i)
                    cc = data[i]
                    tok.value += cc
                # Back up past the character that broke the word pattern.
                i = inc_i(i, -1)
                tok.value = tok.value[:-1]
                tok = newtok(tok)
            else:
                tok = newtok(tok, "CODE")
                tok.value += cc
        i = inc_i(i)
    if tok.type is not None:
        push(tok)