def p_error(p):
    """
    PLY error-recovery hook: try automatic semicolon insertion before giving up.

    Disabled earlier implementation, kept for reference:
        print(p.lexer.prev.lineno, p.lineno)
        if p.lexer.prev.lineno < p.lineno or p.type == "RBRACKET":
            yacc.errok()
            return
    """
    if p == None:
        # End of input: try inserting one virtual SEMI (unless in a
        # "restricted" production or we already tried), else report EOF.
        if not restricted() and glob.g_tried_semi == False:
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = -1  # synthetic token: no real source position
            t.lineno = -1
            glob.g_lexer.push(t)
            glob.g_tried_semi = True
            yacc.errok()
        else:
            sys.stderr.write(glob.g_file + ": error: unexpected end of file\n")
        return
    else:
        glob.g_error_pre = p
        if handle_semi_error(p):
            # handle_semi_error already pushed the SEMI; this token is unused.
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = p.lexpos
            t.lineno = p.lineno
            #glob.g_lexer.push(t)
            #glob.g_tried_semi = True
            yacc.errok()
            glob.g_error = False
            return
        else:
            glob.g_error = True
            print_err(p)
            return
    # NOTE(review): everything below is unreachable — both branches above
    # return.  Kept byte-identical; presumably leftover diagnostics code.
    if glob.g_error:
        print_err(glob.g_error_pre)
    glob.g_error_pre = p
    glob.g_error = True
    try:
        line = int(p.lineno)
    except:
        line = p.lineno(1)
    try:
        lexdata = p.lexer.lexer.lexdata
        sline = p.lexer.lexer.lexpos
    except:
        lexdata = p.lexer.lexdata
        sline = p.lexer.lexpos
    sline = lexdata[sline-40:sline+1]
def token(self):
    """Emit the next one-character token from self.str, or None at end of input.

    Also echoes each token to stdout (debug print preserved from the original).
    """
    pos = self.cur
    if pos >= len(self.str):
        return None
    ch = self.str[pos]
    # Fixed single-character token kinds; anything word-like is ID_PART,
    # everything else falls through to UCHAR.
    fixed = {
        "\\": "BACKSLASH",
        "/": "DIVIDE",
        "[": "LSBRACKET",
        "]": "RSBRACKET",
        "*": "STAR",
        "\n": "LT",
        "\r": "LT",
    }
    if ch in fixed:
        kind = fixed[ch]
    elif re.match(r"[a-zA-Z0-9_$]+", ch) is not None:
        kind = "ID_PART"
    else:
        kind = "UCHAR"
    tok = LexToken()
    tok.type = kind
    tok.value = ch
    tok.lineno = 0
    tok.lexpos = pos
    self.cur += 1
    print(tok)
    return tok
def token(self):
    """Emit the next one-character token from self.str, or None when exhausted.

    Also echoes each token to stdout (debug print preserved from the original).
    """
    pos = self.cur
    if pos >= len(self.str):
        return None
    ch = self.str[pos]
    # Map literal characters to token kinds; word characters become ID_PART,
    # anything else UCHAR.
    fixed = {
        "\\": "BACKSLASH",
        "/": "DIVIDE",
        "[": "LSBRACKET",
        "]": "RSBRACKET",
        "*": "STAR",
        "\n": "LT",
        "\r": "LT",
    }
    if ch in fixed:
        kind = fixed[ch]
    elif re.match(r"[a-zA-Z0-9_$]+", ch) is not None:
        kind = "ID_PART"
    else:
        kind = "UCHAR"
    tok = LexToken()
    tok.type = kind
    tok.value = ch
    tok.lineno = 0
    tok.lexpos = pos
    self.cur += 1
    print(tok)
    return tok
def _new_token(type, token):
    """Clone *token* as a fresh LexToken carrying the given *type*.

    Value and source position are copied from the original token.
    """
    clone = LexToken()
    clone.type = type
    for attr in ("value", "lineno", "lexpos"):
        setattr(clone, attr, getattr(token, attr))
    return clone
def _create_token(self, type):
    """Build a bare LexToken of the given *type* with an empty value and
    zeroed position information."""
    tok = LexToken()
    tok.type = type
    tok.value = ''
    tok.lexpos = 0
    tok.lineno = 0
    return tok
def _lextoken( self, type_, value ) :
    """Create a LexToken of *type_*/*value* stamped with the lexer's
    current line number and position."""
    t = LexToken()
    t.type = type_
    t.value = value
    t.lineno = self.lexer.lineno
    t.lexpos = self.lexer.lexpos
    return t
def _to_yacc(self, token_type, token_data):
    """Wrap (token_type, token_data) into a LexToken and hand it to the parser."""
    tok = LexToken()
    tok.type = token_type
    tok.value = (token_type, token_data)
    # TODO: file offset — real positions are not tracked yet.
    tok.lineno = 0
    tok.lexpos = 0
    self.__to_yacc(tok)
def new_dedent(amount, token):
    """Make a DEDENT token whose value is *amount*, positioned at *token*."""
    dedent = LexToken()
    dedent.type = "DEDENT"
    dedent.value = amount
    dedent.lineno = token.lineno
    dedent.lexpos = token.lexpos
    return dedent
def token(self, value, ty=None):
    """Build a synthetic LexToken.

    :param value: token value; also used as the type when *ty* is omitted.
    :param ty: explicit token type, or None to reuse *value*.
    :return: a LexToken with no real source position (lineno/lexpos = -1).
    """
    t = LexToken()
    # Idiom fix: compare against None with `is not`, not `!=`.
    t.type = ty if ty is not None else value
    t.value = value
    t.lineno = -1
    t.lexpos = -1
    return t
def emit_autoend(self):
    """Produce an AUTO_END token at the lexer's current position."""
    auto_end = LexToken()
    auto_end.type = "AUTO_END"
    auto_end.value = ""
    auto_end.lineno = self.lineno
    auto_end.lexpos = self.lexpos
    return auto_end
def clone_token(old_token, new_type):
    """Copy *old_token* (value and position) into a new token of *new_type*."""
    copied = LexToken()
    copied.type = new_type
    for attr in ("value", "lineno", "lexpos"):
        setattr(copied, attr, getattr(old_token, attr))
    return copied
def clone_token(old_token, new_type):
    """Duplicate *old_token*, retagging the copy with *new_type*."""
    copied = LexToken()
    copied.type = new_type
    for attr in ("value", "lineno", "lexpos"):
        setattr(copied, attr, getattr(old_token, attr))
    return copied
def new_dedent(amount, token):
    """Build a DEDENT token of *amount*, located where *token* is."""
    dedent = LexToken()
    dedent.type = "DEDENT"
    dedent.value = amount
    dedent.lineno = token.lineno
    dedent.lexpos = token.lexpos
    return dedent
def _lextoken_from_html(self, html_token):
    """Convert an html5lib-style token dict into a LexToken.

    Tag tokens get a dict value with 'name'/'self_closing' (plus 'attrs' for
    tags); character-ish tokens carry their text in value['data'].
    """
    token = LexToken()
    # Map the numeric html5lib token type to our grammar's token names.
    token.type = {
        0 : 'HTML_DOCTYPE',
        1 : 'HTML_CHARS',
        2 : 'HTML_WS',
        3 : 'HTML_STARTTAG',
        4 : 'HTML_ENDTAG',
        5 : 'HTML_EMPTYTAG',
        6 : 'HTML_COMMENT',
        7 : 'HTML_PARSEERROR',
    }[html_token['type']]
    # TODO: fix lineno/lexpos — positions come from this lexer, not the HTML token.
    token.lineno = self.lineno
    token.lexpos = self.lexpos
    token.value = {
        'self_closing' : html_token.get('selfClosing', False),
        'name' : html_token.get('name', None),
    }
    if isinstance(html_token['data'], (list, tuple)):
        # list/tuple data means attributes (a tag token).
        token.value['attrs'] = html_token['data']
        token.value['data'] = ''
        if token.value['name'].lower() in voidElements:
            token.type = 'HTML_VOID_TAG'
    else:
        token.value['data'] = html_token['data']
    # NOTE(review): token.type has already been remapped to a string above,
    # so this comparison against tokenTypes['ParseError'] (presumably an int)
    # looks like it can never be true — confirm against html5lib's tokenTypes.
    if token.type == tokenTypes['ParseError']:
        raise SyntaxError("Got HTML Parse Error for token {}".format(html_token))
    return token
def _new_token(type, token):
    """Return a copy of *token* whose type is replaced by *type*."""
    clone = LexToken()
    clone.type = type
    for attr in ("value", "lineno", "lexpos"):
        setattr(clone, attr, getattr(token, attr))
    return clone
def make_tok(type_, value, lineno, lexpos):
    """Assemble a LexToken from its four components."""
    tok = LexToken()
    tok.type = type_
    tok.value = value
    tok.lineno = lineno
    tok.lexpos = lexpos
    return tok
def _to_yacc(self, token_type, token_data):
    """Package (token_type, token_data) as a LexToken and feed the parser."""
    tok = LexToken()
    tok.type = token_type
    tok.value = (token_type, token_data)
    # TODO: file offset — positions are not tracked yet.
    tok.lineno = 0
    tok.lexpos = 0
    self.__to_yacc(tok)
def token(self, value, ty=None):
    """Build a synthetic LexToken.

    :param value: token value; doubles as the type when *ty* is omitted.
    :param ty: explicit token type, or None to reuse *value*.
    :return: a LexToken with sentinel position (-1/-1).
    """
    t = LexToken()
    # Idiom fix: `is not None` instead of `!= None`.
    t.type = ty if ty is not None else value
    t.value = value
    t.lineno = -1
    t.lexpos = -1
    return t
def new_tok(lexpos, tok_type, lineno, value):
    """Create a token for return — a LexToken built from the given fields."""
    built = LexToken()
    built.type = tok_type
    built.value = value
    built.lineno = lineno
    built.lexpos = lexpos
    return built
def correct_tag_name(original):
    """Return a copy of *original* with the last character of its value
    dropped; type, position and lexer reference are preserved."""
    fixed = LexToken()
    fixed.value = original.value[:-1]
    for attr in ("type", "lineno", "lexpos", "lexer"):
        setattr(fixed, attr, getattr(original, attr))
    return fixed
def make_ws(tok):
    """Make a CPP_WS newline token at *tok*'s position, bound to the
    enclosing `lexer` (closed over from the surrounding scope)."""
    ws = LexToken()
    ws.type = "CPP_WS"
    ws.value = "\n"
    ws.lexer = lexer
    ws.lineno = tok.lineno
    ws.lexpos = tok.lexpos
    return ws
def p_class0(self, p):
    """ class : CLASS TYPEID '{' feature_list '}' ';' """
    # Class with no explicit parent: synthesize an OBJECTID token "object"
    # to act as the inherited type.  (The docstring above is the PLY grammar
    # rule and must not be edited.)
    tok = LexToken()
    tok.type = "OBJECTID"
    tok.value = "object"
    tok.lineno = -1  #this is just inserted not present
    tok.lexpos = -1
    p[0] = class_(p[2], tok, p[4], self.filename)
def remove_first_and_last_char(original):
    """Return a copy of *original* whose value has its first and last
    characters stripped (e.g. surrounding quotes); everything else is kept."""
    trimmed = LexToken()
    trimmed.value = original.value[1:-1]
    for attr in ("type", "lineno", "lexpos", "lexer"):
        setattr(trimmed, attr, getattr(original, attr))
    return trimmed
def _parse_chars(self, data):
    """Scan a character-data chunk for an embedded JS expression.

    Queues HTML_CHARS for any leading text, a start token for the JS opener,
    then the JS tokens up to the matching terminator.  Returns the text left
    over after the JS section, or None if no JS start marker is found.
    """
    m = js_start_rx.match(data)
    if m is None:
        return None
    pretext = m.group(1)       # text before the JS opener
    start_type = m.group(2)    # the opener itself
    # Rewind lexpos to the start of `data`; it is re-advanced per piece below.
    self.lexpos -= len(data)
    if len(pretext):
        pretext_tok = LexToken()
        pretext_tok.type = 'HTML_CHARS'
        pretext_tok.value = pretext
        # Back-date lineno by the newlines the pretext spans.
        pretext_tok.lineno = self.lineno - pretext.count("\n")
        pretext_tok.lexpos = self.lexpos
        self.next_tokens.append(pretext_tok)
        self.lexpos += len(pretext)
    start_tok = LexToken()
    start_tok.type = self.tbtype[start_type]
    start_tok.value = start_type
    start_tok.lineno = self.lineno
    start_tok.lexpos = self.lexpos
    self.next_tokens.append(start_tok)
    self.lexpos += len(start_type)
    # Hand the rest of the chunk to a JS sub-lexer and queue its tokens.
    js_lexer = JSLexer()
    js_lexer.input(data[m.end(2):])
    for t in js_lexer:
        t.lineno += self.lineno - 1
        t.lexpos = self.lexpos
        self.lexpos += js_lexer.lexer.lexpos
        if t.type in ('EXPRESSION_TERMINATOR', 'ESCAPED_TERMINATOR', 'JS_TERMINATOR'):
            # The terminator must match the opener's expected kind.
            if t.type != self.ttype[start_type]:
                raise SyntaxError("Expected {} but got {} in char data `{}`".format(self.ttype[start_type], t.type, data))
            self.next_tokens.append(t)
            break
        self.next_tokens.append(t)
    remaining_text = data[m.end(2) + js_lexer.lexer.lexpos:]
    self.lexpos += len(remaining_text)
    return remaining_text
def p_expr4(self, p):
    """ expr : OBJECTID '(' expr_arg_list ')' """
    # Implicit dispatch on "self": synthesize the receiver token.  (The
    # docstring above is the PLY grammar rule and must not be edited.)
    tok = LexToken()
    tok.type = "OBJECTID"
    tok.value = "self"
    tok.lineno = -1  #this is just inserted not present
    tok.lexpos = -1
    p[0] = dispatch(object_(tok), p[1], p[3])
    p[0].lineno = p.lineno(1)
def gen_token(value, type, line, lexpos):
    """Build a LexToken bound to the enclosing lexer (`self` from closure).

    :param value: token value.
    :param type: token type name.
    :param line: source line number.
    :param lexpos: character offset in the input.
    """
    t = LexToken()
    t.value = value
    t.type = type
    t.line = line    # kept for backward compatibility with existing readers
    # FIX: PLY itself reads `lineno`; the original only set the non-standard
    # `line` attribute, leaving tokens without a line number.
    t.lineno = line
    t.lexpos = lexpos
    t.lexer = self
    return t
def p_expr5(self, p):
    """ expr : OBJECTID '(' ')' """
    # Implicit dispatch on "self" with no arguments: synthesize the receiver
    # token.  (The docstring above is the PLY grammar rule — unchanged.)
    tok = LexToken()
    tok.type = "OBJECTID"
    tok.value = "self"
    tok.lineno = -1  # this is just inserted, not present in the source
    tok.lexpos = -1
    p[0] = dispatch(object_(tok), p[1], nil_Expressions())
    # BUG FIX: the original did `p.lineno = p.lineno(1)`, clobbering the
    # production's lineno accessor instead of tagging the AST node.  The
    # sibling rule p_expr4 shows the intended form:
    p[0].lineno = p.lineno(1)
def gen_token(value, type, line, lexpos):
    """Build a LexToken bound to the enclosing lexer (`self` from closure).

    :param value: token value.
    :param type: token type name.
    :param line: source line number.
    :param lexpos: character offset in the input.
    """
    t = LexToken()
    t.value = value
    t.type = type
    t.line = line    # kept for backward compatibility with existing readers
    # FIX: PLY itself reads `lineno`; the original only set the non-standard
    # `line` attribute, leaving tokens without a line number.
    t.lineno = line
    t.lexpos = lexpos
    t.lexer = self
    return t
def _new_token(self, type=None, value=None, lexpos=None, lineno=None) -> LexToken:
    """
    Creates a new lexer token with the given properties.

    :return: a new lexer token with the given properties.
    """
    token = LexToken()
    token.type = type
    token.value = value
    token.lexpos = lexpos
    token.lineno = lineno
    # BUG FIX: the original fell off the end and returned None despite the
    # `-> LexToken` annotation and the docstring's promise.
    return token
def to_tokens(self, token_list):
    """Convert an iterable of (type, value, lineno, lexpos) tuples into
    LexToken objects bound to this object's lexer."""
    converted = []
    for entry in token_list:
        tok = LexToken()
        tok.type = entry[0]
        tok.value = entry[1]
        tok.lineno = entry[2]
        tok.lexpos = entry[3]
        tok.lexer = self.lexer
        converted.append(tok)
    return converted
def _new_token(self, new_type, new_value, lineno: int, lexpos: int):
    """
    Creates a new token with the given data.

    :return: new token with the given data.
    """
    tok = LexToken()
    tok.type = new_type
    tok.value = new_value
    tok.lineno = lineno
    tok.lexpos = lexpos
    return tok
def _new_token(self, new_type, new_value, lineno: int, lexpos: int):
    """
    Creates a new token with the given data.

    :return: new token with the given data.
    """
    tok = LexToken()
    tok.type = new_type
    tok.value = new_value
    tok.lineno = lineno
    tok.lexpos = lexpos
    return tok
def _gen_token(self, type, value='', lnum=None, position=0, lexpos=None):
    """Generates a LexToken with the parameters given.

    When *lnum*/*lexpos* are omitted, the values are taken from the
    underlying lexer's current state.
    """
    made = LexToken()
    made.lexer = self.lex
    made.type = type
    made.value = value
    made.line_position = position  # I think this will work...
    if lnum is None:
        lnum = self.lex.lineno
    made.lineno = lnum
    if lexpos is None:
        lexpos = self.lex.lexpos
    made.lexpos = lexpos
    return made
def _gen_token(self, type, value='', lnum=None, position=0, lexpos=None):
    """Generates a LexToken with the parameters given.

    Missing *lnum*/*lexpos* default to the underlying lexer's current state.
    """
    made = LexToken()
    made.lexer = self.lex
    made.type = type
    made.value = value
    made.line_position = position  # I think this will work...
    if lnum is None:
        lnum = self.lex.lineno
    made.lineno = lnum
    if lexpos is None:
        lexpos = self.lex.lexpos
    made.lexpos = lexpos
    return made
def _new_token(self, type=None, value=None, lexpos=None, lineno=None) -> LexToken: """ Creates a new lexer token with the given properties. :return: a new lexer token with the given properties. """ token = LexToken() token.type = type token.value = value token.lexpos = lexpos token.lineno = lineno
def p_error(self, p):
    """PLY error hook: record the offending token, or synthesize an EOF
    'error' token so the parser can recover at end of input."""
    # TODO
    if p:
        self._errors.append(p)
        pass
        # self._parser.errok()
    else:
        # hack handle eof, don't know why ply behaves this way
        from ply.lex import LexToken
        tok = LexToken()
        tok.value = self.lexer.lexdata[self.lexer.lexpos:]
        tok.lineno = self.lexer.lineno
        tok.type = 'error'
        tok.lexpos = self.lexer.lexpos
        self._parser.errok()
        return tok
def p_error(self, p):
    """PLY error hook: collect the bad token, or build a synthetic 'error'
    token at EOF so the parser can continue."""
    # TODO
    if p:
        self._errors.append(p)
        pass
        # self._parser.errok()
    else:
        # hack handle eof, don't know why ply behaves this way
        from ply.lex import LexToken
        tok = LexToken()
        tok.value = self.lexer.lexdata[self.lexer.lexpos:]
        tok.lineno = self.lexer.lineno
        tok.type = 'error'
        tok.lexpos = self.lexer.lexpos
        self._parser.errok()
        return tok
def handle_semi_error(p):
    """Attempt automatic semicolon insertion at a parse error.

    Inspects the previous/current/next tokens to decide whether a virtual
    SEMI may be inserted (newline boundary or a '}' nearby).  Returns True
    when recovery was performed, False otherwise.  Mutates global error
    state in `glob` and the lexer's pushback queue.
    """
    tok = p.lexer.peek()
    # prev = the token before the current one (peek buffer if available).
    if len(p.lexer.peeks) > 1:
        prev = p.lexer.peeks[-2]
    else:
        prev = p.lexer.prev
    cur = p.lexer.cur
    if prev == None:
        prev = tok
    if cur == None:
        cur = tok
    # Peek entries may be [token, lexpos, lineno] lists — unwrap them.
    if type(prev) == list:
        prev = prev[0]
    if type(cur) == list:
        cur = cur[0]
    if type(tok) == list:
        tok = tok[0]
    # Insertion is allowed at EOF, across a line break, or next to '}'.
    ret = tok == None or cur == None or prev.lineno < tok.lineno
    ret = ret or tok.type == "RBRACKET" or prev.type == "RBRACKET"
    ret = ret or cur.type == "RBRACKET"
    # "Restricted" productions (e.g. return/continue) forbid insertion.
    p2 = restricted()
    if p2 != None and not (prev.type in ["RSBRACKET", "RPAREN"] and restrict_prev() == None):
        ret = False
        p = p2
    glob.g_line = p.lineno
    glob.g_lexpos = p.lexpos
    if ret and not glob.g_tried_semi:
        t = LexToken()
        t.type = "SEMI"
        t.value = ";"
        t.lineno = cur.lineno
        t.lexpos = cur.lexpos
        # Re-queue the current token behind the inserted SEMI.
        p.lexer.push(p.lexer.cur)
        p.lexer.push(t)
        yacc.errok()
        glob.g_error = False
        glob.g_tried_semi = True
    else:
        ret = False
        glob.g_error = True
        glob.g_error_pre = p
    return ret
def peek(self):
    """Read the next token from the wrapped lexer, remember a copy in the
    peek buffer, and return the (mutated) original token.

    The copy stored in self.peeks carries the comment bookkeeping; the
    returned token has its lineno refreshed and its lexer rebound to self.
    """
    p = self.lexer.token()
    if p is None:
        return None
    t = LexToken()
    t.type = p.type
    t.value = p.value
    t.lexpos = p.lexpos
    t.lineno = self.lexer.lineno
    # Attach comment-tracking state to the buffered copy.
    t._comments = self.lexer.comments
    t._comment = self.lexer.comment
    t._comment_id = self.lexer.comment_id
    p.lineno = self.lexer.lineno
    p.lexer = self
    # Buffer entry format: [token copy, lexer position, our lineno].
    self.peeks.append([t, self.lexer.lexpos, self.lineno])
    return p
def _replace_escaped_characters(self, original):
    """Return a copy of *original* with escape sequences in its value
    expanded via self.tag_escape_replacement (newline, blank, tab, or a
    kept literal)."""
    def replace_escape(matchobj):
        # Group precedence mirrors the regex's alternatives.
        if matchobj.group("newline"):
            return '\n'
        if matchobj.group("blank"):
            return ' '
        if matchobj.group("keep"):
            return matchobj.group("keep")
        if matchobj.group("tab"):
            return '\t'
        return None

    expanded = LexToken()
    expanded.value = self.tag_escape_replacement.sub(replace_escape, original.value)
    for attr in ("type", "lineno", "lexpos", "lexer"):
        setattr(expanded, attr, getattr(original, attr))
    return expanded
def t_ID(t): global last_id #p = t.lexer._lexwithprev.peek_i(0) #if p is not None and p.type == "COLON": # t.type = "ID_COLON" # t.lexer._lexwithprev.next() #else: # t.type = reserved.get(t.value,'ID') # Check for reserved words ld = t.lexer.lexdata li = t.lexpos + len(t.value) while li < len(ld) and ld[li] in [" ", "\n", "\r", "\t"]: li += 1 if li < len(ld) and ld[li] == ":" and t.value not in reserved: t.type = "ID_COLON" else: t.type = reserved.get(t.value, 'ID') # Check for reserved words if class_property_validate(ld, t.lexpos) and last_id != "CLASS_PROP_PRE": t2 = LexToken() t2.type = t.type t2.value = t.value t2.lineno = t.lineno t2.lexer = t.lexer t2.lexpos = t.lexpos t.lexer._lexwithprev.push(t2) t.type = "CLASS_PROP_PRE" t.value = "" last_id = t.type #sys.exit() return t last_id = t.type return t
def token(self):
    """Line-oriented tokenizer.

    Serves queued tokens first, then scans self.lines producing: literal
    tokens, CAP tokens (':name' lines, skipping unused capability blocks),
    and PROP/CONST pairs ('prop:value' lines — the CONST is queued and the
    PROP returned first).  Lines starting with '0x' are ignored.
    """
    if self.tokens:
        return self.tokens.pop(0)
    token = None
    while not token:
        if self.lineno >= self.line_count:
            return None
        line = self.lines[self.lineno]
        line_size = len(line)
        line = line.strip()
        if line in literals:
            token = LexToken()
            token.type = line
            token.value = line
            token.lineno = self.lineno
            token.lexpos = self.lexpos
        elif line.startswith('0x'):
            # Hex lines are skipped entirely.
            pass
        elif line.startswith(':'):
            cap = line[1:]
            skip = cap not in self.used_tokens and cap > ''
            if skip:
                # Skip every following line indented deeper than this one
                # (until a line at/above this indent level reappears).
                line = self.lines[self.lineno]
                skip_break = line[:len(line) - len(line.lstrip())] + '.'
                while skip:
                    line = self.lines[self.lineno]
                    skip = not line.startswith(skip_break)
                    self.lexpos += len(line) + 1
                    self.lineno += 1
                continue
            token = LexToken()
            token.type = 'CAP'
            token.value = cap
            token.lineno = self.lineno
            token.lexpos = self.lexpos
        else:
            prop, _, value = line.partition(':')
            skip = prop not in self.used_tokens and prop > '' and value > ''
            if value and not skip:
                # CONST is queued; its lexpos points at the value text.
                token = LexToken()
                token.type = 'CONST'
                token.value = value
                token.lineno = self.lineno
                token.lexpos = self.lexpos + line_size - len(value)
                self.tokens.append(token)
            if prop and not skip:
                token = LexToken()
                token.type = 'PROP'
                token.value = prop
                token.lineno = self.lineno
                token.lexpos = self.lexpos
        # +1 accounts for the newline stripped from each stored line.
        self.lexpos += line_size + 1
        self.lineno += 1
    return token
def indent_generator(toks):
    """Post process the given stream of tokens to generate INDENT/DEDENT
    tokens.

    Note
    ----
    Each generated token's value is the total amount of spaces from the
    beginning of the line.

    The way indentation tokens are generated is similar to how it works in
    python."""
    # Stack of open indentation widths; stack[0] is the innermost level.
    stack = [0]
    # Dummy token to track the token just before the current one
    former = LexToken()
    former.type = "NEWLINE"
    former.value = "dummy"
    former.lineno = 0
    former.lexpos = -1

    def generate_dedent(stck, tok):
        # Pop one indentation level and emit a DEDENT positioned at tok.
        amount = stck.pop(0)
        return new_dedent(amount, tok)

    for token in toks:
        if former.type == "NEWLINE":
            # Start of a line: measure its leading whitespace.
            if token.type == "WS":
                indent = len(token.value)
            else:
                indent = 0
            if indent == stack[0]:
                former = token
                if indent > 0:
                    # Same level: swallow the WS token, emit what follows.
                    token = six.advance_iterator(toks)
                    former = token
                    yield token
                else:
                    yield former
            elif indent > stack[0]:
                stack.insert(0, indent)
                ind = new_indent(indent, token)
                former = ind
                yield ind
            elif indent < stack[0]:
                if not indent in stack:
                    raise ValueError("Wrong indent at line %d" % token.lineno)
                while stack[0] > indent:
                    former = generate_dedent(stack, token)
                    yield former
                if stack[0] > 0:
                    former = six.advance_iterator(toks)
                    yield former
                else:
                    former = token
                    yield token
        else:
            former = token
            yield token
    # Generate additional DEDENT so that the number of INDENT/DEDENT always
    # match
    while len(stack) > 1:
        former = generate_dedent(stack, token)
        yield former
def indent_generator(toks):
    """Post process the given stream of tokens to generate INDENT/DEDENT
    tokens.

    Note
    ----
    Each generated token's value is the total amount of spaces from the
    beginning of the line.

    The way indentation tokens are generated is similar to how it works in
    python."""
    # Stack of open indentation widths; stack[0] is the innermost level.
    stack = [0]
    # Dummy token to track the token just before the current one
    former = LexToken()
    former.type = "NEWLINE"
    former.value = "dummy"
    former.lineno = 0
    former.lexpos = -1

    def generate_dedent(stck, tok):
        # Pop one indentation level and emit a DEDENT positioned at tok.
        amount = stck.pop(0)
        return new_dedent(amount, tok)

    for token in toks:
        if former.type == "NEWLINE":
            # Start of a line: measure its leading whitespace.
            if token.type == "WS":
                indent = len(token.value)
            else:
                indent = 0
            if indent == stack[0]:
                former = token
                if indent > 0:
                    # Same level: swallow the WS token, emit what follows.
                    token = six.advance_iterator(toks)
                    former = token
                    yield token
                else:
                    yield former
            elif indent > stack[0]:
                stack.insert(0, indent)
                ind = new_indent(indent, token)
                former = ind
                yield ind
            elif indent < stack[0]:
                if not indent in stack:
                    raise ValueError("Wrong indent at line %d" % token.lineno)
                while stack[0] > indent:
                    former = generate_dedent(stack, token)
                    yield former
                if stack[0] > 0:
                    former = six.advance_iterator(toks)
                    yield former
                else:
                    former = token
                    yield token
        else:
            former = token
            yield token
    # Generate additional DEDENT so that the number of INDENT/DEDENT always
    # match
    while len(stack) > 1:
        former = generate_dedent(stack, token)
        yield former
# dentstack += [tokens[i].value] # else: # newtok.type = 'NEWLINE' # # fixedtokens += [newtok] # else: # fixedtokens += [tokens[i]] for i in range(len(tokens)-1): print ">>>", tokens[i].type, tokens[i].value fixedtokens += [tokens[i]] if tokens[i].type == 'NEWLINE': newtok = LexToken() newtok.value = None newtok.lineno = tokens[i].lineno newtok.lexpos = tokens[i].lexpos if tokens[i].value > dentstack[-1]: newtok.type = 'INDENT' dentstack += [tokens[i].value] elif tokens[i].value < dentstack[-1]: newtok.type = 'DEDENT' dentstack = dentstack[:-1] if 'type' in newtok.__dict__: fixedtokens += [newtok] fixedtokens += [tokens[-1]] dedent = LexToken() dedent.value = None dedent.lineno = 0 dedent.lexpos = 0
def handle_semi_error(p):
    """Attempt automatic semicolon insertion at a parse error (debug-verbose
    variant).

    Examines the previous/current/next tokens to decide whether a virtual
    SEMI may be inserted (line break or '}' nearby).  Returns True when
    recovery was performed; mutates global state in `glob` and the lexer's
    pushback queue.
    """
    if glob.g_production_debug:
        print("in handle_semi_error")
    tok = p.lexer.peek()
    # prev = the token before the current one (peek buffer if available).
    if len(p.lexer.peeks) > 1:
        prev = p.lexer.peeks[-2]
    else:
        prev = p.lexer.prev
    cur = p.lexer.cur
    if prev == None:
        prev = tok
    if cur == None:
        cur = tok
    #print("p", prev)
    #print("c", cur)
    #print("t", tok)
    # Peek entries may be [token, lexpos, lineno] lists — unwrap them.
    if type(prev) == list:
        prev = prev[0]
    if type(cur) == list:
        cur = cur[0]
    if type(tok) == list:
        tok = tok[0]
    if p != None and type(p) != LexToken:
        print(list(p))
    # Insertion is allowed at EOF, across a line break, or next to '}'.
    ret = tok == None or cur == None or prev.lineno < tok.lineno
    ret = ret or tok.type == "RBRACKET" or prev.type == "RBRACKET"
    ret = ret or cur.type == "RBRACKET"
    # "Restricted" productions forbid insertion.
    p2 = restricted()
    if p2 != None and not (prev.type in ["RSBRACKET", "RPAREN"] and restrict_prev() == None):
        ret = False
        p = p2
        print(prev.type, cur.type, p2, restrict_prev())
        print("didn't handle semi error")
    glob.g_line = p.lineno
    glob.g_lexpos = p.lexpos
    #print_err(p)
    if ret and not glob.g_tried_semi:
        #"""
        t = LexToken()
        t.type = "SEMI"
        t.value = ";"
        t.lineno = cur.lineno
        t.lexpos = cur.lexpos
        #"""
        # Re-queue the current token behind the inserted SEMI.
        p.lexer.push(p.lexer.cur)
        p.lexer.push(t)
        yacc.errok()
        glob.g_error = False
        glob.g_tried_semi = True
    else:
        ret = False
        glob.g_error = True
        glob.g_error_pre = p
        #for l in prodname_log[-5:-1]:
        #    print(l)
        #print("a real error occurred 2!?")
        #print_err(p)
    return ret
def p_error(p):
    """
    PLY error-recovery hook (debug-verbose variant): try automatic semicolon
    insertion before giving up.

    Disabled earlier implementation, kept for reference:
        print(p.lexer.prev.lineno, p.lineno)
        if p.lexer.prev.lineno < p.lineno or p.type == "RBRACKET":
            yacc.errok()
            return
    """
    if glob.g_production_debug:
        if p == None:
            print("in p_error")
        else:
            print("in p_error", p.type, p.value)
    if p == None:
        # End of input: try inserting one virtual SEMI (unless restricted or
        # already tried), else report EOF.
        if not restricted() and glob.g_tried_semi == False:
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = -1  # synthetic token: no real source position
            t.lineno = -1
            glob.g_lexer.push(t)
            glob.g_tried_semi = True
            yacc.errok()
        else:
            sys.stderr.write(glob.g_file + ": error: unexpected end of file\n")
        return
    else:
        glob.g_error_pre = p
        if handle_semi_error(p):
            # handle_semi_error already pushed the SEMI; this token is unused.
            t = LexToken()
            t.type = "SEMI"
            t.value = ";"
            t.lexpos = p.lexpos
            t.lineno = p.lineno
            #glob.g_lexer.push(t)
            #glob.g_tried_semi = True
            yacc.errok()
            glob.g_error = False
            if glob.g_production_debug or glob.g_semi_debug:
                linestr, colstr = err_find_line(p.lexer, p.lexpos);
                lineno = p.lineno if type(p.lineno) == int else p.lineno(0)
                sys.stdout.write("handled semicolon error : %d\n" % lineno)
                sys.stdout.write(linestr+"\n")
                sys.stdout.write(colstr+"\n")
            return
        else:
            glob.g_error = True
            print_err(p)
            return
    # NOTE(review): everything below is unreachable — both branches above
    # return.  Kept byte-identical; presumably leftover diagnostics code.
    if glob.g_error:
        print_err(glob.g_error_pre)
    glob.g_error_pre = p
    glob.g_error = True
    try:
        line = int(p.lineno)
    except:
        line = p.lineno(1)
    try:
        lexdata = p.lexer.lexer.lexdata
        sline = p.lexer.lexer.lexpos
    except:
        lexdata = p.lexer.lexdata
        sline = p.lexer.lexpos
    sline = lexdata[sline-40:sline+1]