def scan(self):
    """Return the next (token name, token literal, line position)."""
    while self.char is not None:
        # First skip whitespace, keeping track of newlines, by
        # advancing and going back to the start of the loop.
        if self.char.isspace():
            if self.char in ('\n', '\r'):
                self.line += 1
            self._advance()
            continue
        # Next, if we encounter a single-line comment token,
        # skip until a newline is seen and go back to the start.
        if self.char == token.COMMENT:
            self._skip_single_comment()
            continue
        # We now expect to see some form of token. Check the char and
        # branch to the corresponding function. All scan functions
        # are responsible for advancing the current char position.
        # If we don't find anything, return an error (illegal token).
        name, literal = (
            self._scan_operator() if self.char in token.operators else
            self._scan_number() if self.char.isdigit() else
            self._scan_string() if self.char == '"' else
            self._scan_identifier() if self.char.isalpha() else
            self._illegal_token())
        return token.Token(name, literal, self.line)
    # The current char is None, so we have reached the end of input.
    return token.Token(token.EOF, token.EOF, self.line)
def read_op(self):
    '''
    Read a known operator (in tk.Token.operators) such as +, -, etc.,
    including the two-character operators ==, || and &&.
    '''
    if self.src[self.pos] == const.ASSIGN and \
            self.src[self.pos + 1] == const.ASSIGN:  # checks for ==
        self.pos += 2
        self.curr = tk.Token(const.EQUALS, const.EQUALS)
    elif self.src[self.pos] == const.OR[0]:  # ||
        if self.src[self.pos + 1] == const.OR[1]:
            self.pos += 2
            self.curr = tk.Token(const.OR, const.OR)
        else:
            raise ValueError(
                f'unknown token {self.src[self.pos:self.pos + 2]}')
    elif self.src[self.pos] == const.AND[0]:  # &&
        if self.src[self.pos + 1] == const.AND[1]:
            self.pos += 2
            self.curr = tk.Token(const.AND, const.AND)
        else:
            raise ValueError(
                f'unknown token {self.src[self.pos:self.pos + 2]}')
    else:
        ret = tk.Token(self.src[self.pos], self.src[self.pos])
        self.pos += 1
        self.curr = ret
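# A worked trace of read_op above, assuming const.ASSIGN == '=' and
# const.OR == '||' (values inferred from the comparisons and indexing in
# read_op, not stated in the original): on src = "a == b" with pos at the
# first '=', the first branch fires, pos advances by 2, and curr becomes
# tk.Token(EQUALS, EQUALS). On src = "|x" the || branch sees '|' followed
# by 'x' and raises ValueError('unknown token |x').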
def lex(self):
    scan = self.scan
    self.list = []
    while scan.next():
        pos = scan.get_pos()
        if scan.get() == '(':
            self.list.append(token.Token('bparen', scan.emit(), pos))
            self.paren_depth += 1
            self.lex_list()
        elif scan.get() == ')':
            self.list.append(token.Token('eparen', scan.emit(), pos))
            self.paren_depth -= 1
            if self.paren_depth > 0:
                self.lex_list()
            elif self.paren_depth < 0:
                self.paren_depth += 1
                raise error.Error('too many end parens').error()
            else:
                return self.list
        elif scan.get() == '#':
            while scan.get() != '\n':
                scan.next()
            scan.emit()
        else:
            dump = scan.emit()
            if len(dump.strip()) > 0:
                error.Error(
                    'unknown \'' +
                    (dump if len(dump) <= 10 else dump[0:10 - 3] + '...') +
                    '\'', pos=scan.get_pos()).error()
    return self.list
def get_next_token(self):
    text = self.text
    tk = None
    if self.pos > len(text) - 1:
        return token.Token(token.TOKEN_EOF, None)
    current_char = text[self.pos]
    # Skip any run of spaces.
    while current_char == ' ':
        self.pos += 1
        if self.pos >= len(text):
            break
        current_char = text[self.pos]
    if self.pos >= len(text):
        return token.Token(token.TOKEN_EOF, None)
    if current_char.isdigit():
        # Accumulate a multi-digit integer.
        num = int(current_char)
        while self.pos < len(text) - 1 and text[self.pos + 1].isdigit():
            self.pos += 1
            num = num * 10 + int(text[self.pos])
        tk = token.Token(token.TOKEN_INTEGER, int(num))
    elif current_char == '+':
        tk = token.Token(token.TOKEN_PLUS, '+')
    elif current_char == '-':
        tk = token.Token(token.TOKEN_MINUS, '-')
    if tk is None:
        self.error()
    self.pos += 1
    return tk
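# A minimal driver sketch for get_next_token above. `all_tokens` is a
# hypothetical helper, and the `.type` attribute name on Token is an
# assumption (the snippet only shows the constructor arguments).
def all_tokens(lexer):
    result = []
    while True:
        tk = lexer.get_next_token()
        if tk.type == token.TOKEN_EOF:  # assumption: Token exposes .type
            return result
        result.append(tk)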
def lex(string, col=1, line=1):
    index = 0
    while True:
        if index < len(string):
            found_space = False
            while index < len(string) and (string[index].isspace() or
                                           string[index] == "#"):
                if string[index].isspace():
                    index += 1
                    col += 1
                    if string[index - 1] == "\n":
                        col = 1
                        line += 1
                    found_space = True
                else:
                    while index < len(string) and string[index] != "\n":
                        index += 1
                    col = 1
            if found_space:
                continue
            found = False
            for regex, handler in lexical_dictionary:
                pattern = re.compile(regex)
                match = pattern.match(string, index)
                if match:
                    found = True
                    t, literal, whole = handler(match)
                    yield token.Token(t, literal, (line, col),
                                      (line, col + len(whole) - 1))
                    index += len(whole)
                    col += len(whole)
                    break
            if not found:
                yield token.Token(token.ILLEGAL, string[index],
                                  (line, col), (line, col))
                index += 1
                col += 1
        else:
            index += 1
            col += 1
            yield token.Token(token.EOF, "", (line, col), (line, col))
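# Hypothetical usage of the lex generator above. The shape of
# lexical_dictionary is inferred from the loop in lex: an iterable of
# (regex, handler) pairs where handler(match) returns a
# (token_type, literal, whole_lexeme) triple. token.NUM and token.ID are
# assumed names; token.EOF and token.ILLEGAL come from lex itself.
#
# lexical_dictionary = [
#     (r"\d+", lambda m: (token.NUM, int(m.group()), m.group())),
#     (r"[A-Za-z_]\w*", lambda m: (token.ID, m.group(), m.group())),
# ]
# for tok in lex("x 42"):
#     ...  # break once a token.EOF token is seen; lex never stops on its own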
def match_name(self):
    pos = self._scanner
    parts = []
    while not self._scanner.eof() and self.is_ident(self.peek()):
        if len(parts) > 0 and parts[-1] == '_' and self.peek() == '_':
            self.fail('consecutive-underscores')
        parts.append(self.peek())
        self.next()
    name = ''.join(parts)
    if common.is_number(name):
        return token.Token(token.NUM, int(name), position=pos)
    else:
        tok_type = KEYWORDS.get(name, token.ID)
        return token.Token(tok_type, name, position=pos)
def addData(datum):
    global __port__
    tag = token.ExternalTag(__port__)
    tok = token.Token(datum, tag)
    __cores__[0].add(tok)
    __port__ += 1
    log.info("Adding data to runtime: %s", tok)
def spawn_tokens(n, level):
    # n is the number of enemy tokens to spawn
    possible_token_locations = {
        1: [(100, 500), (200, 500), (300, 500), (400, 500)],
        2: [(200, 200), (600, 100), (400, 200), (500, 300)],
        3: [(300, 300), (100, 200), (400, 100), (200, 400)],
    }
    if n <= len(possible_token_locations[level]):
        tokens = []
        for i in range(n):
            # Pick a free location and remove it so no two tokens overlap.
            location = random.choice(possible_token_locations[level])
            possible_token_locations[level].remove(location)
            x, y = location
            new_token = token.Token(level=level)
            new_token.x = x
            new_token.y = y
            new_token.update_bounding_box()
            tokens.append(new_token)
        return tokens
    else:
        print('not enough locations provided to spawn %i mobs' % n)
def read_tokens_from_file(purse_path, max_tokens=200, max_size=100 * 1024):
    logger = logging.getLogger(__name__)
    try:
        if os.path.getsize(purse_path) > max_size:
            raise NoTokensAvailableError("Purse at %s is too big" % purse_path)
    except OSError:
        logger.warning("OSError while reading a purse", exc_info=True)
        return []
    tokens = []
    with io.open(purse_path, 'rb') as purse:
        for line in purse:
            try:
                t = token.Token(line)
                if t not in tokens:
                    tokens.append(t)
                else:
                    logger.info("Found duplicated token: %s" % t.token_string)
            except Exception:
                # Skip lines that do not parse as tokens.
                pass
            if len(tokens) >= max_tokens:
                break
    return tokens
def emit_token(self, tid, cfg):
    if cfg.debug_lexing:
        print("[-vlexing] Emitting token " + repr(self.buf.rstrip()) +
              " (" + str(tid) + ")...")
    self.toks.append(token.Token(self.path, self.first_line, tid, self.buf))
    self.skip_token()
def token(self):
    """Return the next token read from the input buffer. On a lexical
    error the analyzer aborts reading and raises an Exception indicating
    the row and column where the error is.
    """
    if self.col >= len(self.buf):
        return None
    match = self.re_ws_skip.search(self.buf, self.col)
    if match:
        self.col = match.start()
    else:
        return None
    for regex, type in self.rules:
        match = regex.match(self.buf, self.col)
        if match:
            tok = token.Token(type, match.group(), self.row, self.col + 1)
            self.col = match.end()
            if type == "tk_comment":
                return None
            return tok
    # If we're here, no rule matched.
    raise Exception("LexerError at row %s, col %s" % (self.row, self.col + 1))
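# Hypothetical shape of the self.rules table consumed by token() above,
# inferred from the loop: (compiled regex, type-name string) pairs, tried in
# order. Only "tk_comment" appears in the original; the other names are
# assumptions for illustration.
#
# self.rules = [
#     (re.compile(r'//[^\n]*'), 'tk_comment'),
#     (re.compile(r'\d+'), 'tk_number'),
#     (re.compile(r'[A-Za-z_]\w*'), 'tk_id'),
# ]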
def getRequestToken(self):
    req = request.Request(self.api.getRequestTokenVerb(),
                          self.api.getRequestTokenEndpoint())
    tok = token.Token("", "")
    req.addOAuthParam(oauth.CALLBACK, oauth.OUT_OF_BAND)
    self.addOAuthParams(req, tok)
    self.appendSignatur(req)
def __init__(self, inputstr):
    # lexer.New() already calls readChar once, so we don't repeat it here.
    self.l = lexer.New(inputstr)
    self.curToken = token.Token("", "")
    self.peekToken = token.Token("", "")
    self.errors = []
    print("Parser.__init__():", self.l.input)
    print("Parser.__init__():", self.l)

    self.prefixParseFns = {}
    self.registerPrefix(token.IDENT, self.parseIdentifier)
    self.registerPrefix(token.INT, self.parseIntegerLiteral)
    self.registerPrefix(token.BANG, self.parsePrefixExpression)
    self.registerPrefix(token.MINUS, self.parsePrefixExpression)
    self.registerPrefix(token.TRUE, self.parseBoolean)
    self.registerPrefix(token.FALSE, self.parseBoolean)
    self.registerPrefix(token.LPAREN, self.parseGroupedExpression)
    self.registerPrefix(token.IF, self.parseIfExpression)
    self.registerPrefix(token.FUNCTION, self.parseFunctionLiteral)
    self.registerPrefix(token.STRING, self.parseStringLiteral)
    self.registerPrefix(token.LBRACKET, self.parseArrayLiteral)
    self.registerPrefix(token.LBRACE, self.parseHashLiteral)

    self.infixParseFns = {}
    self.registerInfix(token.PLUS, self.parseInfixExpression)
    self.registerInfix(token.MINUS, self.parseInfixExpression)
    self.registerInfix(token.SLASH, self.parseInfixExpression)
    self.registerInfix(token.ASTERISK, self.parseInfixExpression)
    self.registerInfix(token.EQ, self.parseInfixExpression)
    self.registerInfix(token.NOT_EQ, self.parseInfixExpression)
    self.registerInfix(token.LT, self.parseInfixExpression)
    self.registerInfix(token.GT, self.parseInfixExpression)
    self.registerInfix(token.LPAREN, self.parseCallExpression)
    self.registerInfix(token.LBRACKET, self.parseIndexExpression)
def next_element(self):
    if self.is_letter():
        ident = self.read_identifier()
        keyword = token.look_up_indent(ident)
        return token.Token(keyword, ident)
    elif self.is_number():
        ident = self.read_number()
        keyword = constants.INT
        return token.Token(keyword, ident)
    elif self.ch == '!':
        if self.peek_char() == '=':
            # read_char() returns the consumed char and advances self.ch,
            # as the two-character lexeme built here relies on.
            char = self.read_char()
            tok = str(char) + str(self.ch)
            self.read_char()
            return token.Token(constants.NOT_EQ, tok)
        else:
            char = self.read_char()
            return token.Token(constants.BANG, char)
    elif self.ch == '=':
        if self.peek_char() == '=':
            char = self.read_char()
            tok = str(char) + str(self.ch)
            self.read_char()
            return token.Token(constants.EQ, tok)
        else:
            char = self.read_char()
            return token.Token(constants.ASSIGN, char)
    else:
        return token.Token(constants.ILLEGAL, self.ch)
def tester(fn):
    t = token.Token()
    line = 1
    with open(fn) as f:
        while True:
            t, line = scanner.driver(f, line)
            print("%s '%s' on line %d" % (t.identity, t.instance, t.location))
            if t.identity in (token.token_ids.token_names[35],
                              token.token_ids.token_names[36]):
                break
def run(self, tokens):
    outputs = []
    curr_lexeme = lexeme.Lexeme("")
    for c in tokens:
        output = self.step(c)
        if output is not None:
            # A state-machine output closes the current lexeme.
            outputs.append(token.Token(curr_lexeme, output))
            curr_lexeme = lexeme.Lexeme("")
        if curr_lexeme.val != "" or re.match(r"\s", c) is None:
            curr_lexeme.val += c
    return outputs
def get_tokens(state, literal, line):
    state_token = token.Token()
    if literal in keywords:
        state_token.identity = keywords.get(literal)
        state_token.instance = literal
        state_token.location = line
    elif state in final_states:
        state_token.identity = final_states.get(state)
        state_token.instance = literal
        state_token.location = line
    return state_token
def lex_number(self):
    scan = self.scan
    pos = scan.get_pos()
    while scan.next():
        if not scan.get().isdigit():
            scan.backup()
            break
    if scan.len() == 0:
        return False
    self.list.append(token.Token('number', scan.emit(), pos))
    return True
def tokenize(self, to_parse):
    remove = "[]()+=~`@#%^*()_-+=\{[]\}|\\'\'<>,"
    for c in remove:
        # For everything in remove: strip it from the input.
        to_parse = to_parse.replace(c, "")
    tokens = to_parse.split(" ")
    punctuation = '!$&:;?.'
    result = []
    # For each word, split off any punctuation and keep it as its own token.
    for t in tokens:
        result += self.punc_split(t, punctuation)
    return [token.Token(t) for t in result]
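# Hypothetical usage of tokenize above, assuming punc_split separates
# trailing punctuation into its own entry (its implementation is not shown):
# the comma is stripped because it appears in `remove`, while the '!' is
# split off because it appears in `punctuation`.
#
# tokenizer.tokenize("Hello, world!")
# -> [token.Token('Hello'), token.Token('world'), token.Token('!')]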
def getAccessToken(self):
    headers = {"Accept": "application/json"}
    body = ("grant_type=authorization_code&client_id=" + self.consumer_key +
            "&redirect_uri=" + self.redirect_uri + "&code=" + self.code)
    url = self.huddleAuthServer + "token/"
    req = urllib2.Request(url, body, headers)
    response = urllib2.urlopen(req)
    jsonParse = json.load(response)
    self.tokenStore = token.Token(jsonParse['access_token'],
                                  jsonParse['refresh_token'],
                                  jsonParse['expires_in'])
    return self.tokenStore
def getRefreshToken(self):
    headers = {"Accept": "application/json"}
    body = ("grant_type=refresh_token&client_id=" + self.consumer_key +
            "&refresh_token=" + self.tokenStore.getRefreshToken())
    url = self.huddleAuthServer + "refresh/"
    req = urllib2.Request(url, body, headers)
    response = urllib2.urlopen(req)
    jsonParse = json.load(response)
    self.tokenStore = token.Token(jsonParse['access_token'],
                                  jsonParse['refresh_token'],
                                  jsonParse['expires_in'])
    return self.tokenStore
def lex_str(self):
    scan = self.scan
    pos = scan.get_pos()
    scan.next()
    if scan.get() == '"':
        # Consume until the closing quote.
        while scan.next():
            if scan.get() == '"':
                break
    else:
        scan.backup()
        return False
    self.list.append(token.Token('str', scan.emit()[1:-1], pos))
    return True
def __init__(self, label="", depth=None, child1=None, child2=None, child3=None, child4=None, token1=token.Token(), token2=token.Token(), token3=token.Token(), token4=token.Token(), toks=[]): self.label = label self.depth = depth self.child1 = child1 self.child2 = child2 self.child3 = child3 self.child4 = child4 self.token1 = token1 self.token2 = token2 self.token3 = token3 self.token4 = token4 self.toks = toks
def lex_id(self):
    scan = self.scan
    pos = scan.get_pos()
    if not scan.next():
        return False
    if not scan.get().isalpha() and not self.is_symbol(scan.get()):
        scan.backup()
        return False
    while scan.next():
        if not scan.get().isalnum() and not self.is_symbol(scan.get()):
            scan.backup()
            break
    self.list.append(token.Token('id', scan.emit(), pos))
    return True
def read_word(self):
    '''
    Read a word character by character, returning a node for it with its
    type being either variable or reserved word. A variable can only start
    with a letter, but can have numbers and underscores in it.
    '''
    word = self.src[self.pos]
    self.pos += 1
    while self.pos < len(self.src) and \
            (self.src[self.pos].isalpha() or self.src[self.pos].isdigit()
             or self.src[self.pos] == const.UNDERSCORE):
        word += self.src[self.pos]
        self.pos += 1
    self.curr = tk.Token(const.RESERVED_WORD
                         if word in const.RESERVED_WORDS
                         else const.VARIABLE, word)
def parse_list(self, tree):
    t = self._list_pop()
    if t is None:
        return
    type_str = token.TOKEN_TYPES[t.type.type]
    if type_str == 'bparen':
        self.paren_depth += 1
        return self.parse_in_list(
            tree.add(ast.Ast(token.Token('list', pos=t.pos))))
    elif type_str == 'eparen':
        self.paren_depth -= 1
        if self.paren_depth < 0:
            raise Exception('unmatched paren')
        elif self.paren_depth > 0:
            return self.parse_in_list(tree.get_parent())
        else:
            return self.parse_list(tree.get_parent())
    else:
        raise Exception('unrecognized token')
def read_int(self):
    '''
    Set curr's value to the next token's value (in this case, an integer).
    '''
    if not self.src[self.pos].isdigit():
        # This check is only needed when pos == 0.
        raise ValueError('Unexpected token {}, expected an integer'
                         .format(self.src[self.pos]))
    value = 0
    while self.src[self.pos].isdigit():
        value = value * 10 + int(self.src[self.pos])
        self.pos += 1
        if self.pos >= len(self.src):
            break
    self.curr = tk.Token(const.INT, value)
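# Worked trace of the accumulation loop in read_int above on src = "345":
#   value = 0  -> 0 * 10 + 3 = 3
#              -> 3 * 10 + 4 = 34
#              -> 34 * 10 + 5 = 345
# so curr ends up as tk.Token(const.INT, 345).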
def create_tokens(self, tokenstream):
    nxt_token = first_token = token.Token(data="", token_type="MAGICCODE",
                                          next_token=None)
    for tok in tokenstream:
        if token.getWordToToken(tok) is not False:
            t = token.tokenCreator(tok, token.getWordToToken(tok), nxt_token)
            nxt_token = t
        else:
            # print("Invalid identifier!")
            pass
    return first_token
def NextToken(self):
    tok = token.Token("", '')
    self.skipWhitespace()
    if self.ch == '-':
        tok = token.Token(token.MINUS, self.ch)
    elif self.ch == '!':
        if self.peekChar() == '=':
            ch = self.ch
            self.readChar()
            literal = str(ch) + str(self.ch)
            tok = token.Token(token.NOT_EQ, literal)
        else:
            tok = newToken(token.BANG, self.ch)
    elif self.ch == '/':
        tok = newToken(token.SLASH, self.ch)
    elif self.ch == '*':
        tok = newToken(token.ASTERISK, self.ch)
    elif self.ch == '<':
        tok = newToken(token.LT, self.ch)
    elif self.ch == '>':
        tok = newToken(token.GT, self.ch)
    elif self.ch == '=':
        if self.peekChar() == '=':
            ch = self.ch
            self.readChar()
            literal = str(ch) + str(self.ch)
            tok = token.Token(token.EQ, literal)
        else:
            tok = newToken(token.ASSIGN, self.ch)
    elif self.ch == ';':
        tok = newToken(token.SEMICOLON, self.ch)
    elif self.ch == '(':
        tok = newToken(token.LPAREN, self.ch)
    elif self.ch == ')':
        tok = newToken(token.RPAREN, self.ch)
    elif self.ch == ',':
        tok = newToken(token.COMMA, self.ch)
    elif self.ch == '+':
        tok = newToken(token.PLUS, self.ch)
    elif self.ch == '{':
        tok = newToken(token.LBRACE, self.ch)
    elif self.ch == '}':
        tok = newToken(token.RBRACE, self.ch)
    elif self.ch == '':
        # End of input: empty literal, EOF type.
        tok = newToken(token.EOF, self.ch)
    elif self.ch == '"':
        tok = newToken(token.STRING, self.readString())
    elif self.ch == '[':
        tok = newToken(token.LBRACKET, self.ch)
    elif self.ch == ']':
        tok = newToken(token.RBRACKET, self.ch)
    elif self.ch == ':':
        tok = newToken(token.COLON, self.ch)
    else:
        if isLetter(self.ch):
            # readIdentifier has already advanced past the word,
            # so return without the trailing readChar below.
            Literal = self.readIdentifier()
            Type = token.LookupIdent(Literal)
            return token.Token(Type, Literal)
        elif isDigit(self.ch):
            return token.Token(token.INT, self.readNumber())
        else:
            tok = token.Token(token.ILLEGAL, self.ch)
    self.readChar()
    return tok
def newToken(tokenType, ch):
    # Coerce ch to str so non-string characters become valid literals.
    return token.Token(tokenType, str(ch))