def __findAllWords(self, string):
    """Tokenize every word found in ``string`` and return the remainder.

    A word is a run of word characters that starts with a letter or an
    underscore and is delimited by word boundaries.  Each word is appended
    to the current-string token list as a "keyword" token when
    ``self.__isKeyWord`` accepts it, otherwise as an "identifier" token.

    Args:
        string: The text to scan.

    Returns:
        ``string`` with every matched word cut out; all other characters
        (including whitespace) are left exactly where they were.
    """
    pattern = r'\b[A-Za-z_]\w*\b'
    kept = []
    cursor = 0
    for match in re.finditer(pattern, string):
        word = match.group()
        kind = "keyword" if self.__isKeyWord(word) else "identifier"
        self.__currStringTokens.append(Token(kind, word))
        # Remove the word by its own span.  Searching for the word text
        # again could hit the same letters embedded in another token
        # (e.g. the "a" inside "1a") and delete the wrong characters.
        kept.append(string[cursor:match.start()])
        cursor = match.end()
    kept.append(string[cursor:])
    return ''.join(kept)
def __findAllErrors(self, string):
    """Record new-line markers and report everything else as errors.

    Every ``#`` in ``string`` is appended to the current-string token list
    as a "new line" token.  Whatever text remains once the ``#`` characters
    are dropped is split on whitespace, and each fragment is appended as an
    "error" token.

    Args:
        string: The text left over after the other token passes.

    Returns:
        None.  The method only appends to the current-string token list.
    """
    for marker in re.findall(r'#', string):
        self.__currStringTokens.append(Token("new line", marker))
    leftover = string.replace('#', '')
    # str.split() with no argument trims both ends and treats any run of
    # whitespace as one separator, so a lone tab between two fragments
    # separates them just like a space does, and an all-blank remainder
    # yields no error tokens at all.
    for error in leftover.split():
        self.__currStringTokens.append(Token("error", error))
def __findNumbers(self, string):
    """Tokenize numeric constants in ``string`` and return the remainder.

    A numeric constant is a run of digits, optionally followed by a dot
    and more digits, delimited by word boundaries.  Each one is appended
    to the current-string token list as a "numeric constant" token.

    Args:
        string: The text to scan.

    Returns:
        ``string`` with every matched number cut out; all other characters
        are left in place.
    """
    pattern = r'\b\d+\.?\d*\b'
    kept = []
    cursor = 0
    for match in re.finditer(pattern, string):
        # Cut by the match span instead of searching for the number text:
        # used as a pattern, the "." in "1.5" would match any character
        # and the digits could also be found inside an unrelated token.
        kept.append(string[cursor:match.start()])
        cursor = match.end()
        self.__currStringTokens.append(
            Token("numeric constant", match.group()))
    kept.append(string[cursor:])
    return ''.join(kept)
def __findBigComments(self, code):
    """Extract ``/* ... */`` comments from ``code`` and return the rest.

    Each comment is matched non-greedily, so it ends at the first ``*/``
    after its opening ``/*``.  The pattern is applied without DOTALL, so a
    comment is only recognised when no newline character sits between its
    delimiters.  Every comment found is appended to ``self.__tokens`` as a
    "comments" token.

    Args:
        code: The source text to scan.

    Returns:
        ``code`` with every matched comment cut out.
    """
    pattern = r'/\*.*?\*/'
    kept = []
    cursor = 0
    for match in re.finditer(pattern, code):
        self.__tokens.append(Token("comments", match.group()))
        # The span of the match is already known, so the comment is cut
        # out directly.  No second search over the comment text is needed,
        # which means arbitrary comment contents cannot make the removal
        # fail or raise.
        kept.append(code[cursor:match.start()])
        cursor = match.end()
    kept.append(code[cursor:])
    return ''.join(kept)
def __findPunktMarks(self, string):
    """Tokenize punctuation marks in ``string`` and return the remainder.

    The recognised marks are ``, . < > ( ) { } [ ] ;``.  Each occurrence is
    appended, in left-to-right order, to the current-string token list as
    a "punctuation mark" token.

    Args:
        string: The text to scan.

    Returns:
        ``string`` with every punctuation mark removed.
    """
    pattern = r'[,.<>(){}\[\];]'
    for mark in re.findall(pattern, string):
        self.__currStringTokens.append(Token("punctuation mark", mark))
    # Every match is a single character of the class above, so one
    # substitution removes exactly the characters that were tokenized,
    # with no per-mark re-search that could come back empty.
    return re.sub(pattern, '', string)
def __findSymbols(self, string):
    """Tokenize single-quoted constants in ``string``; return the rest.

    Each single-quoted span (matched non-greedily) is passed through
    ``self.__replace`` and the result is used as a regex to locate the
    span in the working string.
    NOTE(review): ``__replace`` presumably escapes regex metacharacters;
    confirm against its definition.

    When the span is located it is cut out and a "symbolic constant"
    token holding the original text is appended to the current-string
    token list.  When it is not located, the span is left in place and no
    token is recorded.

    Args:
        string: The text to scan.

    Returns:
        ``string`` with every located span removed.
    """
    quoted = re.findall(r'\'.*?\'', string)
    for symbol in quoted:
        hit = re.search(self.__replace(symbol), string)
        if hit is not None:
            begin, finish = hit.span()
            string = string[:begin] + string[finish:]
            self.__currStringTokens.append(
                Token("symbolic constant", symbol))
    return string
def __findLiterals(self, string):
    """Tokenize double-quoted literals in ``string``; return the rest.

    Each double-quoted span (matched non-greedily) is appended to the
    current-string token list as a "literal" token.  The span is then
    passed through ``self.__replace`` and the result is used as a regex to
    locate it in the working string.
    NOTE(review): ``__replace`` presumably escapes regex metacharacters;
    confirm against its definition.

    The token is recorded before the lookup, so it is kept even when the
    lookup finds nothing; in that case the text stays in the string.

    Args:
        string: The text to scan.

    Returns:
        ``string`` with every located literal removed.
    """
    for literal in re.findall(r'\".*?\"', string):
        self.__currStringTokens.append(Token("literal", literal))
        located = re.search(self.__replace(literal), string)
        if located is not None:
            head = string[:located.start()]
            tail = string[located.end():]
            string = head + tail
    return string
def __findOperators(self, string):
    """Tokenize operators in ``string`` and return the remainder.

    A candidate is a run of one to three operator characters
    (``+ & ^ | * / % < > = ! ? : -``) that is not adjacent to another
    operator character on either side.  Candidates accepted by
    ``self.__isOperator`` are appended to the current-string token list as
    "operator" tokens and cut out of the string.  Rejected candidates are
    left in place.

    Args:
        string: The text to scan.

    Returns:
        ``string`` with every accepted operator removed.
    """
    opChar = r'[+&^|*/%<>=!?:-]'
    notAfterOp = r'(?:^|(?<=[^+&^|*/%<>=!?:-]))'
    notBeforeOp = r'(?=[^+&^|*/%<>=!?:-]|$)'
    # Longest alternative first: three-, then two-, then one-character runs.
    pattern = '|'.join(
        notAfterOp + opChar * length + notBeforeOp for length in (3, 2, 1))

    kept = []
    cursor = 0
    for match in re.finditer(pattern, string):
        operator = match.group()
        if not self.__isOperator(operator):
            # Not a real operator: keep the text in the remainder.
            continue
        self.__currStringTokens.append(Token("operator", operator))
        # Cut by the match's own span.  Offsets obtained by searching a
        # slice of the string are relative to that slice and would remove
        # the wrong characters once any candidate has been skipped.
        kept.append(string[cursor:match.start()])
        cursor = match.end()
    kept.append(string[cursor:])
    return ''.join(kept)
def __tokenizeComment(self, comment):
    """Record ``comment`` as a "comments" token, minus its last character.

    Args:
        comment: The comment text; its final character is dropped before
            the token is stored.

    Returns:
        None.  The token is appended to the current-string token list.
    """
    withoutLastChar = comment[:-1]
    token = Token("comments", withoutLastChar)
    self.__currStringTokens.append(token)