def tokenCreator(self, text, i, token=None):
    """Advance one character of ``text`` while building a string token.

    Starts a new Token when none is supplied (or the previous one is
    already formed), feeds ``text[i]`` into it, and reports the next
    position to scan.

    Returns:
        (next_position, token) — ``next_position`` is always ``i + 1``.
    """
    # A missing or completed token means we start from scratch.
    if not token or token.formed:
        token = Token()

    code = ord(text[i])

    if not token.inFormation and self.is_quote(code):
        # An opening quote begins a new string token.
        token.type = "string"
        token.inFormation = True
        token.formed = False
        token.add(code)
    elif token.inFormation:
        # Every character is accumulated; only a character that is not an
        # interior character of an open quoted string completes the token.
        token.add(code)
        if not (self.is_quote(token.atFirst()) and not self.is_quote(code)):
            token.formed = True
    else:
        # Any other character outside a string resets to an empty token.
        token = Token()

    return (i + 1, token)
def tokenCreator(self, char, token):
    """Feed one character into the lexer state machine.

    Recognizes ';' comments (discarded at end of line), single- and
    double-quoted strings, single-character arithmetic operators, and
    alphanumeric identifiers.

    Parameters:
        char: the single character being scanned.
        token: the token under construction; when None or already formed,
            a fresh Token is started.

    Returns:
        (token, repeatChar) — ``repeatChar`` is True when ``char`` was NOT
        consumed (it terminated an identifier) and must be fed in again.
    """
    repeatChar = None
    if not token or token.formed:
        token = Token()
    charCode = ord(char)
    # Comment: ';' opens a comment that swallows characters until newline;
    # the comment token is discarded, never emitted as formed.
    if not token.inFormation and self.is_semicolon(charCode):
        token.formed = False
        token.inFormation = True
        token.type = "COMMENT"
        token.add(charCode)
    elif token.inFormation and self.is_semicolon(token.atFirst()):
        if self.is_newLine(charCode):
            # End of line: drop the comment entirely.
            token = Token()
        else:
            pass
    # Quote String
    elif not token.inFormation and self.is_quote(charCode):
        token.formed = False
        token.inFormation = True
        token.type = "STRING"
        token.add(charCode)
    elif token.inFormation and self.is_quote(token.atFirst()):
        if self.is_quote(charCode):
            # Closing quote completes the string token.
            token.add(charCode)
            token.inFormation = False
            token.formed = True
        else:
            token.add(charCode)
    # DoubleQuote String
    elif not token.inFormation and self.is_doubleQuote(charCode):
        token.formed = False
        token.inFormation = True
        token.type = "STRING"
        token.add(charCode)
    elif token.inFormation and self.is_doubleQuote(token.atFirst()):
        if self.is_doubleQuote(charCode):
            token.add(charCode)
            token.inFormation = False
            token.formed = True
        else:
            token.add(charCode)
    # Arithmetic Operator: always a single-character token.
    elif not token.inFormation and self.is_arithmeticOp(charCode):
        token.add(charCode)
        token.inFormation = False
        token.formed = True
        token.type = "ARITHMETIC_OP"
    # Identifier
    elif not token.inFormation and self.is_alpha(charCode):
        token.add(charCode)
        token.inFormation = True
        # BUG FIX: was `Token.Formed = False`, which silently created a
        # misspelled attribute on the Token *class* instead of clearing
        # the formed flag on this instance.
        token.formed = False
        token.type = "IDENT"
    elif token.inFormation and self.is_alpha(token.atFirst()):
        if self.is_alpha(charCode) or self.is_digit(charCode):
            token.add(charCode)
        else:
            # Non-identifier character ends the token; it was not
            # consumed, so the caller must re-submit it.
            token.inFormation = False
            token.formed = True
            repeatChar = True
    return (token, repeatChar)
def getToken(self):
    """Scan and return the next token from the character stream.

    Uses a two-character lookahead (`c1`, `c2`) from ``self.next()`` and a
    context stack (``self.context`` / ``switchContext`` / ``restoreContext``)
    to recognize comments, indentation changes, name/variable declarations,
    variable values, function arguments, and selectors.

    Returns:
        a Token, EOF at end of input, or None for input that produces no
        token (matched indentation, trailing whitespace, recovered errors).

    NOTE(review): token-type constants (SHORTCOMMENT, LONGCOMMENT, INDENT,
    DEDENT, NAME, VARIABLE, VALUE, FUNCTION, ARGUMENT, SELECTOR, EOF) and the
    character-class constants (INDENTATION, SPACING, DECLARATION,
    END_DECLARATION, SPACE) are assumed to be module-level — not visible in
    this chunk.  The `print` statements are Python 2 syntax.
    """
    character, c1, c2 = self.next()
    self.__debug("%s token acquired" % c1)
    # End of input.
    if character == None:
        return EOF
    """Let's get comments out of the way first Indentation doesn't care about them"""
    if c1 + c2 == "//":
        self.__debug("Comment token")
        token = Token(character)
        token.type = SHORTCOMMENT
        token.components += "/"
        #Pass the second slash
        self.next()
        character, c1, c2 = self.next()
        #Double slash comments last till the end of the line
        while c1 != "\n":
            self.__debug("Seeking to end of line")
            token.components += c1
            character, c1, c2 = self.next()
        self.__debug("Returning comment token")
        return token
    #Long comment type
    if c1 + c2 == "/*":
        self.__debug("Long comment token")
        token = Token(character)
        token.type = LONGCOMMENT
        token.components += "*"
        while c1 + c2 != "*/":
            self.__debug("Seeking to end of long comment")
            character, c1, c2 = self.next()
            token.components += c1
        token.components += "/"
        #Pass the ending slash
        self.next()
        self.__debug("Returning long comment token")
        return token
    indentAmount = 0
    #Space and Tab count equally for indentation
    #Don't care to be helpful to people who mix the styles
    if self.context == 'global' and c1 in INDENTATION:
        # NOTE(review): indentAmount is only ever incremented once here,
        # even though the loop below consumes a whole run of indentation —
        # confirm whether the increment was meant to be inside the loop.
        indentAmount += 1
        token = Token(character)
        token.type = INDENT
        self.__debug("Indentation token")
        while c2 in INDENTATION:
            self.__debug("Gathering all indentation")
            character, c1, c2 = self.next()
            #Keep adding spaces for consistency in printout
            token.components += ' '
        #Ignore indentation ending in a newline
        if c2 == "\n":
            return None
        last = self.indentStack.pop()
        self.__debug("Comparing current indentation amount to last")
        #This is a new, higher level of indentation
        if indentAmount > last:
            self.indentStack.append(last)
            self.indentStack.append(indentAmount)
        #Same indentation level; no token added
        elif indentAmount == last:
            self.indentStack.append(last)
            self.__debug("Indentation level matches")
            return None
        #This is actually a dedent; keep dedenting until the appropriate level is found
        else:
            self.__debug("Starting Dedenting")
            while indentAmount < last:
                self.__debug("Dedenting")
                tok = Token(character)
                tok.type = DEDENT
                # NOTE(review): `tokens` is not defined anywhere in this
                # method — this raises NameError the first time a
                # multi-level dedent occurs.  Probably meant a list held on
                # self, or the dedent tokens should be queued/returned.
                tokens.append(tok)
                last = self.indentStack.pop()
                if indentAmount == last:
                    self.indentStack.append(last)
                    return None
                elif len(self.indentStack) == 0:
                    print "Previous indentation amount not found!"
                    # NOTE(review): returns the Token *class*, not an
                    # instance — looks like an error-sentinel bug.
                    return Token
    if self.context == 'global' and c1 == DECLARATION:
        self.__debug("Name or variable declaration")
        if c2 == ":":
            self.__debug("Name declaration")
            #skip the { and colon
            self.next()
            character, c1, c2 = self.next()
            token = Token(character)
            token.type = NAME
            character, c1, c2 = self.next()
            while c1 != END_DECLARATION:
                token.components += c1
                self.__debug("Seeking declaration end")
                if c1 in SPACING:
                    print "name declaration contains invalid character"
                    return None
                character, c1, c2 = self.next()
            #Skip }
            self.next()
            #Clear trailing whitespace
            while c2 in INDENTATION:
                self.__debug("Clearing trailing whitespace")
                character, c1, c2 = self.next()
            if c1 != "\n":
                print "name declarations must end with a newline"
                # Error recovery: discard the rest of the line.
                while c1 != "\n":
                    self.__debug("Recovering bad name declaration")
                    character, c1, c2 = self.next()
            return token
        else:
            self.__debug("Variable declaration")
            #Skip the brace
            character, c1, c2 = self.next()
            token = Token(character)
            token.type = VARIABLE
            while c1 != SPACE:
                self.__debug("Gathering variable name")
                character, c1, c2 = self.next()
                token.components += c1
            #Throw the space out
            self.switchContext('variable')
            self.__debug("Returning variable declaration token")
            return token
    #Collect the entire variable contents
    if self.context == 'variable':
        self.__debug("Variable value")
        if c1 == END_DECLARATION:
            print "empty variable declaration"
            self.switchContext('global')
            return None
        token = Token(character)
        token.type = VALUE
        character, c1, c2 = self.next()
        while c1 != END_DECLARATION:
            self.__debug("Scanning for remainder of variable name")
            token.components += c1
            # Backslash escapes \, : and } inside a value.
            if c1 == '\\':
                if c2 in '\\:}':
                    token.components += c2
                    self.next()
            character, c1, c2 = self.next()
            #The token up to this point is actually a function call
            if c1 == ':':
                token.type = FUNCTION
                # NOTE(review): missing `self.` — `switchContext` alone is
                # a NameError at runtime; compare `self.switchContext`
                # everywhere else in this method.
                switchContext('function')
                #Clear trailing whitespace
                while c2 in INDENTATION:
                    self.__debug("SCANNING2")
                    character, c1, c2 = self.next()
                return token
        #skip }
        self.next()
        #Clear trailing whitespace
        while c2 in INDENTATION:
            self.__debug("SCANNING3")
            character, c1, c2 = self.next()
        self.switchContext('global')
        return token
    if self.context == 'function':
        self.restoreContext()
        #Enclosed function arguments
        if c1 == '(':
            self.switchContext('closed-funcargs')
            #Clear the paren
            self.next()
            #Clear trailing whitespace
            while c2 in INDENTATION:
                self.__debug("SCANNING4")
                character, c1, c2 = self.next()
        else:
            self.switchContext('funcargs')
    if self.context == 'funcargs':
        token = Token(character)
        token.type = ARGUMENT
        #Add initial spacing to the token
        while c1 in SPACING:
            self.__debug("SCANNING5")
            character, c1, c2 = self.next()
            token.components += c1
        #Then, consume everything up to the next whitespace
        while not c1 in SPACING:
            self.__debug("SCANNING6")
            # Backslash-escaped space stays inside the argument.
            if c1 == '\\':
                if c2 == ' ':
                    token.components += ' '
                    self.next()
            character, c1, c2 = self.next()
            token.components += c1
        #Argument list is over
        if c1 == "\n":
            self.next()
            self.restoreContext()
            return token
        #Do not add funcarg-delimiting whitespace
        self.next()
        return token
    if self.context == 'closed-funcargs':
        token = Token(character)
        token.type = ARGUMENT
        #Add initial spacing to the token
        while c1 in SPACING:
            self.__debug("SCANNING7")
            character, c1, c2 = self.next()
            token.components += c1
        #Then, consume everything up to the next whitespace
        while not c1 in SPACING:
            self.__debug("SCANNING8")
            if c1 == '\\':
                if c2 == ' ' or c2 == ')':
                    token.components += ' '
                    self.next()
            character, c1, c2 = self.next()
            token.components += c1
            if c2 == ')':
                #Funcargs ended; return context after stripping )
                self.next()
                self.restoreContext()
                return token
        return token
    #If we have not indented at all and we aren't using a name declaration, it's a selector
    if len(self.indentStack) == 1:
        self.__debug("Selector")
        token = Token(character)
        token.type = SELECTOR
        character, c1, c2 = self.next()
        while c1 != "\n":
            self.__debug("Seeking selector")
            token.components += c1
            #Declaration starting
            if c2 == DECLARATION:
                return token
            character, c1, c2 = self.next()
        #Remove the terminating newline
        self.next()
        self.__debug("Returning selector")
        return token
    return None
def analize (cls, text):
    """Tokenize ``text`` and classify each token, then build the tree.

    Pass 1 splits the input on spaces, brackets, commas, '.', '_', '=',
    '?' and double-quoted strings.  Pass 2 assigns a TokenType to every
    token, consulting the qsl_linkage / qsl_concept tables for names.
    On success the result is stored in ``cls.proposition_tree``.

    Returns:
        True on success; False on any lookup/parse error (the message is
        placed in ``cls.__error_text``).
    """
    word = []
    tokens = []
    prev_letter = ""
    # Pass 1: split the text into tokens.
    i = 0
    while i < len (text):
        letter = text[i]
        if letter == " ":
            # Space: flush the accumulated word, if any.
            if len (word) > 0:
                token = Token ()
                token.text = ''.join (word)
                tokens.append (token)
                word = []
        elif letter == "(" or \
             letter == ")" or \
             letter == ",":
             #letter == "_":
            # Punctuation: flush the word, then emit the punctuation itself.
            if len (word) > 0:
                token = Token ()
                token.text = ''.join (word)
                tokens.append (token)
                word = []
            token = Token ()
            token.text = letter
            tokens.append (token)
        elif letter == "." or \
             letter == "_":
            # '.' / '_' are standalone tokens only when isolated;
            # look ahead to decide whether they belong to a larger word.
            j = i + 1
            if j == len (text):
                # Trailing '.'/'_' at end of input: emit as its own token.
                if len (word) > 0:
                    token = Token ()
                    token.text = ''.join (word)
                    tokens.append (token)
                    word = []
                token = Token ()
                token.text = letter
                tokens.append (token)
            while j < len (text):
                letter = text[j]
                if letter == " ":
                    break
                elif letter == ")" or \
                     letter == "(" or \
                     letter == ",":
                    # Re-scan the punctuation in the outer loop.
                    j = j - 1
                    break
                if i == (j - 1):
                    if letter == " ":
                        if len (word) > 0:
                            token = Token ()
                            token.text = ''.join (word)
                            tokens.append (token)
                            word = []
                        token = Token ()
                        token.text = letter
                        tokens.append (token)
                        break
                    else:
                        # Not isolated: keep the '.'/'_' inside the word.
                        word.append (text[i])
                        prev_letter = letter
                        word.append (letter)
                        j += 1
                else:
                    prev_letter = letter
                    word.append (letter)
                    j += 1
            i = j
        elif letter == "=":
            # '=' is a token only at a word boundary; otherwise part of a word.
            if prev_letter == " " or \
               prev_letter == "(":
                if len (word) > 0:
                    token = Token ()
                    token.text = ''.join (word)
                    tokens.append (token)
                    word = []
                token = Token ()
                token.text = letter
                tokens.append (token)
            else:
                word.append (letter)
        elif letter == "?":
            if len (word) > 0:
                token = Token ()
                token.text = ''.join (word)
                tokens.append (token)
                word = []
            # A '?' after space/'(' starts a linkage name; otherwise it
            # stands alone as a question-mark token.
            if prev_letter not in [" ", "("]:
                token = Token ()
                token.text = letter
                tokens.append (token)
            else:
                word.append (letter)
        elif letter == "\"":
            # Double-quoted string literal; backslash escapes the quote.
            i += 1
            while i < len (text):
                prev_letter = letter
                letter = text[i]
                if letter == "\"" and prev_letter != "\\":
                    break
                else:
                    word.append (letter)
                i += 1
            token = Token ()
            token.text = ''.join (word)
            token.type = TokenType.string
            tokens.append (token)
            word = []
        else:
            word.append (letter)
        prev_letter = letter
        i += 1
    # Pass 2: identify each token's type.
    for token in tokens:
        #print token.text
        if token.type == TokenType.string:
            continue
        if token.text.find ('?') == 0 and len (token.text) > 1:
            # '?name' — look the linkage up in the database.
            s = token.text.replace ('?', '')
            # NOTE(review): SQL built by string concatenation — injection
            # risk and breaks on names containing quotes; should use the
            # driver's parameterized-query placeholders.
            query = "SELECT id FROM qsl_linkage WHERE name = \'" + s + "\';"
            cls.__cursor.execute (query)
            row = cls.__cursor.fetchone ()
            if row != None:
                token.type = TokenType.linkage
                token.linkage = TokenLinkage ()
                token.linkage.id = row[0]
                token.linkage.name = s
            else:
                cls.__error_text = ErrorHelper.get_text (102, token.text)
                return False
        elif token.text.find ('%') == 0 and len (token.text) > 1:
            token.type = TokenType.code_object
        elif token.text.find ('*') == 0:
            # Modifier
            token.type = TokenType.modifier
        elif token.text == "(":
            token.type = TokenType.opening_bracket
        elif token.text == ")":
            token.type = TokenType.closing_bracket
        elif token.text == ",":
            token.type = TokenType.comma
        elif token.text == "_":
            token.type = TokenType.underscore
        elif token.text == ".":
            token.type = TokenType.point
        elif token.text == "?":
            token.type = TokenType.question_mark
        elif token.text == "=":
            token.type = TokenType.equal_sign
        else:
            # Anything else: concept name, number, or unknown (error 103).
            # NOTE(review): same string-concatenated SQL as above.
            query = "SELECT id, type FROM qsl_concept WHERE name = \'" + token.text + "\';"
            cls.__cursor.execute (query)
            row = cls.__cursor.fetchone ()
            if row != None:
                token.type = TokenType.concept
                token.concept = TokenConcept ()
                token.concept.id = row[0]
                token.concept.type = row[1]
                token.concept.name = token.text
            else:
                if token.text.isdigit ():
                    token.type = TokenType.number
                else:
                    cls.__error_text = ErrorHelper.get_text (103, token.text)
                    return False
    node = cls.build_tree (tokens)
    if node != None:
        cls.proposition_tree = PropositionTree ()
        cls.proposition_tree.root_node = node
    else:
        return False
    return True
def analize(cls, text):
    """Tokenize ``text`` and classify each token, then build the tree.

    Reformatted duplicate of the ``analize`` variant above: pass 1 splits
    the input on spaces, brackets, commas, '.', '_', '=', '?' and quoted
    strings; pass 2 assigns a TokenType to each token, consulting the
    qsl_linkage / qsl_concept tables.  On success the resulting tree is
    stored in ``cls.proposition_tree``.

    Returns:
        True on success; False on any lookup/parse error (the message is
        placed in ``cls.__error_text``).
    """
    word = []
    tokens = []
    prev_letter = ""
    # Pass 1: split the text into tokens.
    i = 0
    while i < len(text):
        letter = text[i]
        if letter == " ":
            # Space: flush the accumulated word, if any.
            if len(word) > 0:
                token = Token()
                token.text = ''.join(word)
                tokens.append(token)
                word = []
        elif letter == "(" or \
             letter == ")" or \
             letter == ",":
             #letter == "_":
            # Punctuation: flush the word, then emit the punctuation itself.
            if len(word) > 0:
                token = Token()
                token.text = ''.join(word)
                tokens.append(token)
                word = []
            token = Token()
            token.text = letter
            tokens.append(token)
        elif letter == "." or \
             letter == "_":
            # '.' / '_' are standalone tokens only when isolated;
            # look ahead to decide whether they belong to a larger word.
            j = i + 1
            if j == len(text):
                # Trailing '.'/'_' at end of input: emit as its own token.
                if len(word) > 0:
                    token = Token()
                    token.text = ''.join(word)
                    tokens.append(token)
                    word = []
                token = Token()
                token.text = letter
                tokens.append(token)
            while j < len(text):
                letter = text[j]
                if letter == " ":
                    break
                elif letter == ")" or \
                     letter == "(" or \
                     letter == ",":
                    # Re-scan the punctuation in the outer loop.
                    j = j - 1
                    break
                if i == (j - 1):
                    if letter == " ":
                        if len(word) > 0:
                            token = Token()
                            token.text = ''.join(word)
                            tokens.append(token)
                            word = []
                        token = Token()
                        token.text = letter
                        tokens.append(token)
                        break
                    else:
                        # Not isolated: keep the '.'/'_' inside the word.
                        word.append(text[i])
                        prev_letter = letter
                        word.append(letter)
                        j += 1
                else:
                    prev_letter = letter
                    word.append(letter)
                    j += 1
            i = j
        elif letter == "=":
            # '=' is a token only at a word boundary; otherwise part of a word.
            if prev_letter == " " or \
               prev_letter == "(":
                if len(word) > 0:
                    token = Token()
                    token.text = ''.join(word)
                    tokens.append(token)
                    word = []
                token = Token()
                token.text = letter
                tokens.append(token)
            else:
                word.append(letter)
        elif letter == "?":
            if len(word) > 0:
                token = Token()
                token.text = ''.join(word)
                tokens.append(token)
                word = []
            # A '?' after space/'(' starts a linkage name; otherwise it
            # stands alone as a question-mark token.
            if prev_letter not in [" ", "("]:
                token = Token()
                token.text = letter
                tokens.append(token)
            else:
                word.append(letter)
        elif letter == "\"":
            # Double-quoted string literal; backslash escapes the quote.
            i += 1
            while i < len(text):
                prev_letter = letter
                letter = text[i]
                if letter == "\"" and prev_letter != "\\":
                    break
                else:
                    word.append(letter)
                i += 1
            token = Token()
            token.text = ''.join(word)
            token.type = TokenType.string
            tokens.append(token)
            word = []
        else:
            word.append(letter)
        prev_letter = letter
        i += 1
    # Pass 2: identify each token's type.
    for token in tokens:
        #print token.text
        if token.type == TokenType.string:
            continue
        if token.text.find('?') == 0 and len(token.text) > 1:
            # '?name' — look the linkage up in the database.
            s = token.text.replace('?', '')
            # NOTE(review): SQL built by string concatenation — injection
            # risk and breaks on names containing quotes; should use the
            # driver's parameterized-query placeholders.
            query = "SELECT id FROM qsl_linkage WHERE name = \'" + s + "\';"
            cls.__cursor.execute(query)
            row = cls.__cursor.fetchone()
            if row != None:
                token.type = TokenType.linkage
                token.linkage = TokenLinkage()
                token.linkage.id = row[0]
                token.linkage.name = s
            else:
                cls.__error_text = ErrorHelper.get_text(102, token.text)
                return False
        elif token.text.find('%') == 0 and len(token.text) > 1:
            token.type = TokenType.code_object
        elif token.text.find('*') == 0:
            # Modifier
            token.type = TokenType.modifier
        elif token.text == "(":
            token.type = TokenType.opening_bracket
        elif token.text == ")":
            token.type = TokenType.closing_bracket
        elif token.text == ",":
            token.type = TokenType.comma
        elif token.text == "_":
            token.type = TokenType.underscore
        elif token.text == ".":
            token.type = TokenType.point
        elif token.text == "?":
            token.type = TokenType.question_mark
        elif token.text == "=":
            token.type = TokenType.equal_sign
        else:
            # Anything else: concept name, number, or unknown (error 103).
            # NOTE(review): same string-concatenated SQL as above.
            query = "SELECT id, type FROM qsl_concept WHERE name = \'" + token.text + "\';"
            cls.__cursor.execute(query)
            row = cls.__cursor.fetchone()
            if row != None:
                token.type = TokenType.concept
                token.concept = TokenConcept()
                token.concept.id = row[0]
                token.concept.type = row[1]
                token.concept.name = token.text
            else:
                if token.text.isdigit():
                    token.type = TokenType.number
                else:
                    cls.__error_text = ErrorHelper.get_text(
                        103, token.text)
                    return False
    node = cls.build_tree(tokens)
    if node != None:
        cls.proposition_tree = PropositionTree()
        cls.proposition_tree.root_node = node
    else:
        return False
    return True