def _removeCommentsAndStrings(self):
    '''
    Blank out comments and strings in 'sourceString' and build 'characterToLineMap'.

    Two things happen here:
    a. Character by character, the characters which are not part of comments or
       strings are added to a new string which replaces 'sourceString'.
       A string-delimited block (see 'blockComments') is reduced to its start
       and end delimiters; a regex-delimited block and an inline comment are
       dropped entirely (the newline that ends an inline comment is kept).
    b. At the same time, 'characterToLineMap' is filled with
       {index of the first character of a line in the NEW string: line number
       in the ORIGINAL string}.

    If a block is opened but never closed, or an inline comment runs to the end
    of the input, processing stops there and what was cleaned so far is kept.

    On return 'sourceString' is an MString (defined elsewhere in this file;
    only MString(''), append(), len() and str() are relied upon here).
    It is used to save time on appending.
    '''
    print("Removing comments and strings...")
    originalString = str(self.sourceString)
    sourceLen = len(originalString)
    self.sourceString = MString('')

    #character 0 is line #1; lineCount always holds the number of the NEXT line
    self.characterToLineMap = {0: 1}
    lineCount = 2

    inlineComment = self.inlineComments
    inlineCommentLen = len(inlineComment)

    i = 0
    while i < sourceLen:
        #There are multiple types of block comments so we have to check them all
        for blockComment in self.blockComments:
            startDelim = blockComment['start']
            endDelim = blockComment['end']

            if isinstance(startDelim, str):
                #startswith() avoids copying the tail of the source for every character
                if not originalString.startswith(startDelim, i):
                    continue
                prevI = i
                contentStart = i + len(startDelim)
                endPos = originalString.find(endDelim, contentStart)
                while endPos != -1:
                    #an end delimiter preceded by an odd number of backslashes is escaped
                    backslashes = 0
                    k = endPos - 1
                    while k >= contentStart and originalString[k] == '\\':
                        backslashes += 1
                        k -= 1
                    if backslashes % 2 == 0:
                        break
                    endPos = originalString.find(endDelim, endPos + 1)
                if endPos == -1:
                    #the block is never closed: keep what has been cleaned so far
                    return
                #jog past the END delimiter (its length may differ from the start's)
                i = endPos + len(endDelim)
                #newlines inside the block still count towards line numbers
                lineCount += originalString.count('\n', prevI, i)
                #still want to see the delimiters, just not what is inside
                self.sourceString.append(startDelim + endDelim)
                break
            else:
                #regex delimiters (e.g. javascript regex literals)
                #the tail is sliced so that anchors behave as if the text started at i
                match = startDelim.match(originalString[i:])
                if not match:
                    continue
                prevI = i
                contentStart = i + match.end(0)
                endMatch = endDelim.search(originalString[contentStart:])
                if not endMatch:
                    return
                i = contentStart + endMatch.end(0)
                lineCount += originalString.count('\n', prevI, i)
                break
        else:
            #no block comment starts here
            #an empty inline marker would match everywhere, so it means "no inline comments"
            if inlineCommentLen and originalString.startswith(inlineComment, i):
                #jog to the newline; it is copied (and counted) on the next pass
                i = originalString.find("\n", i + inlineCommentLen)
                #no newline means the comment runs to the end of the file
                if i == -1:
                    return
            else:
                #Otherwise, it is not a comment. Keep the character
                char = originalString[i]
                self.sourceString.append(char)
                if char == '\n':
                    #the character after this newline begins the next line
                    self.characterToLineMap[len(self.sourceString)] = lineCount
                    lineCount += 1
                i += 1
def _removeCommentsAndStrings(self):
    '''
    Blank out comments and strings in 'sourceString' and build 'characterToLineMap'.

    Two things happen here:
    a. Character by character, the characters which are not part of comments or
       strings are added to a new string which replaces 'sourceString'.
       A string-delimited block (see 'blockComments') is reduced to its start
       and end delimiters; a regex-delimited block and an inline comment are
       dropped entirely (the newline that ends an inline comment is kept).
    b. At the same time, 'characterToLineMap' is filled with
       {index of the first character of a line in the NEW string: line number
       in the ORIGINAL string}.

    If a block is opened but never closed, or an inline comment runs to the end
    of the input, processing stops there and what was cleaned so far is kept.

    On return 'sourceString' is an MString (defined elsewhere in this file;
    only MString(''), append(), len() and str() are relied upon here).
    It is used to save time on appending.
    '''
    print("Removing comments and strings...")
    originalString = str(self.sourceString)
    sourceLen = len(originalString)
    self.sourceString = MString('')

    #character 0 is line #1; lineCount always holds the number of the NEXT line
    self.characterToLineMap = {0: 1}
    lineCount = 2

    inlineComment = self.inlineComments
    inlineCommentLen = len(inlineComment)

    i = 0
    while i < sourceLen:
        #There are multiple types of block comments so we have to check them all
        for blockComment in self.blockComments:
            startDelim = blockComment['start']
            endDelim = blockComment['end']

            if isinstance(startDelim, str):
                #startswith() avoids copying the tail of the source for every character
                if not originalString.startswith(startDelim, i):
                    continue
                prevI = i
                contentStart = i + len(startDelim)
                endPos = originalString.find(endDelim, contentStart)
                while endPos != -1:
                    #an end delimiter preceded by an odd number of backslashes is escaped
                    backslashes = 0
                    k = endPos - 1
                    while k >= contentStart and originalString[k] == '\\':
                        backslashes += 1
                        k -= 1
                    if backslashes % 2 == 0:
                        break
                    endPos = originalString.find(endDelim, endPos + 1)
                if endPos == -1:
                    #the block is never closed: keep what has been cleaned so far
                    return
                #jog past the END delimiter (its length may differ from the start's)
                i = endPos + len(endDelim)
                #newlines inside the block still count towards line numbers
                lineCount += originalString.count('\n', prevI, i)
                #still want to see the delimiters, just not what is inside
                self.sourceString.append(startDelim + endDelim)
                break
            else:
                #regex delimiters (e.g. javascript regex literals)
                #the tail is sliced so that anchors behave as if the text started at i
                match = startDelim.match(originalString[i:])
                if not match:
                    continue
                prevI = i
                contentStart = i + match.end(0)
                endMatch = endDelim.search(originalString[contentStart:])
                if not endMatch:
                    return
                i = contentStart + endMatch.end(0)
                lineCount += originalString.count('\n', prevI, i)
                break
        else:
            #no block comment starts here
            #an empty inline marker would match everywhere, so it means "no inline comments"
            if inlineCommentLen and originalString.startswith(inlineComment, i):
                #jog to the newline; it is copied (and counted) on the next pass
                i = originalString.find("\n", i + inlineCommentLen)
                #no newline means the comment runs to the end of the file
                if i == -1:
                    return
            else:
                #Otherwise, it is not a comment. Keep the character
                char = originalString[i]
                self.sourceString.append(char)
                if char == '\n':
                    #the character after this newline begins the next line
                    self.characterToLineMap[len(self.sourceString)] = lineCount
                    lineCount += 1
                i += 1
class SourceCode(object):
    '''
    SourceCode is a convenient object representing:
        source text (sourceString)
        a line number map (characterToLineMap): {character index of the start
        of a line in sourceString: line number in the original file}

    Implementations will probably only have to overwrite the class attributes
        blockComments    list of {'start':..., 'end':...} dicts; both values are
                         plain strings or both are compiled regexes
        inlineComments   the marker which starts a comment running to end of line
        delimA, delimB   the opening and closing block delimiters

    The sourcecode object supports the following primitive operations
        sc = SourceCode(sourceString)
        len(sc)     #characters
        sc[a:b]     #new sourcecode between characters a and b
        sc[a]       #character
        scA + scB   #addition as long as line numbers do not overlap
        scA - scB   #subtraction as long as scB is completely inside scA
        bool(sc)    #truth testing (False if empty or only whitespace)
        str(sc)     #source with line numbers

    And these are the methods
        copy()                              #deepcopy
        firstLineNumber()                   #of the entire object
        lastLineNumber()                    #of the entire object
        remove(string)                      #and return new sourcecode
        pop()                               #remove and return the last line
        getPosition(lineNumber)             #get character index at linenumber
        getLineNumber(characterPos)         #get line number of character
        find(what,start)                    #run sourceString.find()
        extractBetweenDelimiters(startAt)   #new sourcecode between the first pair of delimiters after startAt
        getSourceInBlock(bracketPos)        #the source between a bracket and its match
        matchingBracketPos(bracketPos)      #the matching bracket position
        endDelimPos(startAt)                #nearest closing delimiter given a position in the block
        openDelimPos(pos)                   #nearest opening bracket given a position in the block
        _removeCommentsAndStrings()         #called on init. Cleans sourceString in place
    '''

    #These two must be subclassed
    blockComments = []
    inlineComments = ''

    delimA = '{'
    delimB = '}'

    def __init__(self, sourceString, characterToLineMap=None):
        '''
        Store the source. When no (or an empty) characterToLineMap is given the
        source is treated as raw text: comments and strings are removed and the
        line map is built while doing so. Otherwise both are taken as they are.
        '''
        self.sourceString = sourceString

        if characterToLineMap:
            self.characterToLineMap = characterToLineMap
        else:
            self.characterToLineMap = {}

            self._removeCommentsAndStrings()
            #convert back to regular python string from mutable string
            self.sourceString = str(self.sourceString)

            #DEBUG is a module level flag defined elsewhere in this file
            if DEBUG:
                with open('cleanedSource', 'w') as outfile:
                    outfile.write(self.sourceString)

        self.delimLen = len(self.delimA)

    def __len__(self):
        '''Number of characters in the source'''
        return len(self.sourceString)

    def __getitem__(self, sl):
        '''
        An int returns the single character at that index.
        A slice returns a new object with the sourceString and the
        characterToLineMap sliced by [firstChar:lastChar]
        1. Slice the source string in the obvious way.
        2. Slice the characterToLineMap
            a. Drop the mappings which fall outside of [start, stop]
            b. Shift the remaining positions left by 'start'
            c. Map position 0 to the line which contains 'start'
        Raises Exception for anything but an int or slice, for a step used
        together with a start or stop, and when start lies after stop.
        '''
        if isinstance(sl, int):
            return self.sourceString[sl]

        if not isinstance(sl, slice):
            raise Exception("Slice was not passed")

        #NOTE(review): a step without start/stop (sc[::2]) is silently ignored - confirm intended
        if sl.step and (sl.start or sl.stop):
            raise Exception("Sourcecode slicing does not support the step attribute (e.g. source[from:to:step] is not supported)")

        length = len(self.sourceString)

        start = 0 if sl.start is None else sl.start
        if start < 0:
            #count from the end like a string does so that the line map can follow
            start = max(0, length + start)

        if sl.stop is None:
            stop = length
        elif sl.stop < 0:
            stop = length + sl.stop
        else:
            stop = sl.stop

        if start > stop:
            #%s because either bound may be None
            raise Exception("Begin slice cannot be greater than end slice. You passed SourceCode[%s:%s]" % (sl.start, sl.stop))

        ret = self.copy()
        ret.sourceString = ret.sourceString[start:stop]

        #keep the mappings inside the slice and shift them to the new start position
        #'stop' itself is kept: it may be the beginning of a line right after the slice
        shiftedCharacterToLineMap = {}
        for characterPosition, lineNumber in ret.characterToLineMap.items():
            if start <= characterPosition <= stop:
                shiftedCharacterToLineMap[characterPosition - start] = lineNumber

        #we need this to be sure that we can always get the line number no matter where we splice
        shiftedCharacterToLineMap[0] = self.getLineNumber(start)

        ret.characterToLineMap = shiftedCharacterToLineMap
        return ret

    def __add__(self, other):
        '''
        Add two pieces of sourcecode together shifting the character to line
        map of 'other' appropriately. Always returns a plain SourceCode.
        Raises Exception when 'other' begins on a line before this one ends.
        '''
        #If the other operand is nothing, just return the value of this operand
        if not other:
            return self.copy()

        if self.lastLineNumber() > other.firstLineNumber():
            raise Exception("When adding two pieces of sourcecode, the second piece must be completely after the first as far as line numbers go")

        sourceString = self.sourceString + other.sourceString

        #positions of 'other' move right by the length of this source
        offset = len(self.sourceString)
        characterToLineMap = dict(self.characterToLineMap)
        for characterPosition, lineNumber in other.characterToLineMap.items():
            characterToLineMap[characterPosition + offset] = lineNumber

        return SourceCode(sourceString=sourceString, characterToLineMap=characterToLineMap)

    def __sub__(self, other):
        '''
        Return a new sourcecode with the first occurrence of the text of
        'other' cut out. Raises Exception when the line numbers of 'other' do
        not lie within this source or when its text cannot be found.
        '''
        if not other:
            return self.copy()

        if self.firstLineNumber() > other.firstLineNumber() or self.lastLineNumber() < other.lastLineNumber():
            raise Exception("When subtracting a piece of one bit of sourcecode from another, the second must lie completely within the first")

        firstPos = self.sourceString.find(other.sourceString)

        if firstPos == -1:
            raise Exception('Could not subtract string starting with "%s" from source because string could not be found' % other.sourceString[:50].replace("\n", "\\n"))

        lastPos = firstPos + len(other.sourceString)

        return self[:firstPos] + self[lastPos:]

    def __nonzero__(self):
        '''
        Truth value of the object: False when the sourceString has nothing
        or nothing but whitespace
        '''
        return self.sourceString.strip() != ''

    #python 3 looks for __bool__ instead of __nonzero__
    __bool__ = __nonzero__

    def __str__(self):
        '''
        Mostly for debugging. The source with 'N: ' inserted at every mapped position
        '''
        pieces = []
        for i, char in enumerate(self.sourceString):
            if i in self.characterToLineMap:
                pieces.append('%d: ' % self.characterToLineMap[i])
            pieces.append(char)
        return ''.join(pieces)

    def copy(self):
        '''Deep copy of this object'''
        return copy.deepcopy(self)

    def firstLineNumber(self):
        '''
        First line number of the entire source
        Raises Exception when the line map is empty
        '''
        try:
            return min(self.characterToLineMap.values())
        except ValueError:
            raise Exception("Sourcecode has no line numbers")

    def lastLineNumber(self):
        '''
        Last line number of the entire source
        Raises Exception when the line map is empty
        '''
        try:
            return max(self.characterToLineMap.values())
        except ValueError:
            raise Exception("Sourcecode has no line numbers")

    def remove(self, stringToRemove):
        '''
        Return a new sourcecode with the first occurrence of the string removed.
        Does not alter object in place. Raises Exception if the string is not found
        '''
        firstPos = self.sourceString.find(stringToRemove)
        if firstPos == -1:
            raise Exception("String not found in source")
        lastPos = firstPos + len(stringToRemove)
        return self[:firstPos] + self[lastPos:]

    def pop(self):
        '''
        Pop off the last line: remove it from this object and return its text.
        The text starts with the newline which precedes the line; when the
        source has no newline at all the whole source is the last line.
        '''
        #rfind gives -1 without a newline; everything is the last line then
        lastLinePos = max(self.sourceString.rfind('\n'), 0)
        ret = self.sourceString[lastLinePos:]

        #rebinding 'self' would only change a local name, so update the attributes
        remaining = self[:lastLinePos]
        self.sourceString = remaining.sourceString
        self.characterToLineMap = remaining.characterToLineMap
        return ret

    def getPosition(self, lineNumberRequest):
        '''
        From lineNumber, get the character position where that line begins.
        When several positions carry the line number the first one is returned.
        Raises Exception when the line number is not in the map
        '''
        positions = [pos for pos, lineNumber in self.characterToLineMap.items()
                     if lineNumber == lineNumberRequest]
        if not positions:
            raise Exception("Could not find line number in source")
        return min(positions)

    def getLineNumber(self, pos):
        '''
        Decrement until we find the first character of the line and can get the linenumber
        Raises Exception when no mapped position exists at or before 'pos'
        '''
        current = pos
        while current >= 0:
            try:
                return self.characterToLineMap[current]
            except KeyError:
                current -= 1
        #report the position which was asked for, not where the search ended
        raise Exception("could not get line number for position %d" % pos)

    def find(self, what, start=0):
        '''
        Pass through 'find' makes implementations cleaner
        '''
        return self.sourceString.find(what, start)

    def extractBetweenDelimiters(self, startAt=0):
        '''
        Return the source between the first pair of delimiters after 'startAt'
        or None when there is no opening delimiter or it is never closed
        '''
        start = self.sourceString.find(self.delimA, startAt)
        if start == -1:
            return None
        start += self.delimLen

        #endDelimPos takes the delimiters from the object itself
        endPos = self.endDelimPos(start)
        if endPos == -1:
            return None
        return self[start:endPos]

    def getSourceInBlock(self, bracketPos):
        '''
        Get the source between the bracket at 'bracketPos' and its matching bracket
        '''
        otherBracketPosition = self.matchingBracketPos(bracketPos)

        startBracketPos = min(bracketPos, otherBracketPosition)
        endBracketPos = max(bracketPos, otherBracketPosition)

        return self[startBracketPos + 1:endBracketPos]

    def matchingBracketPos(self, bracketPos):
        '''
        Find the position of the bracket matching the one at 'bracketPos'.
        Raises Exception when the character there is neither delimA nor delimB
        '''
        delim = self[bracketPos]
        if delim == self.delimA:
            #slicing instead of indexing: the bracket may be the last character
            if self.sourceString[bracketPos + 1:bracketPos + 2] == self.delimB:
                return bracketPos + 1
            return self.endDelimPos(startAt=bracketPos + 1)
        elif delim == self.delimB:
            #index -1 would wrap around to the end of the string
            if bracketPos > 0 and self.sourceString[bracketPos - 1] == self.delimA:
                return bracketPos - 1
            return self.openDelimPos(bracketPos - 1)
        else:
            raise Exception('"%s" is not a known delimiter' % delim)

    def endDelimPos(self, startAt):
        '''
        Find the nearest end delimiter assuming that 'startAt' is inside of a block.
        Returns its position or -1 when the block is never closed
        '''
        count = 1
        i = startAt
        while i < len(self.sourceString) and count > 0:
            tmp = self.sourceString[i:i + self.delimLen]
            if tmp == self.delimA:
                count += 1
                i += self.delimLen
            elif tmp == self.delimB:
                count -= 1
                i += self.delimLen
            else:
                i += 1

        if count == 0:
            #i was already moved past the closing delimiter
            return i - self.delimLen
        else:
            return -1

    def openDelimPos(self, pos):
        '''
        Find the nearest begin delimiter assuming that 'pos' is inside of a block.
        Returns its position or 0 when none is found
        TODO there is probably no reason why this also includes parenthesis
        TODO this should probably just be the same function as endDelimPos
        '''
        count = 0
        i = pos
        while i >= 0 and count >= 0:
            if self.sourceString[i] in ('}', ')'):
                count += 1
            elif self.sourceString[i] in ('{', '('):
                count -= 1
            i -= 1

        if count == -1:
            #i was already moved one before the opening bracket
            return i + 1
        else:
            return 0

    def _removeCommentsAndStrings(self):
        '''
        Blank out comments and strings in 'sourceString' and build 'characterToLineMap'.

        Two things happen here:
        a. Character by character, the characters which are not part of comments or
           strings are added to a new string which replaces 'sourceString'.
           A string-delimited block (see 'blockComments') is reduced to its start
           and end delimiters; a regex-delimited block and an inline comment are
           dropped entirely (the newline that ends an inline comment is kept).
        b. At the same time, 'characterToLineMap' is filled with
           {index of the first character of a line in the NEW string: line number
           in the ORIGINAL string}.

        If a block is opened but never closed, or an inline comment runs to the end
        of the input, processing stops there and what was cleaned so far is kept.

        On return 'sourceString' is an MString (defined elsewhere in this file;
        only MString(''), append(), len() and str() are relied upon here).
        It is used to save time on appending.
        '''
        print("Removing comments and strings...")
        originalString = str(self.sourceString)
        sourceLen = len(originalString)
        self.sourceString = MString('')

        #character 0 is line #1; lineCount always holds the number of the NEXT line
        self.characterToLineMap = {0: 1}
        lineCount = 2

        inlineComment = self.inlineComments
        inlineCommentLen = len(inlineComment)

        i = 0
        while i < sourceLen:
            #There are multiple types of block comments so we have to check them all
            for blockComment in self.blockComments:
                startDelim = blockComment['start']
                endDelim = blockComment['end']

                if isinstance(startDelim, str):
                    #startswith() avoids copying the tail of the source for every character
                    if not originalString.startswith(startDelim, i):
                        continue
                    prevI = i
                    contentStart = i + len(startDelim)
                    endPos = originalString.find(endDelim, contentStart)
                    while endPos != -1:
                        #an end delimiter preceded by an odd number of backslashes is escaped
                        backslashes = 0
                        k = endPos - 1
                        while k >= contentStart and originalString[k] == '\\':
                            backslashes += 1
                            k -= 1
                        if backslashes % 2 == 0:
                            break
                        endPos = originalString.find(endDelim, endPos + 1)
                    if endPos == -1:
                        #the block is never closed: keep what has been cleaned so far
                        return
                    #jog past the END delimiter (its length may differ from the start's)
                    i = endPos + len(endDelim)
                    #newlines inside the block still count towards line numbers
                    lineCount += originalString.count('\n', prevI, i)
                    #still want to see the delimiters, just not what is inside
                    self.sourceString.append(startDelim + endDelim)
                    break
                else:
                    #regex delimiters (e.g. javascript regex literals)
                    #the tail is sliced so that anchors behave as if the text started at i
                    match = startDelim.match(originalString[i:])
                    if not match:
                        continue
                    prevI = i
                    contentStart = i + match.end(0)
                    endMatch = endDelim.search(originalString[contentStart:])
                    if not endMatch:
                        return
                    i = contentStart + endMatch.end(0)
                    lineCount += originalString.count('\n', prevI, i)
                    break
            else:
                #no block comment starts here
                #an empty inline marker would match everywhere, so it means "no inline comments"
                if inlineCommentLen and originalString.startswith(inlineComment, i):
                    #jog to the newline; it is copied (and counted) on the next pass
                    i = originalString.find("\n", i + inlineCommentLen)
                    #no newline means the comment runs to the end of the file
                    if i == -1:
                        return
                else:
                    #Otherwise, it is not a comment. Keep the character
                    char = originalString[i]
                    self.sourceString.append(char)
                    if char == '\n':
                        #the character after this newline begins the next line
                        self.characterToLineMap[len(self.sourceString)] = lineCount
                        lineCount += 1
                    i += 1
class SourceCode(object):
    '''
    SourceCode is a convenient object representing:
        source text (sourceString)
        a line number map (characterToLineMap): {character index of the start
        of a line in sourceString: line number in the original file}

    Implementations will probably only have to overwrite the class attributes
        blockComments    list of {'start':..., 'end':...} dicts; both values are
                         plain strings or both are compiled regexes
        inlineComments   the marker which starts a comment running to end of line
        delimA, delimB   the opening and closing block delimiters

    The sourcecode object supports the following primitive operations
        sc = SourceCode(sourceString)
        len(sc)     #characters
        sc[a:b]     #new sourcecode between characters a and b
        sc[a]       #character
        scA + scB   #addition as long as line numbers do not overlap
        scA - scB   #subtraction as long as scB is completely inside scA
        bool(sc)    #truth testing (False if empty or only whitespace)
        str(sc)     #source with line numbers

    And these are the methods
        copy()                              #deepcopy
        firstLineNumber()                   #of the entire object
        lastLineNumber()                    #of the entire object
        remove(string)                      #and return new sourcecode
        pop()                               #remove and return the last line
        getPosition(lineNumber)             #get character index at linenumber
        getLineNumber(characterPos)         #get line number of character
        find(what,start)                    #run sourceString.find()
        extractBetweenDelimiters(startAt)   #new sourcecode between the first pair of delimiters after startAt
        getSourceInBlock(bracketPos)        #the source between a bracket and its match
        matchingBracketPos(bracketPos)      #the matching bracket position
        endDelimPos(startAt)                #nearest closing delimiter given a position in the block
        openDelimPos(pos)                   #nearest opening bracket given a position in the block
        _removeCommentsAndStrings()         #called on init. Cleans sourceString in place
    '''

    #These two must be subclassed
    blockComments = []
    inlineComments = ''

    delimA = '{'
    delimB = '}'

    def __init__(self, sourceString, characterToLineMap=None):
        '''
        Store the source. When no (or an empty) characterToLineMap is given the
        source is treated as raw text: comments and strings are removed and the
        line map is built while doing so. Otherwise both are taken as they are.
        '''
        self.sourceString = sourceString

        if characterToLineMap:
            self.characterToLineMap = characterToLineMap
        else:
            self.characterToLineMap = {}

            self._removeCommentsAndStrings()
            #convert back to regular python string from mutable string
            self.sourceString = str(self.sourceString)

            #DEBUG is a module level flag defined elsewhere in this file
            if DEBUG:
                with open('cleanedSource', 'w') as outfile:
                    outfile.write(self.sourceString)

        self.delimLen = len(self.delimA)

    def __len__(self):
        '''Number of characters in the source'''
        return len(self.sourceString)

    def __getitem__(self, sl):
        '''
        An int returns the single character at that index.
        A slice returns a new object with the sourceString and the
        characterToLineMap sliced by [firstChar:lastChar]
        1. Slice the source string in the obvious way.
        2. Slice the characterToLineMap
            a. Drop the mappings which fall outside of [start, stop]
            b. Shift the remaining positions left by 'start'
            c. Map position 0 to the line which contains 'start'
        Raises Exception for anything but an int or slice, for a step used
        together with a start or stop, and when start lies after stop.
        '''
        if isinstance(sl, int):
            return self.sourceString[sl]

        if not isinstance(sl, slice):
            raise Exception("Slice was not passed")

        #NOTE(review): a step without start/stop (sc[::2]) is silently ignored - confirm intended
        if sl.step and (sl.start or sl.stop):
            raise Exception("Sourcecode slicing does not support the step attribute (e.g. source[from:to:step] is not supported)")

        length = len(self.sourceString)

        start = 0 if sl.start is None else sl.start
        if start < 0:
            #count from the end like a string does so that the line map can follow
            start = max(0, length + start)

        if sl.stop is None:
            stop = length
        elif sl.stop < 0:
            stop = length + sl.stop
        else:
            stop = sl.stop

        if start > stop:
            #%s because either bound may be None
            raise Exception("Begin slice cannot be greater than end slice. You passed SourceCode[%s:%s]" % (sl.start, sl.stop))

        ret = self.copy()
        ret.sourceString = ret.sourceString[start:stop]

        #keep the mappings inside the slice and shift them to the new start position
        #'stop' itself is kept: it may be the beginning of a line right after the slice
        shiftedCharacterToLineMap = {}
        for characterPosition, lineNumber in ret.characterToLineMap.items():
            if start <= characterPosition <= stop:
                shiftedCharacterToLineMap[characterPosition - start] = lineNumber

        #we need this to be sure that we can always get the line number no matter where we splice
        shiftedCharacterToLineMap[0] = self.getLineNumber(start)

        ret.characterToLineMap = shiftedCharacterToLineMap
        return ret

    def __add__(self, other):
        '''
        Add two pieces of sourcecode together shifting the character to line
        map of 'other' appropriately. Always returns a plain SourceCode.
        Raises Exception when 'other' begins on a line before this one ends.
        '''
        #If the other operand is nothing, just return the value of this operand
        if not other:
            return self.copy()

        if self.lastLineNumber() > other.firstLineNumber():
            raise Exception("When adding two pieces of sourcecode, the second piece must be completely after the first as far as line numbers go")

        sourceString = self.sourceString + other.sourceString

        #positions of 'other' move right by the length of this source
        offset = len(self.sourceString)
        characterToLineMap = dict(self.characterToLineMap)
        for characterPosition, lineNumber in other.characterToLineMap.items():
            characterToLineMap[characterPosition + offset] = lineNumber

        return SourceCode(sourceString=sourceString, characterToLineMap=characterToLineMap)

    def __sub__(self, other):
        '''
        Return a new sourcecode with the first occurrence of the text of
        'other' cut out. Raises Exception when the line numbers of 'other' do
        not lie within this source or when its text cannot be found.
        '''
        if not other:
            return self.copy()

        if self.firstLineNumber() > other.firstLineNumber() or self.lastLineNumber() < other.lastLineNumber():
            raise Exception("When subtracting a piece of one bit of sourcecode from another, the second must lie completely within the first")

        firstPos = self.sourceString.find(other.sourceString)

        if firstPos == -1:
            raise Exception('Could not subtract string starting with "%s" from source because string could not be found' % other.sourceString[:50].replace("\n", "\\n"))

        lastPos = firstPos + len(other.sourceString)

        return self[:firstPos] + self[lastPos:]

    def __nonzero__(self):
        '''
        Truth value of the object: False when the sourceString has nothing
        or nothing but whitespace
        '''
        return self.sourceString.strip() != ''

    #python 3 looks for __bool__ instead of __nonzero__
    __bool__ = __nonzero__

    def __str__(self):
        '''
        Mostly for debugging. The source with 'N: ' inserted at every mapped position
        '''
        pieces = []
        for i, char in enumerate(self.sourceString):
            if i in self.characterToLineMap:
                pieces.append('%d: ' % self.characterToLineMap[i])
            pieces.append(char)
        return ''.join(pieces)

    def copy(self):
        '''Deep copy of this object'''
        return copy.deepcopy(self)

    def firstLineNumber(self):
        '''
        First line number of the entire source
        Raises Exception when the line map is empty
        '''
        try:
            return min(self.characterToLineMap.values())
        except ValueError:
            raise Exception("Sourcecode has no line numbers")

    def lastLineNumber(self):
        '''
        Last line number of the entire source
        Raises Exception when the line map is empty
        '''
        try:
            return max(self.characterToLineMap.values())
        except ValueError:
            raise Exception("Sourcecode has no line numbers")

    def remove(self, stringToRemove):
        '''
        Return a new sourcecode with the first occurrence of the string removed.
        Does not alter object in place. Raises Exception if the string is not found
        '''
        firstPos = self.sourceString.find(stringToRemove)
        if firstPos == -1:
            raise Exception("String not found in source")
        lastPos = firstPos + len(stringToRemove)
        return self[:firstPos] + self[lastPos:]

    def pop(self):
        '''
        Pop off the last line: remove it from this object and return its text.
        The text starts with the newline which precedes the line; when the
        source has no newline at all the whole source is the last line.
        '''
        #rfind gives -1 without a newline; everything is the last line then
        lastLinePos = max(self.sourceString.rfind('\n'), 0)
        ret = self.sourceString[lastLinePos:]

        #rebinding 'self' would only change a local name, so update the attributes
        remaining = self[:lastLinePos]
        self.sourceString = remaining.sourceString
        self.characterToLineMap = remaining.characterToLineMap
        return ret

    def getPosition(self, lineNumberRequest):
        '''
        From lineNumber, get the character position where that line begins.
        When several positions carry the line number the first one is returned.
        Raises Exception when the line number is not in the map
        '''
        positions = [pos for pos, lineNumber in self.characterToLineMap.items()
                     if lineNumber == lineNumberRequest]
        if not positions:
            raise Exception("Could not find line number in source")
        return min(positions)

    def getLineNumber(self, pos):
        '''
        Decrement until we find the first character of the line and can get the linenumber
        Raises Exception when no mapped position exists at or before 'pos'
        '''
        current = pos
        while current >= 0:
            try:
                return self.characterToLineMap[current]
            except KeyError:
                current -= 1
        #report the position which was asked for, not where the search ended
        raise Exception("could not get line number for position %d" % pos)

    def find(self, what, start=0):
        '''
        Pass through 'find' makes implementations cleaner
        '''
        return self.sourceString.find(what, start)

    def extractBetweenDelimiters(self, startAt=0):
        '''
        Return the source between the first pair of delimiters after 'startAt'
        or None when there is no opening delimiter or it is never closed
        '''
        start = self.sourceString.find(self.delimA, startAt)
        if start == -1:
            return None
        start += self.delimLen

        #endDelimPos takes the delimiters from the object itself
        endPos = self.endDelimPos(start)
        if endPos == -1:
            return None
        return self[start:endPos]

    def getSourceInBlock(self, bracketPos):
        '''
        Get the source between the bracket at 'bracketPos' and its matching bracket
        '''
        otherBracketPosition = self.matchingBracketPos(bracketPos)

        startBracketPos = min(bracketPos, otherBracketPosition)
        endBracketPos = max(bracketPos, otherBracketPosition)

        return self[startBracketPos + 1:endBracketPos]

    def matchingBracketPos(self, bracketPos):
        '''
        Find the position of the bracket matching the one at 'bracketPos'.
        Raises Exception when the character there is neither delimA nor delimB
        '''
        delim = self[bracketPos]
        if delim == self.delimA:
            #slicing instead of indexing: the bracket may be the last character
            if self.sourceString[bracketPos + 1:bracketPos + 2] == self.delimB:
                return bracketPos + 1
            return self.endDelimPos(startAt=bracketPos + 1)
        elif delim == self.delimB:
            #index -1 would wrap around to the end of the string
            if bracketPos > 0 and self.sourceString[bracketPos - 1] == self.delimA:
                return bracketPos - 1
            return self.openDelimPos(bracketPos - 1)
        else:
            raise Exception('"%s" is not a known delimiter' % delim)

    def endDelimPos(self, startAt):
        '''
        Find the nearest end delimiter assuming that 'startAt' is inside of a block.
        Returns its position or -1 when the block is never closed
        '''
        count = 1
        i = startAt
        while i < len(self.sourceString) and count > 0:
            tmp = self.sourceString[i:i + self.delimLen]
            if tmp == self.delimA:
                count += 1
                i += self.delimLen
            elif tmp == self.delimB:
                count -= 1
                i += self.delimLen
            else:
                i += 1

        if count == 0:
            #i was already moved past the closing delimiter
            return i - self.delimLen
        else:
            return -1

    def openDelimPos(self, pos):
        '''
        Find the nearest begin delimiter assuming that 'pos' is inside of a block.
        Returns its position or 0 when none is found
        TODO there is probably no reason why this also includes parenthesis
        TODO this should probably just be the same function as endDelimPos
        '''
        count = 0
        i = pos
        while i >= 0 and count >= 0:
            if self.sourceString[i] in ('}', ')'):
                count += 1
            elif self.sourceString[i] in ('{', '('):
                count -= 1
            i -= 1

        if count == -1:
            #i was already moved one before the opening bracket
            return i + 1
        else:
            return 0

    def _removeCommentsAndStrings(self):
        '''
        Blank out comments and strings in 'sourceString' and build 'characterToLineMap'.

        Two things happen here:
        a. Character by character, the characters which are not part of comments or
           strings are added to a new string which replaces 'sourceString'.
           A string-delimited block (see 'blockComments') is reduced to its start
           and end delimiters; a regex-delimited block and an inline comment are
           dropped entirely (the newline that ends an inline comment is kept).
        b. At the same time, 'characterToLineMap' is filled with
           {index of the first character of a line in the NEW string: line number
           in the ORIGINAL string}.

        If a block is opened but never closed, or an inline comment runs to the end
        of the input, processing stops there and what was cleaned so far is kept.

        On return 'sourceString' is an MString (defined elsewhere in this file;
        only MString(''), append(), len() and str() are relied upon here).
        It is used to save time on appending.
        '''
        print("Removing comments and strings...")
        originalString = str(self.sourceString)
        sourceLen = len(originalString)
        self.sourceString = MString('')

        #character 0 is line #1; lineCount always holds the number of the NEXT line
        self.characterToLineMap = {0: 1}
        lineCount = 2

        inlineComment = self.inlineComments
        inlineCommentLen = len(inlineComment)

        i = 0
        while i < sourceLen:
            #There are multiple types of block comments so we have to check them all
            for blockComment in self.blockComments:
                startDelim = blockComment['start']
                endDelim = blockComment['end']

                if isinstance(startDelim, str):
                    #startswith() avoids copying the tail of the source for every character
                    if not originalString.startswith(startDelim, i):
                        continue
                    prevI = i
                    contentStart = i + len(startDelim)
                    endPos = originalString.find(endDelim, contentStart)
                    while endPos != -1:
                        #an end delimiter preceded by an odd number of backslashes is escaped
                        backslashes = 0
                        k = endPos - 1
                        while k >= contentStart and originalString[k] == '\\':
                            backslashes += 1
                            k -= 1
                        if backslashes % 2 == 0:
                            break
                        endPos = originalString.find(endDelim, endPos + 1)
                    if endPos == -1:
                        #the block is never closed: keep what has been cleaned so far
                        return
                    #jog past the END delimiter (its length may differ from the start's)
                    i = endPos + len(endDelim)
                    #newlines inside the block still count towards line numbers
                    lineCount += originalString.count('\n', prevI, i)
                    #still want to see the delimiters, just not what is inside
                    self.sourceString.append(startDelim + endDelim)
                    break
                else:
                    #regex delimiters (e.g. javascript regex literals)
                    #the tail is sliced so that anchors behave as if the text started at i
                    match = startDelim.match(originalString[i:])
                    if not match:
                        continue
                    prevI = i
                    contentStart = i + match.end(0)
                    endMatch = endDelim.search(originalString[contentStart:])
                    if not endMatch:
                        return
                    i = contentStart + endMatch.end(0)
                    lineCount += originalString.count('\n', prevI, i)
                    break
            else:
                #no block comment starts here
                #an empty inline marker would match everywhere, so it means "no inline comments"
                if inlineCommentLen and originalString.startswith(inlineComment, i):
                    #jog to the newline; it is copied (and counted) on the next pass
                    i = originalString.find("\n", i + inlineCommentLen)
                    #no newline means the comment runs to the end of the file
                    if i == -1:
                        return
                else:
                    #Otherwise, it is not a comment. Keep the character
                    char = originalString[i]
                    self.sourceString.append(char)
                    if char == '\n':
                        #the character after this newline begins the next line
                        self.characterToLineMap[len(self.sourceString)] = lineCount
                        lineCount += 1
                    i += 1