Ejemplo n.º 1
0
	def _removeCommentsAndStrings(self):
		'''
		Strip comment and string bodies from the source and build the line map.

		Reads self.sourceString, self.blockComments and self.inlineComments, then
		overwrites two attributes:
			sourceString       - an MString holding the cleaned text
			characterToLineMap - {index in the cleaned text: line number in the
			                     original text}, one entry per line start

		Every blockComments entry is a dict whose 'start' and 'end' values are
		either plain strings or compiled regular expressions. A string-delimited
		block is replaced by its bare start+end pair; a regex-delimited block
		and every inline comment are dropped entirely.

		If a block is never closed, or an inline comment runs to the end of the
		input, scanning stops there and whatever was cleaned so far is kept.
		'''
		print("Removing comments and strings...")

		originalString = str(self.sourceString)
		totalLen = len(originalString)
		self.sourceString = MString('')
		self.characterToLineMap = {0: 1} #character 0 is line #1
		lineCount = 2 #the next newline that gets copied starts line #2

		inlineComments = self.inlineComments
		inlineCommentLen = len(inlineComments)

		i = 0
		while i < totalLen:
			#There are multiple types of block comments so we have to check them all
			for blockComment in self.blockComments:
				startDelim = blockComment['start']
				endDelim = blockComment['end']

				if isinstance(startDelim, str):
					#startswith(..., i) avoids copying the rest of the text for every character
					if not originalString.startswith(startDelim, i):
						continue

					bodyStart = i + len(startDelim)
					endPos = originalString.find(endDelim, bodyStart)

					#an end delimiter preceded by an odd number of backslashes is escaped; keep looking
					while endPos != -1:
						backslashes = 0
						while endPos-1-backslashes >= bodyStart and originalString[endPos-1-backslashes] == '\\':
							backslashes += 1
						if backslashes % 2 == 0:
							break
						endPos = originalString.find(endDelim, endPos+1)

					if endPos == -1:
						#the block is never closed; keep what has been cleaned so far
						return

					blockEnd = endPos + len(endDelim)

					#newlines inside the block still count towards the line numbers
					lineCount += originalString.count('\n', i, blockEnd)

					#still want to see the delimiters, just not what is inside
					self.sourceString.append(startDelim + endDelim)
					i = blockEnd
					break
				else:
					#regex delimiters are matched against a slice so that anchors in the
					#patterns keep treating position i as the beginning of the text
					match = startDelim.match(originalString[i:])
					if not match:
						continue

					bodyStart = i + match.end(0)
					endMatch = endDelim.search(originalString[bodyStart:])
					if not endMatch:
						return

					blockEnd = bodyStart + endMatch.end(0)
					lineCount += originalString.count('\n', i, blockEnd)
					i = blockEnd
					break
			else:
				#no block comment starts here
				#an empty inline marker would match at every position, so it is treated as "no inline comments"
				if inlineCommentLen and originalString.startswith(inlineComments, i):
					#jump to the newline that ends the comment. The newline itself is
					#copied on the next pass so that the line map stays correct
					i = originalString.find("\n", i+inlineCommentLen)
					if i == -1:
						#the comment runs to the end of the file
						return
				else:
					#Otherwise, it is not a comment. Copy the character over
					char = originalString[i]
					self.sourceString.append(char)

					#the character after a newline is the first character of the next line
					if char == '\n':
						self.characterToLineMap[len(self.sourceString)] = lineCount
						lineCount += 1
					i += 1
Ejemplo n.º 2
0
    def _removeCommentsAndStrings(self):
        '''
        Strip comment and string bodies from the source and build the line map.

        Reads self.sourceString, self.blockComments and self.inlineComments, then
        overwrites two attributes:
            sourceString       - an MString holding the cleaned text
            characterToLineMap - {index in the cleaned text: line number in the
                                 original text}, one entry per line start

        Every blockComments entry is a dict whose 'start' and 'end' values are
        either plain strings or compiled regular expressions. A string-delimited
        block is replaced by its bare start+end pair; a regex-delimited block
        and every inline comment are dropped entirely.

        If a block is never closed, or an inline comment runs to the end of the
        input, scanning stops there and whatever was cleaned so far is kept.
        '''
        print("Removing comments and strings...")

        originalString = str(self.sourceString)
        totalLen = len(originalString)
        self.sourceString = MString('')
        self.characterToLineMap = {0: 1}  #character 0 is line #1
        lineCount = 2  #the next newline that gets copied starts line #2

        inlineComments = self.inlineComments
        inlineCommentLen = len(inlineComments)

        i = 0
        while i < totalLen:
            #There are multiple types of block comments so we have to check them all
            for blockComment in self.blockComments:
                startDelim = blockComment['start']
                endDelim = blockComment['end']

                if isinstance(startDelim, str):
                    #startswith(..., i) avoids copying the rest of the text for every character
                    if not originalString.startswith(startDelim, i):
                        continue

                    bodyStart = i + len(startDelim)
                    endPos = originalString.find(endDelim, bodyStart)

                    #an end delimiter preceded by an odd number of backslashes is escaped; keep looking
                    while endPos != -1:
                        backslashes = 0
                        while (endPos - 1 - backslashes >= bodyStart and
                               originalString[endPos - 1 - backslashes] == '\\'):
                            backslashes += 1
                        if backslashes % 2 == 0:
                            break
                        endPos = originalString.find(endDelim, endPos + 1)

                    if endPos == -1:
                        #the block is never closed; keep what has been cleaned so far
                        return

                    blockEnd = endPos + len(endDelim)

                    #newlines inside the block still count towards the line numbers
                    lineCount += originalString.count('\n', i, blockEnd)

                    #still want to see the delimiters, just not what is inside
                    self.sourceString.append(startDelim + endDelim)
                    i = blockEnd
                    break
                else:
                    #regex delimiters are matched against a slice so that anchors in the
                    #patterns keep treating position i as the beginning of the text
                    match = startDelim.match(originalString[i:])
                    if not match:
                        continue

                    bodyStart = i + match.end(0)
                    endMatch = endDelim.search(originalString[bodyStart:])
                    if not endMatch:
                        return

                    blockEnd = bodyStart + endMatch.end(0)
                    lineCount += originalString.count('\n', i, blockEnd)
                    i = blockEnd
                    break
            else:
                #no block comment starts here
                #an empty inline marker would match at every position, so it is treated as "no inline comments"
                if inlineCommentLen and originalString.startswith(
                        inlineComments, i):
                    #jump to the newline that ends the comment. The newline itself is
                    #copied on the next pass so that the line map stays correct
                    i = originalString.find("\n", i + inlineCommentLen)
                    if i == -1:
                        #the comment runs to the end of the file
                        return
                else:
                    #Otherwise, it is not a comment. Copy the character over
                    char = originalString[i]
                    self.sourceString.append(char)

                    #the character after a newline is the first character of the next line
                    if char == '\n':
                        self.characterToLineMap[len(
                            self.sourceString)] = lineCount
                        lineCount += 1
                    i += 1
Ejemplo n.º 3
0
class SourceCode(object):
	'''
	SourceCode is a convenience object representing:
		source text (sourceString)
		a line number map (characterToLineMap) of character index -> line number,
		holding one entry for the first character of each line

	A sourcecode object is maintained internally in both the Group and Node classes

	Implementations will probably only have to override the class attributes:
		blockComments
		inlineComments
	Although Python does override more because of its indent system

	The sourcecode object supports the following primitive operations
		sc = SourceCode(sourceString)
		len(sc) #characters
		sc[a:b] #new sourcecode between characters a and b
		sc[a] #character
		scA + scB #addition as long as line numbers do not overlap
		scA - scB #subtraction as long as scB is completely inside scA
		bool(sc) #truth testing (False for empty or whitespace-only source)
		str(sc) #print with line numbers

	And these are the methods
		copy() #deepcopy
		firstLineNumber() #of the entire object
		lastLineNumber()  #of the entire object
		remove(string) #and return new sourcecode
		pop() #remove and return the last line
		getPosition(lineNumber) #get character index at linenumber
		getLineNumber(characterPos) #get line number of character
		find(what,start) #run sourceString.find()
		extractBetweenDelimiters(startAt) #return new sourcecode between the first pair of delimiters after startAt
		getSourceInBlock(bracketPos) #Return the source to the matching bracket
		matchingBracketPos(bracketPos) #Return the matching bracket position
		endDelimPos(startAt) #return the position of the nearest end bracket given a position in the block
		openDelimPos(pos) #return the position of the nearest begin bracket given a position in the block
		_removeCommentsAndStrings() #called on init. Does as it says changing the object

	'''

	#These are meant to be overridden by subclasses
	blockComments = []
	inlineComments = ''
	delimA='{'
	delimB='}'

	def __init__(self,sourceString,characterToLineMap=None):
		'''
		Store the source text together with its character-to-line map.

		When a non-empty characterToLineMap is given, sourceString is taken as-is.
		Otherwise the comments are removed and the map is built while doing so.
		'''
		self.sourceString = sourceString

		if characterToLineMap:
			self.characterToLineMap = characterToLineMap
		else:
			self.characterToLineMap = {}

			self._removeCommentsAndStrings()
			self.sourceString = str(self.sourceString) #convert back to regular python string from mutable string

			if DEBUG:
				#dump the cleaned text for inspection
				with open('cleanedSource','w') as outfile:
					outfile.write(self.sourceString)

		self.delimLen = len(self.delimA)

	def __len__(self):
		'''
		Number of characters in the source text
		'''
		return len(self.sourceString)

	def __getitem__(self,sl):
		'''
		sc[n] returns the single character at n.
		sc[a:b] returns a new object with the sourceString and the characterToLineMap sliced by [a:b]

		1. Slice the source string in the obvious way.
		2. Slice the charactertolinemap
			a. Drop character mappings outside of [start, stop]
			b. Shift the remaining character positions left by start

		Negative indexes count from the end, as with strings.
		Raises Exception for anything that is not an int or a slice, for a step
		other than 1 and when the begin of the slice lies after its end.
		'''
		if isinstance(sl, int):
			return self.sourceString[sl]

		if not isinstance(sl, slice):
			raise Exception("Slice was not passed")

		if sl.step not in (None, 1):
			raise Exception("Sourcecode slicing does not support the step attribute (e.g. source[from:to:step] is not supported)")

		length = len(self.sourceString)
		start = 0 if sl.start is None else sl.start
		stop = length if sl.stop is None else sl.stop

		#map positions are absolute, so negative indexes have to be resolved first
		if start < 0:
			start = max(length+start, 0)
		if stop < 0:
			stop = length+stop

		if start>stop:
			raise Exception("Begin slice cannot be greater than end slice. You passed SourceCode[%d:%d]"%(start,stop))

		#copying keeps the class (and any extra attributes) of this object
		ret = self.copy()
		ret.sourceString = self.sourceString[start:stop]

		#keep only the mappings inside the slice and shift them to the new start position
		shiftedCharacterToLineMap = {}
		for characterPosition, lineNumber in self.characterToLineMap.items():
			if start <= characterPosition <= stop:
				shiftedCharacterToLineMap[characterPosition-start] = lineNumber

		#we need this to be sure that we can always get the line number no matter where we splice
		shiftedCharacterToLineMap[0] = self.getLineNumber(start)

		ret.characterToLineMap = shiftedCharacterToLineMap
		return ret

	def __add__(self,other):
		'''
		Add two pieces of sourcecode together shifting the character to line map appropriately

		Returns a new object of the same class as the left operand.
		Raises Exception when the left operand ends on a later line than the right operand starts.
		'''

		#If the other operand is nothing, just return the value of this operand
		if not other:
			return self.copy()

		if self.lastLineNumber()>other.firstLineNumber():
			raise Exception("When adding two pieces of sourcecode, the second piece must be completely after the first as far as line numbers go")

		#the other map moves right by the length of this text; on a clash its entries win
		offset = len(self.sourceString)
		characterToLineMap = dict(self.characterToLineMap)
		for characterPosition, lineNumber in other.characterToLineMap.items():
			characterToLineMap[characterPosition+offset] = lineNumber

		#built from a copy (like slicing does) so that subclasses keep their class
		ret = self.copy()
		ret.sourceString = self.sourceString + other.sourceString
		ret.characterToLineMap = characterToLineMap
		return ret

	def __sub__(self,other):
		'''
		Return a new sourcecode with the first occurrence of the text of 'other' cut out

		Raises Exception when the line range of 'other' is not inside the line
		range of this object or when its text cannot be found.
		'''
		if not other:
			return self.copy()

		if self.firstLineNumber()>other.firstLineNumber() or self.lastLineNumber()<other.lastLineNumber():
			raise Exception("When subtracting a piece of one bit of sourcecode from another, the second must lie completely within the first")

		firstPos = self.sourceString.find(other.sourceString)

		if firstPos == -1:
			raise Exception('Could not subtract string starting with "%s" from source because string could not be found'%other.sourceString[:50].replace("\n","\\n"))

		lastPos = firstPos + len(other.sourceString)

		return self[:firstPos]+self[lastPos:]

	def __nonzero__(self):
		'''
		Truth value of the object
		False when the sourceString has nothing or nothing but whitespace
		'''
		return self.sourceString.strip()!=''

	#Python 3 looks for __bool__ instead of __nonzero__
	__bool__ = __nonzero__

	def __str__(self):
		'''
		Mostly for debugging. Print the source with line numbers
		'''
		pieces = []
		for i, char in enumerate(self.sourceString):
			if i in self.characterToLineMap:
				pieces.append('%d: '%self.characterToLineMap[i])
			pieces.append(char)
		return ''.join(pieces)

	def copy(self):
		'''
		Independent deep copy of this object
		'''
		return copy.deepcopy(self)

	def firstLineNumber(self):
		'''
		First line number of the entire source
		Raises Exception when the line map is empty
		'''
		try:
			return min(self.characterToLineMap.values())
		except ValueError:
			raise Exception("Sourcecode has no line numbers")

	def lastLineNumber(self):
		'''
		Last line number of the entire source
		Raises Exception when the line map is empty
		'''
		try:
			return max(self.characterToLineMap.values())
		except ValueError:
			raise Exception("Sourcecode has no line numbers")

	def remove(self,stringToRemove):
		'''
		Remove the first occurrence of a string. Does not alter object in place
		Raises Exception when the string is not part of the source
		'''
		firstPos = self.sourceString.find(stringToRemove)
		if firstPos == -1:
			raise Exception("String not found in source")
		lastPos = firstPos + len(stringToRemove)
		return self[:firstPos]+self[lastPos:]

	def pop(self):
		'''
		Pop off the last line

		Removes the last line from this object and returns its text. The
		returned text starts with the newline that preceded the line; when
		there is no newline the whole text is the last line.
		'''
		lastLinePos = self.sourceString.rfind('\n')
		if lastLinePos == -1:
			lastLinePos = 0
		ret = self.sourceString[lastLinePos:]

		#rebinding 'self' would change nothing, so the attributes are replaced instead
		remaining = self[:lastLinePos]
		self.sourceString = remaining.sourceString
		self.characterToLineMap = remaining.characterToLineMap

		return ret

	def getPosition(self,lineNumberRequest):
		'''
		From lineNumber, get the character position

		Added sourcecode can map one line number at two positions; the
		smallest one is returned so the answer does not depend on dict order.
		Raises Exception when the line number is not in the map.
		'''
		positions = [pos for pos,lineNumber in self.characterToLineMap.items() if lineNumber == lineNumberRequest]
		if not positions:
			raise Exception("Could not find line number in source")
		return min(positions)

	def getLineNumber(self,pos):
		'''
		Decrement until we find the first character of the line and can get the linenumber
		Raises Exception when no line start is mapped at or before pos
		'''
		candidate = pos
		while True:
			if candidate in self.characterToLineMap:
				return self.characterToLineMap[candidate]
			candidate -= 1
			if candidate < 0:
				#report the position that was asked for, not the loop counter
				raise Exception("could not get line number for position %d"%pos)

	def find(self,what,start=0):
		'''
		Pass through 'find' makes implementations cleaner
		'''
		return self.sourceString.find(what,start)

	def extractBetweenDelimiters(self,startAt=0):
		'''
		Return the source between the first pair of delimiters after 'startAt'
		Returns None when there is no opening delimiter or it is never closed
		'''

		start = self.sourceString.find(self.delimA,startAt)
		if start == -1:
			return None
		start += self.delimLen

		#endDelimPos works with the delimiters of this object, it only needs the position
		endPos = self.endDelimPos(start)
		if endPos != -1:
			return self[start:endPos]
		else:
			return None

	def getSourceInBlock(self,bracketPos):
		'''
		Get the source within two matching brackets
		bracketPos may be the position of either bracket of the pair
		'''
		otherBracketPosition = self.matchingBracketPos(bracketPos)

		if bracketPos < otherBracketPosition:
			startBracketPos = bracketPos
			endBracketPos = otherBracketPosition
		else:
			startBracketPos = otherBracketPosition
			endBracketPos = bracketPos

		ret = self[startBracketPos+1:endBracketPos]
		return ret

	def matchingBracketPos(self,bracketPos):
		'''
		Find the matching bracket position
		Raises Exception when the character at bracketPos is not a delimiter
		'''

		delim = self[bracketPos]
		if delim == self.delimA:
			if self.sourceString[bracketPos+1]==self.delimB:
				return bracketPos + 1
			else:
				return self.endDelimPos(startAt=bracketPos+1)
		elif delim == self.delimB:
			if self.sourceString[bracketPos-1]==self.delimA:
				return bracketPos - 1
			else:
				#passed by position: the parameter of openDelimPos is called 'pos'
				return self.openDelimPos(bracketPos-1)
		else:
			raise Exception('"%s" is not a known delimiter'%delim)

	def endDelimPos(self,startAt):
		'''
		Find the nearest end delimiter assuming that 'startAt' is inside of a block
		Returns the position of that delimiter or -1 when the block is never closed
		'''

		count = 1 #we start one level deep
		i = startAt
		while i<len(self.sourceString) and count>0:
			tmp = self.sourceString[i:i+self.delimLen]
			if tmp==self.delimA:
				count += 1
				i+=self.delimLen
			elif tmp==self.delimB:
				count -= 1
				i+=self.delimLen
			else:
				i+=1

		if count == 0:
			#i already moved past the closing delimiter
			return i-self.delimLen
		else:
			return -1

	def openDelimPos(self,pos):
		'''
		Find the nearest begin delimiter assuming that 'pos' is inside of a block
		Scans backwards from 'pos'. Returns 0 when no unmatched begin delimiter is found
		TODO there is probably no reason why this also includes parenthesis
		TODO this should probably just be the same function as endDelimPos
		'''

		count = 0
		i = pos
		while i>=0 and count>=0:
			if self.sourceString[i] in ('}',')'):
				count += 1
			elif self.sourceString[i] in ('{','('):
				count -= 1
			i-=1

		if count==-1:
			#i was decremented once more after the delimiter was seen
			return i+1
		else:
			return 0

	def _removeCommentsAndStrings(self):
		'''
		Strip comment and string bodies from the source and build the line map.

		Reads self.sourceString, self.blockComments and self.inlineComments, then
		overwrites two attributes:
			sourceString       - an MString holding the cleaned text
			characterToLineMap - {index in the cleaned text: line number in the
			                     original text}, one entry per line start

		Every blockComments entry is a dict whose 'start' and 'end' values are
		either plain strings or compiled regular expressions. A string-delimited
		block is replaced by its bare start+end pair; a regex-delimited block
		and every inline comment are dropped entirely.

		If a block is never closed, or an inline comment runs to the end of the
		input, scanning stops there and whatever was cleaned so far is kept.
		'''
		print("Removing comments and strings...")

		originalString = str(self.sourceString)
		totalLen = len(originalString)
		self.sourceString = MString('')
		self.characterToLineMap = {0: 1} #character 0 is line #1
		lineCount = 2 #the next newline that gets copied starts line #2

		inlineComments = self.inlineComments
		inlineCommentLen = len(inlineComments)

		i = 0
		while i < totalLen:
			#There are multiple types of block comments so we have to check them all
			for blockComment in self.blockComments:
				startDelim = blockComment['start']
				endDelim = blockComment['end']

				if isinstance(startDelim, str):
					#startswith(..., i) avoids copying the rest of the text for every character
					if not originalString.startswith(startDelim, i):
						continue

					bodyStart = i + len(startDelim)
					endPos = originalString.find(endDelim, bodyStart)

					#an end delimiter preceded by an odd number of backslashes is escaped; keep looking
					while endPos != -1:
						backslashes = 0
						while endPos-1-backslashes >= bodyStart and originalString[endPos-1-backslashes] == '\\':
							backslashes += 1
						if backslashes % 2 == 0:
							break
						endPos = originalString.find(endDelim, endPos+1)

					if endPos == -1:
						#the block is never closed; keep what has been cleaned so far
						return

					blockEnd = endPos + len(endDelim)

					#newlines inside the block still count towards the line numbers
					lineCount += originalString.count('\n', i, blockEnd)

					#still want to see the delimiters, just not what is inside
					self.sourceString.append(startDelim + endDelim)
					i = blockEnd
					break
				else:
					#regex delimiters are matched against a slice so that anchors in the
					#patterns keep treating position i as the beginning of the text
					match = startDelim.match(originalString[i:])
					if not match:
						continue

					bodyStart = i + match.end(0)
					endMatch = endDelim.search(originalString[bodyStart:])
					if not endMatch:
						return

					blockEnd = bodyStart + endMatch.end(0)
					lineCount += originalString.count('\n', i, blockEnd)
					i = blockEnd
					break
			else:
				#no block comment starts here
				#an empty inline marker would match at every position, so it is treated as "no inline comments"
				if inlineCommentLen and originalString.startswith(inlineComments, i):
					#jump to the newline that ends the comment. The newline itself is
					#copied on the next pass so that the line map stays correct
					i = originalString.find("\n", i+inlineCommentLen)
					if i == -1:
						#the comment runs to the end of the file
						return
				else:
					#Otherwise, it is not a comment. Copy the character over
					char = originalString[i]
					self.sourceString.append(char)

					#the character after a newline is the first character of the next line
					if char == '\n':
						self.characterToLineMap[len(self.sourceString)] = lineCount
						lineCount += 1
					i += 1
Ejemplo n.º 4
0
class SourceCode(object):
    '''
	SourceCode is a convenience object representing:
		source text (sourceString)
		a line number array (characterToLineMap)

	A sourcecode object is maintained internally in both the Group and Node classes

	Implementations will probably only have to overwrite the two properties:
		blockComments
		strings
	Although Python does overwrite more because of its indent system

	The sourcecode object supports the following primitive operations
		sc = SourceCode()
		len(sc) #characters
		sc[a:b] #betweenCharacters
		sc[a] #character
		scA + scB #addition as long as line numbers do not overlap
		scA - scB #subtraction as long as scB is completely inside scA
		sc == True #truth testing (empty string)
		str(sc) print with line numbers

	And these are the methods
		copy() #deepcopy
		firstLineNumber() #of the entire object
		lastLineNumber()  #of the entire object
		remove(string) #and return new sourcecode
		pop() #return last line
		getPosition(lineNumber) #get character index at linenumber
		getLineNumber(characterPos) #get line number of character
		find(what,start) #run sourceString.find()
		extractBetweenDelimiters(startAt) #return new sourcecode between the first pair of delimiters after startAt
		getSourceInBlock(bracketPos) #Return the source to the matching bracket
		matchingBracketPos(bracketPos) #Return the matching bracket position
		endDelimPos(startAt) #return the position of the nearest end bracket given a position in the block
		openDelimPos(startAt) #return the position of the nearest begin bracket given a position in the block
		_removeCommentsAndStrings() #called on init. Does as it says changing the object

	'''

    #These two must be subclassed
    blockComments = []
    inlineComments = ''
    delimA = '{'
    delimB = '}'

    def __init__(self, sourceString, characterToLineMap=None):
        '''
		Remove the comments and build the linenumber/file mapping while doing so
		'''
        self.sourceString = sourceString

        if characterToLineMap:
            self.characterToLineMap = characterToLineMap
        else:
            self.characterToLineMap = {}

            self._removeCommentsAndStrings()
            self.sourceString = str(
                self.sourceString
            )  #convert back to regular python string from mutable string

            if DEBUG:
                #print 'REMOVED COMMENTS',self
                with open('cleanedSource', 'w') as outfile:
                    outfile.write(self.sourceString)

        self.delimLen = len(self.delimA)

    def __len__(self):
        return len(self.sourceString)

    def __getitem__(self, sl):
        '''
		If sliced, return a new object with the sourceString and the characterToLineMap sliced by [firstChar:lastChar]

		1. Slice the source string in the obvious way.
		2. Slice the charactertolinemap
			a. Remove character mappings that are not in between where we are shifting to
			b. Take remaining characterPositions and shift them over by start shift

		'''
        if type(sl) == int:
            return self.sourceString[sl]

        if type(sl) != slice:
            raise Exception("Slice was not passed")

        if sl.step and (sl.start or sl.stop):
            raise Exception(
                "Sourcecode slicing does not support the step attribute (e.g. source[from:to:step] is not supported)"
            )

        if sl.start is None:
            start = 0
        else:
            start = sl.start

        if sl.stop is None:
            stop = len(self.sourceString)
        elif sl.stop < 0:
            stop = len(self.sourceString) + sl.stop
        else:
            stop = sl.stop

        if start > stop:
            raise Exception(
                "Begin slice cannot be greater than end slice. You passed SourceCode[%d:%d]"
                % (sl.start, sl.stop))

        ret = self.copy()

        ret.sourceString = ret.sourceString[start:stop]

        #filter out character mapping we won't be using
        shiftedCharacterToLineMap = {}
        characterPositions = ret.characterToLineMap.keys()
        characterPositions = filter(lambda p: p >= start and p <= stop,
                                    characterPositions)

        #shift existing character mappings to reflect the new start position
        #If we start with 0, no shifting will take place
        for characterPosition in characterPositions:
            shiftedCharacterToLineMap[
                characterPosition -
                start] = ret.characterToLineMap[characterPosition]

        #we need this to be sure that we can always get the line number no matter where we splice
        shiftedCharacterToLineMap[0] = self.getLineNumber(start)

        ret.characterToLineMap = shiftedCharacterToLineMap
        return ret

    def __add__(self, other):
        '''
		Add two pieces of sourcecode together shifting the character to line map appropriately
		'''

        #If one operand is nothing, just return the value of this operand
        if not other:
            return self.copy()

        if self.lastLineNumber() > other.firstLineNumber():
            raise Exception(
                "When adding two pieces of sourcecode, the second piece must be completely after the first as far as line numbers go"
            )

        sourceString = self.sourceString + other.sourceString

        shiftedCharacterToLineMap = {}
        characterPositions = other.characterToLineMap.keys()
        for characterPosition in characterPositions:
            shiftedCharacterToLineMap[
                characterPosition +
                len(self.sourceString
                    )] = other.characterToLineMap[characterPosition]

        characterToLineMap = dict(self.characterToLineMap.items() +
                                  shiftedCharacterToLineMap.items())

        ret = SourceCode(sourceString=sourceString,
                         characterToLineMap=characterToLineMap)

        return ret

    def __sub__(self, other):
        if not other:
            return self.copy()

        if self.firstLineNumber() > other.firstLineNumber(
        ) or self.lastLineNumber() < other.lastLineNumber():
            pdb.set_trace()
            raise Exception(
                "When subtracting a piece of one bit of sourcecode from another, the second must lie completely within the first"
            )

        firstPos = self.sourceString.find(other.sourceString)

        if firstPos == -1:
            pdb.set_trace()
            raise Exception(
                'Could not subtract string starting with "%s" from source because string could not be found'
                % other.sourceString[:50].replace("\n", "\\n"))

        lastPos = firstPos + len(other.sourceString)

        firstPart = self[:firstPos]

        secondPart = self[lastPos:]

        return firstPart + secondPart

    def __nonzero__(self):
        '''
		__nonzero__ is object evaluates to True or False
		sourceString will be False when the sourceString has nothing or nothing but whitespace
		'''
        return self.sourceString.strip() != ''

    def __str__(self):
        '''
		Mostly for debugging. Print the source with line numbers
		'''
        ret = ''
        for i, char in enumerate(self.sourceString):
            if i in self.characterToLineMap:
                ret += '%d: ' % self.characterToLineMap[i]
            ret += char
        return ret

    def copy(self):
        return copy.deepcopy(self)

    def firstLineNumber(self):
        '''
		First line number of the entire source
		'''
        try:
            return min(self.characterToLineMap.values())
        except ValueError:
            raise Exception("Sourcecode has no line numbers")

    def lastLineNumber(self):
        '''
		Last line number of the entire source
		'''
        try:
            return max(self.characterToLineMap.values())
        except ValueError:
            raise Exception("Sourcecode has no line numbers")

    def remove(self, stringToRemove):
        '''
		Remove a string. Does not alter object in place
		'''
        firstPos = self.sourceString.find(stringToRemove)
        if firstPos == -1:
            pdb.set_trace()
            raise Exception("String not found in source")
        lastPos = firstPos + len(stringToRemove)
        return self[:firstPos] + self[lastPos:]

    def pop(self):
        '''
		Pop off the last line
		'''
        lastLinePos = self.sourceString.rfind('\n')
        ret = self.sourceString[lastLinePos:]
        self = self[:lastLinePos]

        return ret

    def getPosition(self, lineNumberRequest):
        '''
		From lineNumber, get the character position
		'''
        for pos, lineNumber in self.characterToLineMap.items():
            if lineNumber == lineNumberRequest:
                return pos

        raise Exception("Could not find line number in source")

    def getLineNumber(self, pos):
        '''
		Decrement until we find the first character of the line and can get the linenumber
		'''
        while True:
            try:
                return self.characterToLineMap[pos]
            except:
                pos -= 1
                if pos < 0:
                    raise Exception(
                        "could not get line number for position %d" % pos)

    def find(self, what, start=0):
        '''
		Pass through 'find' makes implementations cleaner
		'''
        return self.sourceString.find(what, start)

    def extractBetweenDelimiters(self, startAt=0):
        '''
		Return the source between the first pair of delimiters after 'startAt'
		'''

        start = self.sourceString.find(self.delimA, startAt)
        if start == -1:
            return None
        start += self.delimLen

        endPos = self.endDelimPos(start, self.delimA, self.delimB)
        if endPos != -1:
            return self[start:endPos]
        else:
            return None

    def getSourceInBlock(self, bracketPos):
        '''
		Get the source within two matching brackets
		'''
        otherBracketPosition = self.matchingBracketPos(bracketPos)

        if bracketPos < otherBracketPosition:
            startBracketPos = bracketPos
            endBracketPos = otherBracketPosition
        else:
            startBracketPos = otherBracketPosition
            endBracketPos = bracketPos

        ret = self[startBracketPos + 1:endBracketPos]
        return ret

    def matchingBracketPos(self, bracketPos):
        '''
		Find the matching bracket position
		'''

        delim = self[bracketPos]
        if delim == self.delimA:
            if self.sourceString[bracketPos + 1] == self.delimB:
                return bracketPos + 1
            else:
                return self.endDelimPos(startAt=bracketPos + 1)
        elif delim == self.delimB:
            if self.sourceString[bracketPos - 1] == self.delimA:
                return bracketPos - 1
            else:
                return self.openDelimPos(startAt=bracketPos - 1)
        else:
            raise Exception('"%s" is not a known delimiter' % delim)

    def endDelimPos(self, startAt):
        '''
        Find the nearest end delimiter assuming that 'startAt' is inside of a block

        startAt -- position inside of the block at which the forward scan begins

        The scan starts with one block already open. Every self.delimA met on
        the way opens another block and every self.delimB closes one; both are
        compared against a window of self.delimLen characters.

        Returns the position of the delimiter that closes the block containing
        'startAt', or -1 when the end of the source is reached first.
        '''
        width = self.delimLen
        depth = 1  #the block that 'startAt' sits in is already open
        cursor = startAt

        while depth > 0 and cursor < len(self.sourceString):
            window = self.sourceString[cursor:cursor + width]
            if window == self.delimA:
                depth += 1
            elif window == self.delimB:
                depth -= 1
            else:
                #ordinary character, move on by one
                cursor += 1
                continue
            #a delimiter was consumed, jump past all of it
            cursor += width

        if depth != 0:
            return -1
        #the cursor already moved past the closing delimiter, so step back onto it
        return cursor - width

    def openDelimPos(self, pos):
        '''
        Find the nearest begin delimiter assuming that 'pos' is inside of a block

        pos -- position inside of the block at which the backward scan begins

        Walking backwards from 'pos', every '}' or ')' adds a nested block that
        has to be skipped and every '{' or '(' removes one. The first opener
        met with nothing left to skip is the one being looked for.

        Returns the position of that opener. Returns 0 when the start of the
        source is reached without finding one, so a result of 0 does not tell
        the two cases apart.

        TODO there is probably no reason why this also includes parenthesis
        TODO this should probably just be the same function as endDelimPos
        '''
        closers = ('}', ')')
        openers = ('{', '(')

        depth = 0
        cursor = pos
        while cursor >= 0:
            char = self.sourceString[cursor]
            if char in closers:
                depth += 1
            elif char in openers:
                depth -= 1
                if depth < 0:
                    #an opener with no closer behind it: the start of our block
                    return cursor
            cursor -= 1

        return 0

    def _removeCommentsAndStrings(self):
        '''

		Two things happen here:
		a. Character by character, add those characters which are not part of comments or strings to a new string
		   Same the new string as the 'sourceString' variable
		b. At the same time, generate an array of line number beginnings called the 'characterToLineMap'

		This uses a mutable string to save time on adding

		'''
        print "Removing comments and strings..."

        originalString = str(self.sourceString)
        self.sourceString = MString('')
        self.characterToLineMap = {}
        lineCount = 1
        self.characterToLineMap[0] = lineCount  #character 0 is line #1
        lineCount += 1  #set up for next line which will be two
        #pdb.set_trace()
        i = 0

        inlineCommentLen = len(self.inlineComments)

        #begin analyzing charactes 1 by 1 until we reach the end of the originalString
        #-blockCommentLen so that we don't go out of bounds
        while i < len(originalString):
            #check if the next characters are a block comment
            #There are multiple types of block comments so we have to check them all
            for blockComment in self.blockComments:
                if type(blockComment['start']) == str:
                    blockCommentLen = len(blockComment['start'])
                    if originalString[i:][:blockCommentLen] == blockComment[
                            'start']:
                        #if it was a block comment, jog forward
                        prevI = i
                        i = originalString.find(
                            blockComment['end'],
                            i + blockCommentLen) + blockCommentLen

                        while originalString[i - 1] == '\\':
                            i = originalString.find(
                                blockComment['end'],
                                i + blockCommentLen) + blockCommentLen

                        if i == -1 + blockCommentLen:
                            #if we can't find the blockcomment and have reached the end of the file
                            #return the cleaned file
                            return

                        #increment the newlines
                        lineCount += originalString[prevI:i].count('\n')

                        #still want to see the comments, just not what is inside
                        self.sourceString.append(blockComment['start'] +
                                                 blockComment['end'])

                        break
                else:
                    #is a regex blockcomment... sigh js sigh...
                    match = blockComment['start'].match(originalString[i:])
                    if match:
                        #print match.group(0)
                        #print originalString[i-5:i+5]
                        prevI = i

                        endMatch = blockComment['end'].search(
                            originalString[i + match.end(0):])

                        if endMatch:
                            i = i + match.end(0) + endMatch.end(0)
                        else:
                            return

                        #increment the newlines
                        lineCount += originalString[prevI:i].count('\n')
                        break
            else:
                #check if the next characters are an inline comment
                if originalString[
                        i:][:inlineCommentLen] == self.inlineComments:
                    #if so, find the end of the line and jog forward. Add one to jog past the newline
                    i = originalString.find("\n", i + inlineCommentLen + 1)

                    #if we didn't find the end of the line, that is the end of the file. Return
                    if i == -1:
                        return
                else:
                    #Otherwise, it is not a comment. Add to returnstr
                    self.sourceString.append(originalString[i])

                    #if the originalString is a newline, then we must note this
                    if originalString[i] == '\n':
                        self.characterToLineMap[len(
                            self.sourceString)] = lineCount
                        lineCount += 1
                    i += 1