Python WideStringOffsetConverter Examples, textUtils.WideStringOffsetConverter Python Examples

Example #1

0

Show file

File: offsets.py Project: resslerruntime/nvda

 def _getWordOffsets(self, offset):
     """Return the start and end offsets of the word found at C{offset}.

     When C{self.useUniscribe} is set, the helper library's
     C{calculateWordOffsets} is asked first; if it is not used or returns a
     false value, C{findStartOfWord}/C{findEndOfWord} are used instead.
     @param offset: the offset whose surrounding word is wanted.
     @return: a (start, end) tuple, or a [start, end] list from the plain
         string fall-back path.
     @raise NotImplementedError: if C{self.encoding} is not one of the
         encodings handled here.
     """
     handledEncodings = (textUtils.WCHAR_ENCODING, None, "utf_32_le",
                         locale.getlocale()[1])
     if self.encoding not in handledEncodings:
         raise NotImplementedError
     lineStart, lineEnd = self._getLineOffsets(offset)
     # NULL and non-breaking space are turned into plain spaces so that words break on them.
     wordBreakers = {0: u' ', 0xa0: u' '}
     lineText = self._getTextRange(lineStart, lineEnd).translate(wordBreakers)
     relOffset = offset - lineStart
     if self.useUniscribe:
         wordStart = ctypes.c_int()
         wordEnd = ctypes.c_int()
         # len(lineText) can't be relied on for the length of the line.
         if self.encoding == textUtils.WCHAR_ENCODING:
             lineLength = lineEnd - lineStart
         else:
             # The str based offsets have to become wide string offsets first.
             offsetConverter = textUtils.WideStringOffsetConverter(lineText)
             lineLength = offsetConverter.wideStringLength
             relOffset = offsetConverter.strToWideOffsets(relOffset, relOffset)[0]
         # Uniscribe does some strange things with a string that has no more than
         # two alphanumeric chars in a row, so two are injected at the end.
         succeeded = NVDAHelper.localLib.calculateWordOffsets(
             lineText + "xx", lineLength + 2, relOffset,
             ctypes.byref(wordStart), ctypes.byref(wordEnd))
         if succeeded:
             relStart = wordStart.value
             # Never report an end that lies inside the injected padding.
             relEnd = min(lineLength, wordEnd.value)
             if self.encoding != textUtils.WCHAR_ENCODING:
                 # Convert the uniscribe based offsets back to str offsets.
                 relStart, relEnd = offsetConverter.wideToStrOffsets(
                     relStart, relEnd)
             return (relStart + lineStart, relEnd + lineStart)
     # Fall back to the older detection that only breaks on non alphanumeric characters.
     if self.encoding != textUtils.WCHAR_ENCODING:
         # relOffset may have been converted above, so recompute the str offset.
         strRelOffset = offset - lineStart
         return [
             findStartOfWord(lineText, strRelOffset) + lineStart,
             findEndOfWord(lineText, strRelOffset) + lineStart,
         ]
     offsetConverter = textUtils.WideStringOffsetConverter(lineText)
     strRelOffset = offsetConverter.wideToStrOffsets(relOffset, relOffset)[0]
     strWordStart = findStartOfWord(lineText, strRelOffset)
     strWordEnd = findEndOfWord(lineText, strRelOffset)
     wideWordStart, wideWordEnd = offsetConverter.strToWideOffsets(
         strWordStart, strWordEnd)
     return (wideWordStart + lineStart, wideWordEnd + lineStart)

Example #2

0

Show file

File: offsets.py Project: xingkong0113/nvda

	def _getWordOffsets(self,offset):
		"""Return the start and end offsets of the word found at C{offset}.

		C{self._calculateUniscribeOffsets} is consulted first when C{self.useUniscribe} is set;
		if it is not used or returns None, C{findStartOfWord}/C{findEndOfWord} are used instead.
		@param offset: the offset whose surrounding word is wanted.
		@return: a (start, end) tuple, or a [start, end] list from the plain string fall-back path.
		@raise NotImplementedError: if C{self.encoding} is not one of the encodings handled here.
		"""
		encoding = self.encoding
		usesWideOffsets = encoding == textUtils.WCHAR_ENCODING
		if (
			not usesWideOffsets
			and encoding is not None
			and encoding != "utf_32_le"
			and encoding != textUtils.USER_ANSI_CODE_PAGE
		):
			raise NotImplementedError
		lineStart, lineEnd = self._getLineOffsets(offset)
		# NULL and non-breaking space are turned into plain spaces so that words break on them.
		lineText = self._getTextRange(lineStart, lineEnd).translate({0: u' ', 0xa0: u' '})
		relOffset = offset - lineStart
		if self.useUniscribe:
			uniscribeOffsets = self._calculateUniscribeOffsets(lineText, textInfos.UNIT_WORD, relOffset)
			if uniscribeOffsets is not None:
				return (uniscribeOffsets[0] + lineStart, uniscribeOffsets[1] + lineStart)
		# Fall back to the older detection that only breaks on non alphanumeric characters.
		if not usesWideOffsets:
			return [
				findStartOfWord(lineText, relOffset) + lineStart,
				findEndOfWord(lineText, relOffset) + lineStart,
			]
		converter = textUtils.WideStringOffsetConverter(lineText)
		strRelOffset = converter.wideToStrOffsets(relOffset, relOffset)[0]
		strWordStart = findStartOfWord(lineText, strRelOffset)
		strWordEnd = findEndOfWord(lineText, strRelOffset)
		wideWordStart, wideWordEnd = converter.strToWideOffsets(strWordStart, strWordEnd)
		return (wideWordStart + lineStart, wideWordEnd + lineStart)

Example #3

0

Show file

File: displayModel.py Project: SWEN-712/screen-reader-MaxMendelson

 def _getSelectionOffsets(self):
     """Return the (start, end) wide string offsets of the text drawn in the selection colors.

     A "formatChange" command whose 'color' and 'background-color' match
     C{self.foregroundSelectionColor} and C{self.backgroundSelectionColor}
     opens a highlighted chunk; any other non-text item closes it.
     @raise LookupError: if either selection color is None, or no
         highlighted text is found.
     """
     if self.backgroundSelectionColor is None or self.foregroundSelectionColor is None:
         raise LookupError
     selectionStart = selectionEnd = None
     position = 0
     insideHighlight = False
     for entry in self._storyFieldsAndRects[0]:
         opensHighlight = (
             isinstance(entry, textInfos.FieldCommand)
             and entry.command == "formatChange"
             and entry.field.get('color', None) == self.foregroundSelectionColor
             and entry.field.get('background-color', None) == self.backgroundSelectionColor
         )
         if opensHighlight:
             insideHighlight = True
             # Only the first highlighted chunk determines the start.
             if selectionStart is None:
                 selectionStart = position
         elif isinstance(entry, str):
             position += textUtils.WideStringOffsetConverter(entry).wideStringLength
             # The end keeps moving forward for as long as text is highlighted.
             if insideHighlight:
                 selectionEnd = position
         else:
             insideHighlight = False
     if selectionStart is None or selectionEnd is None:
         raise LookupError
     return (selectionStart, selectionEnd)

Example #4

0

Show file

File: displayModel.py Project: ruifontes/nvda

	def _getFieldsInRange(self,start,end):
		"""Return the slice of the story's fields and text that covers the offsets C{start} to C{end}.

		Offsets are counted in wide string lengths of the text items.
		The item just before the string containing C{start} and the item just after
		the string containing C{end} are included, and the boundary strings are trimmed to the requested offsets.
		@return: a new list, empty when there are no story fields or C{start} falls in no string.
		"""
		storyFields=self._storyFieldsAndRects[0]
		if not storyFields:
			return []
		firstIndex=lastIndex=None
		trimFront=trimBack=None
		consumed=0
		for position,entry in enumerate(storyFields):
			if not isinstance(entry,str):
				continue
			chunkEnd=consumed+textUtils.WideStringOffsetConverter(entry).wideStringLength
			if consumed<=start<chunkEnd:
				# Begin one item earlier than the string holding the start offset.
				firstIndex=position-1
				trimFront=start-consumed
			if consumed<end<=chunkEnd:
				# Stop one item later than the string holding the end offset.
				lastIndex=position+1
				trimBack=end-consumed
			consumed=chunkEnd
		if firstIndex is None:
			return []
		if lastIndex is None:
			lastIndex=len(storyFields)
		selected=storyFields[firstIndex:lastIndex]
		if lastIndex-firstIndex==2 and trimFront is not None and trimBack is not None:
			# Start and end fall in the same string, so trim both sides at once.
			selected[1]=selected[1][trimFront:trimBack]
			return selected
		if trimFront is not None:
			selected[1]=selected[1][trimFront:]
		if trimBack is not None:
			selected[-1]=selected[-1][:trimBack]
		return selected

Example #5

0

Show file

File: b2.py Project: lukaszgo1/Becky

 def _getSelectionOffsets(self):
     """Return the (start, end) offsets of the highlighted text, or the caret position.

     A "formatChange" command whose 'color' equals RGB(255, 255, 255) opens a
     highlighted chunk; any other non-text item closes it. Text lengths are
     measured with C{textUtils.WideStringOffsetConverter} when the C{textUtils}
     module can be imported, and with C{len} otherwise.
     @return: (start, end) of the highlighted text, or (caret, caret) from
         C{self._getCaretOffset()} when no highlighted text is found.
     """
     # Resolve the optional module once up front; doing it inside the loop
     # repeated a failing import for every single text item.
     try:
         import textUtils
     except ImportError:
         textUtils = None
     fields = self._storyFieldsAndRects[0]
     startOffset = None
     endOffset = None
     curOffset = 0
     inHighlightChunk = False
     for item in fields:
         if (isinstance(item, textInfos.FieldCommand)
                 and item.command == "formatChange" and item.field.get(
                     'color', None) == RGB(red=255, green=255, blue=255)):
             inHighlightChunk = True
             # Only the first highlighted chunk determines the start.
             if startOffset is None:
                 startOffset = curOffset
         elif isinstance(item, six.string_types):
             if textUtils is not None:
                 curOffset += textUtils.WideStringOffsetConverter(
                     item).wideStringLength
             else:
                 curOffset += len(item)
             # The end keeps moving forward for as long as text is highlighted.
             if inHighlightChunk:
                 endOffset = curOffset
         else:
             inHighlightChunk = False
     if startOffset is not None and endOffset is not None:
         return (startOffset, endOffset)
     # Nothing highlighted: report a collapsed selection at the caret.
     offset = self._getCaretOffset()
     return offset, offset

Example #6

0

Show file

File: displayModel.py Project: sdokowe/nvda

	def _get__storyFieldsAndRects(self):
		"""Fetch the text drawn in this object's rectangle and split it into lines.

		@return: a 3-tuple of (commandList, rects, lineEndOffsets):
			commandList is the parsed list of field commands and text strings,
			rects is the second value returned by C{getWindowTextInRect},
			and lineEndOffsets holds the wide string offset at which each detected line ends.
			Three empty lists are returned when there is no location, no binding handle or no text.
		"""
		# All returned coordinates are logical coordinates.
		if self._location:
			left, top, right, bottom = self._location
		else:
			try:
				left, top, width, height = self.obj.location
			except TypeError:
				# No location; nothing we can do.
				return [],[],[]
			right = left + width
			bottom = top + height
		bindingHandle=self.obj.appModule.helperLocalBindingHandle
		if not bindingHandle:
			log.debugWarning("AppModule does not have a binding handle")
			return [],[],[]
		# Both corners of the rectangle are converted from physical to logical points.
		left,top=windowUtils.physicalToLogicalPoint(self.obj.windowHandle,left,top)
		right,bottom=windowUtils.physicalToLogicalPoint(self.obj.windowHandle,right,bottom)
		text,rects=getWindowTextInRect(bindingHandle, self.obj.windowHandle, left, top, right, bottom, self.minHorizontalWhitespace, self.minVerticalWhitespace,self.stripOuterWhitespace,self.includeDescendantWindows)
		if not text:
			return [],[],[]
		# Wrap the text in a single root element before handing it to the XML parser.
		text="<control>%s</control>"%text
		commandList=XMLFormatting.XMLTextParser().parse(text)
		curFormatField=None
		# Running wide string length of all text seen so far.
		lastEndOffset=0
		# Offset, index and baseline of the line currently being collected.
		lineStartOffset=0
		lineStartIndex=0
		lineBaseline=None
		lineEndOffsets=[]
		for index in range(len(commandList)):
			item=commandList[index]
			if isinstance(item,str):
				lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
			elif isinstance(item,textInfos.FieldCommand):
				if isinstance(item.field,textInfos.FormatField):
					curFormatField=item.field
					self._normalizeFormatField(curFormatField)
				else:
					curFormatField=None
				baseline=curFormatField['baseline'] if curFormatField  else None
				# A change of baseline marks the boundary between two lines.
				if baseline!=lineBaseline:
					if lineBaseline is not None:
						# The line that just ended is processed in place before moving on.
						processWindowChunksInLine(commandList,rects,lineStartIndex,lineStartOffset,index,lastEndOffset)
						#Convert the whitespace at the end of the line into a line feed
						item=commandList[index-1]
						if (
							isinstance(item,str)
							# Since we're searching for white space, it is safe to
							# do this operation on the length of the pythonic string
							and len(item)==1
							and item.isspace()
						):
							commandList[index-1]=u'\n'
						lineEndOffsets.append(lastEndOffset)
					if baseline is not None:
						# Start collecting a new line at this command.
						lineStartIndex=index
						lineStartOffset=lastEndOffset
						lineBaseline=baseline
		return commandList,rects,lineEndOffsets

Example #7

0

Show file

    def _getCurrentOffsetInThisLine(self, lineInfo):
        """
        Return the index into the line at which the caret is located.

        C{lineInfo} is a caret textInfo expanded to line. The index is needed
        because Uniscribe requires indices into the text to find word
        boundaries, whereas UIA only allows for relative movement.
        The index is the wide string length of the text between the start of
        the line and the current position.
        """
        # Shrink a copy of the line so that it ends at the current position.
        lineUpToCaret = lineInfo.copy()
        lineUpToCaret.setEndPoint(self, "endToStart")
        textBeforeCaret = lineUpToCaret._rangeObj.getText(-1)
        return textUtils.WideStringOffsetConverter(textBeforeCaret).wideStringLength

Example #8

0

Show file

File: offsets.py Project: resslerruntime/nvda

 def _getLineOffsets(self, offset):
     """Return the start and end offsets of the line containing C{offset}.

     For the wide character encoding the offset is converted to a str
     offset, the line is located with C{findStartOfLine}/C{findEndOfLine},
     and the result is converted back to wide string offsets.
     @raise NotImplementedError: if C{self.encoding} is not one of the
         encodings handled here.
     """
     storyText = self._getStoryText()
     if self.encoding != textUtils.WCHAR_ENCODING:
         if self.encoding not in (None, "utf_32_le", locale.getlocale()[1]):
             raise NotImplementedError
         # The offset is used directly as an index into the story text.
         return [findStartOfLine(storyText, offset), findEndOfLine(storyText, offset)]
     converter = textUtils.WideStringOffsetConverter(storyText)
     strOffset = converter.wideToStrOffsets(offset, offset)[0]
     strLineStart = findStartOfLine(storyText, strOffset)
     strLineEnd = findEndOfLine(storyText, strOffset)
     return converter.strToWideOffsets(strLineStart, strLineEnd)

Example #9

0

Show file

File: offsets.py Project: XLTechie/xlnvda

 def _getLineOffsets(self, offset):
     """Return the start and end offsets of the line containing C{offset}.

     For the wide character encoding the offset is converted to a str
     offset, the line is located with C{findStartOfLine}/C{findEndOfLine},
     and the result is converted back to wide string offsets.
     @raise NotImplementedError: if C{self.encoding} is not one of the
         encodings handled here.
     """
     storyText = self._getStoryText()
     encoding = self.encoding
     if encoding == textUtils.WCHAR_ENCODING:
         converter = textUtils.WideStringOffsetConverter(storyText)
         strOffset = converter.wideToStrOffsets(offset, offset)[0]
         strLineStart = findStartOfLine(storyText, strOffset)
         strLineEnd = findEndOfLine(storyText, strOffset)
         return converter.strToWideOffsets(strLineStart, strLineEnd)
     if (encoding is not None and encoding != "utf_32_le"
             and encoding != textUtils.USER_ANSI_CODE_PAGE):
         raise NotImplementedError
     # The offset is used directly as an index into the story text.
     return [findStartOfLine(storyText, offset), findEndOfLine(storyText, offset)]

Example #10

0

Show file

File: displayModel.py Project: ruifontes/nvda

	def _getStoryOffsetLocations(self):
		"""Yield a (rect, baseline, direction) tuple for every rect that belongs to a text item of the story.

		The baseline and direction come from the most recent format field seen before the text;
		until one is seen they are None and 0 respectively.
		Rects are sliced using the wide string length of each text item.
		"""
		commandList, rects = self._storyFieldsAndRects[:2]
		currentBaseline=None
		currentDirection=0
		consumed=0
		for entry in commandList:
			if isinstance(entry,textInfos.FieldCommand) and isinstance(entry.field,textInfos.FormatField):
				formatField=entry.field
				currentBaseline=formatField['baseline']
				currentDirection=formatField['direction']
				continue
			if not isinstance(entry,str):
				continue
			chunkEnd=consumed+textUtils.WideStringOffsetConverter(entry).wideStringLength
			for rect in rects[consumed:chunkEnd]:
				yield rect,currentBaseline,currentDirection
			consumed=chunkEnd

Example #11

0

Show file

File: displayModel.py Project: ruifontes/nvda

def processWindowChunksInLine(commandList,rects,startIndex,startOffset,endIndex,endOffset):
	"""Split the items C{commandList[startIndex:endIndex]} into chunks by their 'hwnd' field and process each chunk.

	Each time the 'hwnd' of a non-text item differs from the previous one,
	the chunk collected so far is passed to C{processFieldsAndRectsRangeReadingdirection}.
	Offsets are advanced by the wide string length of every text item.
	"""
	chunkStartIndex=startIndex
	chunkStartOffset=startOffset
	runningOffset=startOffset
	previousHwnd=None
	# The range runs one past endIndex: that last pass sees no item, which flushes the final chunk.
	for index in range(startIndex,endIndex+1):
		item=None if index>=endIndex else commandList[index]
		if isinstance(item,str):
			runningOffset+=textUtils.WideStringOffsetConverter(item).wideStringLength
			continue
		hwnd=item.field['hwnd'] if item else None
		windowChanged=previousHwnd is not None and hwnd!=previousHwnd
		if windowChanged:
			processFieldsAndRectsRangeReadingdirection(commandList,rects,chunkStartIndex,chunkStartOffset,index,runningOffset)
			chunkStartIndex=index
			chunkStartOffset=runningOffset
		previousHwnd=hwnd

Example #12

0

Show file

File: offsets.py Project: resslerruntime/nvda

 def _getCharacterOffsets(self, offset):
     """Return the (start, end) offsets of the character at C{offset}.

     For the wide character encoding, the offsets are converted to str
     offsets and back again within the containing line. For the other
     handled encodings a character is assumed to span exactly one offset.
     @raise NotImplementedError: if C{self.encoding} is not one of the
         encodings handled here.
     """
     if self.encoding != textUtils.WCHAR_ENCODING:
         if self.encoding not in (None, "utf_32_le", locale.getlocale()[1]):
             raise NotImplementedError
         return offset, offset + 1
     lineStart, lineEnd = self._getLineOffsets(offset)
     converter = textUtils.WideStringOffsetConverter(
         self._getTextRange(lineStart, lineEnd))
     relOffset = offset - lineStart
     strCharStart, strCharEnd = converter.wideToStrOffsets(
         relOffset, relOffset + 1)
     wideCharStart, wideCharEnd = converter.strToWideOffsets(
         strCharStart, strCharEnd)
     return (wideCharStart + lineStart, wideCharEnd + lineStart)

Example #13

0

Show file

File: offsets.py Project: resslerruntime/nvda

    def _getTextRange(self, start, end):
        """Retrieve the text in a given offset range.

        For the wide character encoding the offsets are first converted to
        str offsets; for the other handled encodings they are used as is.
        @param start: The start offset.
        @type start: int
        @param end: The end offset (exclusive).
        @type end: int
        @return: The text contained in the requested range.
        @rtype: str
        @raise NotImplementedError: if C{self.encoding} is not one of the
            encodings handled here.
        """
        storyText = self._getStoryText()
        if self.encoding == textUtils.WCHAR_ENCODING:
            converter = textUtils.WideStringOffsetConverter(storyText)
            strStart, strEnd = converter.wideToStrOffsets(start, end)
            return storyText[strStart:strEnd]
        if self.encoding not in (None, "utf_32_le", locale.getlocale()[1]):
            raise NotImplementedError
        return storyText[start:end]

Example #14

0

Show file

 def _getWordOffsetsInThisLine(self, offset, lineInfo):
     """Return the (start, end) offsets, within the line, of the word at C{offset}.

     The text of C{lineInfo} is handed to the helper library's
     C{calculateWordOffsets}; the reported end is capped so that it does
     not reach into the two padding characters appended to the text.
     """
     # NULL and non-breaking space are turned into plain spaces so that
     # words will break on them.
     wordBreakers = {0: u' ', 0xa0: u' '}
     # Uniscribe does some strange things with a string that has no more than
     # two alphanumeric chars in a row, so two are injected at the end.
     paddedText = lineInfo._rangeObj.getText(-1).translate(wordBreakers) + "xx"
     paddedLength = textUtils.WideStringOffsetConverter(paddedText).wideStringLength
     wordStart = ctypes.c_int()
     wordEnd = ctypes.c_int()
     NVDAHelper.localLib.calculateWordOffsets(
         paddedText, paddedLength, offset,
         ctypes.byref(wordStart), ctypes.byref(wordEnd))
     # Length without the padding, but never less than one.
     endLimit = max(1, paddedLength - 2)
     return (wordStart.value, min(wordEnd.value, endLimit))

Example #15

0

Show file

File: offsets.py Project: XLTechie/xlnvda

    def _calculateUniscribeOffsets(
            self, lineText: str, unit: str,
            relOffset: int) -> Optional[Tuple[int, int]]:
        """
        Calculates the bounds of a unit at an offset within a given string of text
        using the Windows uniscribe  library, also used in Notepad, for example.
        Units supported are character and word.
        @param lineText: the text string to analyze
        @param unit: the TextInfo unit (character or word)
        @param relOffset: the character offset within the text string at which to calculate the bounds.
            When C{self.encoding} is not the wide character encoding, it is
            treated as a str offset and converted to a wide string offset.
        @return: the (start, end) bounds relative to the text string, converted
            back to str offsets when C{self.encoding} is not the wide character
            encoding; None if the helper function returns a false value.
        @raise NotImplementedError: if the unit is neither word nor character.
        """
        # Units are strings, so they are compared by value: an identity check
        # would reject an equal string that is not the very same object.
        if unit == textInfos.UNIT_WORD:
            helperFunc = NVDAHelper.localLib.calculateWordOffsets
        elif unit == textInfos.UNIT_CHARACTER:
            helperFunc = NVDAHelper.localLib.calculateCharacterOffsets
        else:
            raise NotImplementedError(f"Unit: {unit}")
        relStart = ctypes.c_int()
        relEnd = ctypes.c_int()
        # uniscribe does some strange things
        # when you give it a string  with not more than two alphanumeric chars in a row.
        # Inject two alphanumeric characters at the end to fix this
        uniscribeLineText = lineText + "xx"
        # We can't rely on len(lineText) to calculate the length of the line.
        offsetConverter = textUtils.WideStringOffsetConverter(lineText)
        lineLength = offsetConverter.wideStringLength
        if self.encoding != textUtils.WCHAR_ENCODING:
            # We need to convert the str based line offsets to wide string offsets.
            relOffset = offsetConverter.strToWideOffsets(relOffset,
                                                         relOffset)[0]
        uniscribeLineLength = lineLength + 2
        if helperFunc(uniscribeLineText, uniscribeLineLength, relOffset,
                      ctypes.byref(relStart), ctypes.byref(relEnd)):
            relStart = relStart.value
            # Never report an end that lies inside the injected padding.
            relEnd = min(lineLength, relEnd.value)
            if self.encoding != textUtils.WCHAR_ENCODING:
                # We need to convert the uniscribe based offsets to str offsets.
                relStart, relEnd = offsetConverter.wideToStrOffsets(
                    relStart, relEnd)
            return (relStart, relEnd)
        log.debugWarning(
            f"Uniscribe failed to calculate {unit} offsets for text {lineText!r}"
        )
        return None

Example #16

0

Show file

File: offsets.py Project: xingkong0113/nvda

	def _getCharacterOffsets(self, offset):
		"""Return the (start, end) offsets of the character at C{offset}.

		C{self._calculateUniscribeOffsets} is consulted first when C{self.useUniscribe} is set.
		If it is not used or returns None: for the wide character encoding the offsets are converted
		to str offsets and back again within the line, otherwise (offset, offset + 1) is returned.
		@raise NotImplementedError: if C{self.encoding} is not one of the encodings handled here.
		"""
		encoding = self.encoding
		usesWideOffsets = encoding == textUtils.WCHAR_ENCODING
		if not usesWideOffsets and encoding not in (None, "utf_32_le", textUtils.USER_ANSI_CODE_PAGE):
			raise NotImplementedError
		lineStart, lineEnd = self._getLineOffsets(offset)
		lineText = self._getTextRange(lineStart, lineEnd)
		relOffset = offset - lineStart
		if self.useUniscribe:
			uniscribeOffsets = self._calculateUniscribeOffsets(lineText, textInfos.UNIT_CHARACTER, relOffset)
			if uniscribeOffsets is not None:
				return (uniscribeOffsets[0] + lineStart, uniscribeOffsets[1] + lineStart)
		if not usesWideOffsets:
			return (offset, offset + 1)
		converter = textUtils.WideStringOffsetConverter(lineText)
		strCharStart, strCharEnd = converter.wideToStrOffsets(relOffset, relOffset + 1)
		wideCharStart, wideCharEnd = converter.strToWideOffsets(strCharStart, strCharEnd)
		return (wideCharStart + lineStart, wideCharEnd + lineStart)

Example #17

0

Show file

File: displayModel.py Project: ruifontes/nvda

def processFieldsAndRectsRangeReadingdirection(commandList,rects,startIndex,startOffset,endIndex,endOffset):
	"""Resolve reading directions for a range of fields and put right-to-left content into reading order.

	Works in place on C{commandList[startIndex:endIndex]} and C{rects[startOffset:endOffset]}:
	 1. Format fields with an unknown direction (0) receive one from C{detectStringDirection},
	    and an overall direction for the passage is accumulated.
	 2. Fields still neutral are given the direction of the field before them, walking the passage in its overall direction.
	 3. Each run of consecutive right-to-left fields has its rects, its text and its field order reversed.
	 4. If the passage as a whole is right-to-left, the order of the runs themselves is reversed.
	The function returns after step 1 when no right-to-left text was seen.
	@param commandList: list of field commands and text strings; modified in place.
	@param rects: list of rects indexed by wide string offset; modified in place.
	@param startIndex: index in commandList of the first item of the passage.
	@param startOffset: offset in rects at which the passage starts.
	@param endIndex: index in commandList just past the last item of the passage.
	@param endOffset: offset in rects just past the end of the passage.
	"""
	containsRtl=False # True if any rtl text is found at all
	curFormatField=None
	overallDirection=0 # The general reading direction calculated based on the amount of rtl vs ltr text there is
	# Detect the direction for fields with an unknown reading direction, and calculate an over all direction for the entire passage
	for index in range(startIndex,endIndex):
		item=commandList[index]
		if isinstance(item,textInfos.FieldCommand) and isinstance(item.field,textInfos.FormatField):
			curFormatField=item.field
		elif isinstance(item,str):
			direction=curFormatField['direction']
			if direction==0:
				curFormatField['direction']=direction=detectStringDirection(item)
			elif direction==-2: #numbers in an rtl context
				curFormatField['direction']=direction=-1
				curFormatField['shouldReverseText']=False
			if direction<0:
				containsRtl=True
			overallDirection+=direction
	if not containsRtl:
		# As no rtl text was ever seen, then there is nothing else to do
		return
	if overallDirection==0: overallDirection=1
	# following the calculated over all reading direction of the passage, correct all weak/neutral fields to have the same reading direction as the field preceding them
	lastDirection=overallDirection
	for index in range(startIndex,endIndex):
		# For an rtl passage, walk the same range backwards.
		# The mirror has to include startIndex, otherwise a passage that does not
		# begin at index 0 would visit items in front of the requested range.
		if overallDirection<0: index=startIndex+endIndex-index-1
		item=commandList[index]
		if isinstance(item,textInfos.FieldCommand) and isinstance(item.field,textInfos.FormatField):
			direction=item.field['direction']
			if direction==0:
				item.field['direction']=lastDirection
			# NOTE(review): for a neutral field this stores 0 rather than the direction just assigned — confirm this is intended.
			lastDirection=direction
	# For fields that are rtl, reverse their text, their rects, and the order of consecutive rtl fields
	lastEndOffset=startOffset
	runDirection=None
	runStartIndex=None
	runStartOffset=None
	if overallDirection<0:
		reorderList=[]
	# The range runs one past endIndex: that last pass sees no item, which closes the final run.
	for index in range(startIndex,endIndex+1):
		item=commandList[index] if index<endIndex else None
		if isinstance(item,str):
			lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
		elif not item or (isinstance(item,textInfos.FieldCommand) and isinstance(item.field,textInfos.FormatField)):
			direction=item.field['direction'] if item else None
			if direction is None or (direction!=runDirection):
				if runDirection is not None:
					# This is the end of a run of consecutive fields of the same direction
					if runDirection<0:
						#This run is rtl, so reverse its rects, the text within the fields, and the order of fields themselves
						#Reverse rects
						rects[runStartOffset:lastEndOffset]=rects[lastEndOffset-1:runStartOffset-1 if runStartOffset>0 else None:-1]
						rectsStart=runStartOffset
						# The run is walked two items at a time: a command followed by its text.
						for i in range(runStartIndex,index,2):
							command=commandList[i]
							text=commandList[i+1]
							rectsEnd = rectsStart + textUtils.WideStringOffsetConverter(text).wideStringLength
							# Swap command and text, so that the reversal of the whole run below puts the command first again.
							commandList[i+1]=command
							shouldReverseText=command.field.get('shouldReverseText',True)
							commandList[i]=normalizeRtlString(text[::-1] if shouldReverseText else text)
							if not shouldReverseText:
								#Because all the rects in the run were already reversed, we need to undo that for this field
								rects[rectsStart:rectsEnd]=rects[rectsEnd-1:rectsStart-1 if rectsStart>0 else None:-1]
							rectsStart=rectsEnd
						#Reverse commandList
						commandList[runStartIndex:index]=commandList[index-1:runStartIndex-1 if runStartIndex>0 else None:-1]
					if overallDirection<0:
						#As the overall reading direction of the passage is rtl, record the location of this run so we can reverse the order of runs later
						reorderList.append((runStartIndex,runStartOffset,index,lastEndOffset))
				if item:
					# A new run starts at this format field.
					runStartIndex=index
					runStartOffset=lastEndOffset
					runDirection=direction
	if overallDirection<0:
		# As the overall reading direction of the passage is rtl, build a new command list and rects list with the order of runs reversed
		# The content of each run is already in logical reading order itself
		newCommandList=[]
		newRects=[]
		for si,so,ei,eo in reversed(reorderList):
			newCommandList.extend(yieldListRange(commandList,si,ei))
			newRects.extend(yieldListRange(rects,so,eo))
		# Update the original command list and rect list replacing the old content for this passage with the reordered runs
		commandList[startIndex:endIndex]=newCommandList
		rects[startOffset:endOffset]=newRects

Example #18

0

Show file

 def _getStoryLength(self):
     """Return the length of the story text as a wide string length."""
     # NVDAObjectTextInfo would just return the str length of the story text.
     # That is incorrect here, since this TextInfo uses UTF-16 as its internal encoding.
     storyText = self._getStoryText()
     converter = textUtils.WideStringOffsetConverter(storyText)
     return converter.wideStringLength