def _getWordOffsets(self, offset):
    """Return the start and end offsets of the word that contains ``offset``.

    When ``self.useUniscribe`` is set, the native ``calculateWordOffsets``
    helper is asked first. If it is not used, or reports failure, the older
    detection that only breaks on non alphanumeric characters is applied.

    Raises NotImplementedError when ``self.encoding`` is not the wide
    character encoding, None, "utf_32_le" or the encoding reported by
    ``locale.getlocale()``.
    """
    supportedEncodings = (
        textUtils.WCHAR_ENCODING,
        None,
        "utf_32_le",
        locale.getlocale()[1],
    )
    if self.encoding not in supportedEncodings:
        raise NotImplementedError
    lineStart, lineEnd = self._getLineOffsets(offset)
    # NULL and non-breaking space are turned into plain spaces,
    # which guarantees that words break on them.
    lineText = self._getTextRange(lineStart, lineEnd).translate({0: u' ', 0xa0: u' '})
    relOffset = offset - lineStart
    if self.useUniscribe:
        needsConversion = self.encoding != textUtils.WCHAR_ENCODING
        # len(lineText) is not a reliable measure of the line length here.
        if needsConversion:
            # str based offsets have to be expressed as wide string offsets.
            converter = textUtils.WideStringOffsetConverter(lineText)
            wideLength = converter.wideStringLength
            wideOffset = converter.strToWideOffsets(relOffset, relOffset)[0]
        else:
            wideLength = lineEnd - lineStart
            wideOffset = relOffset
        wordStart = ctypes.c_int()
        wordEnd = ctypes.c_int()
        # Uniscribe behaves strangely for strings with no more than two
        # alphanumeric characters in a row, so two such characters are appended.
        succeeded = NVDAHelper.localLib.calculateWordOffsets(
            lineText + "xx",
            wideLength + 2,
            wideOffset,
            ctypes.byref(wordStart),
            ctypes.byref(wordEnd),
        )
        if succeeded:
            foundStart = wordStart.value
            # Never report an end that lies inside the two padding characters.
            foundEnd = min(wideLength, wordEnd.value)
            if needsConversion:
                # Back from uniscribe (wide) offsets to str offsets.
                foundStart, foundEnd = converter.wideToStrOffsets(foundStart, foundEnd)
            return (foundStart + lineStart, foundEnd + lineStart)
    # Fallback: word detection that only breaks on non alphanumeric characters.
    if self.encoding == textUtils.WCHAR_ENCODING:
        converter = textUtils.WideStringOffsetConverter(lineText)
        strOffset = converter.wideToStrOffsets(relOffset, relOffset)[0]
        strWordStart = findStartOfWord(lineText, strOffset)
        strWordEnd = findEndOfWord(lineText, strOffset)
        wideWordStart, wideWordEnd = converter.strToWideOffsets(strWordStart, strWordEnd)
        return (wideWordStart + lineStart, wideWordEnd + lineStart)
    return [
        findStartOfWord(lineText, relOffset) + lineStart,
        findEndOfWord(lineText, relOffset) + lineStart,
    ]
def _getWordOffsets(self, offset):
    """Return the start and end offsets of the word that contains ``offset``.

    ``self._calculateUniscribeOffsets`` is consulted first when
    ``self.useUniscribe`` is set; a ``None`` result from it, or uniscribe
    being disabled, leads to the older detection that only breaks on
    non alphanumeric characters.

    Raises NotImplementedError when ``self.encoding`` is not the wide
    character encoding, None, "utf_32_le" or ``textUtils.USER_ANSI_CODE_PAGE``.
    """
    encoding = self.encoding
    isSupported = (
        encoding == textUtils.WCHAR_ENCODING
        or encoding is None
        or encoding == "utf_32_le"
        or encoding == textUtils.USER_ANSI_CODE_PAGE
    )
    if not isSupported:
        raise NotImplementedError
    lineStart, lineEnd = self._getLineOffsets(offset)
    # NULL and non-breaking space are turned into plain spaces,
    # which guarantees that words break on them.
    lineText = self._getTextRange(lineStart, lineEnd).translate({0: u' ', 0xa0: u' '})
    relOffset = offset - lineStart
    if self.useUniscribe:
        uniscribeResult = self._calculateUniscribeOffsets(
            lineText, textInfos.UNIT_WORD, relOffset
        )
        if uniscribeResult is not None:
            return (uniscribeResult[0] + lineStart, uniscribeResult[1] + lineStart)
    # Fallback: word detection that only breaks on non alphanumeric characters.
    if self.encoding == textUtils.WCHAR_ENCODING:
        converter = textUtils.WideStringOffsetConverter(lineText)
        strOffset = converter.wideToStrOffsets(relOffset, relOffset)[0]
        strWordStart = findStartOfWord(lineText, strOffset)
        strWordEnd = findEndOfWord(lineText, strOffset)
        wideWordStart, wideWordEnd = converter.strToWideOffsets(strWordStart, strWordEnd)
        return (wideWordStart + lineStart, wideWordEnd + lineStart)
    return [
        findStartOfWord(lineText, relOffset) + lineStart,
        findEndOfWord(lineText, relOffset) + lineStart,
    ]
def _getSelectionOffsets(self):
    """Return (start, end) offsets of the text drawn in the selection colours.

    The story fields are scanned for "formatChange" commands whose 'color'
    and 'background-color' equal ``self.foregroundSelectionColor`` and
    ``self.backgroundSelectionColor``. Offsets are counted in wide string
    lengths of the text items.

    Raises LookupError when either selection colour is None, or when no
    highlighted text is found.
    """
    if self.backgroundSelectionColor is None or self.foregroundSelectionColor is None:
        raise LookupError
    selectionStart = None
    selectionEnd = None
    position = 0
    highlighted = False
    for entry in self._storyFieldsAndRects[0]:
        startsHighlight = (
            isinstance(entry, textInfos.FieldCommand)
            and entry.command == "formatChange"
            and entry.field.get('color', None) == self.foregroundSelectionColor
            and entry.field.get('background-color', None) == self.backgroundSelectionColor
        )
        if startsHighlight:
            highlighted = True
            if selectionStart is None:
                selectionStart = position
            continue
        if not isinstance(entry, str):
            # Any other item ends the highlighted chunk.
            highlighted = False
            continue
        position += textUtils.WideStringOffsetConverter(entry).wideStringLength
        if highlighted:
            selectionEnd = position
    if selectionStart is None or selectionEnd is None:
        raise LookupError
    return (selectionStart, selectionEnd)
def _getFieldsInRange(self, start, end):
    """Return the slice of the story fields that covers offsets start..end.

    Offsets are measured in wide string lengths of the text items. The
    returned list starts one item before the text chunk containing ``start``
    and ends with the text chunk containing ``end``; the first and last text
    chunks are trimmed so that only the requested offsets remain.

    An empty list is returned when there are no story fields or when
    ``start`` does not fall inside any text chunk.
    """
    storyFields = self._storyFieldsAndRects[0]
    if not storyFields:
        return []
    firstIndex = lastIndex = trimStart = trimEnd = None
    chunkStart = 0
    for position, entry in enumerate(storyFields):
        if not isinstance(entry, str):
            continue
        chunkEnd = chunkStart + textUtils.WideStringOffsetConverter(entry).wideStringLength
        if chunkStart <= start < chunkEnd:
            # Include the item right before this text chunk.
            firstIndex = position - 1
            trimStart = start - chunkStart
        if chunkStart < end <= chunkEnd:
            lastIndex = position + 1
            trimEnd = end - chunkStart
        chunkStart = chunkEnd
    if firstIndex is None:
        return []
    if lastIndex is None:
        lastIndex = len(storyFields)
    selected = storyFields[firstIndex:lastIndex]
    startAndEndShareChunk = (
        (lastIndex - firstIndex) == 2
        and trimStart is not None
        and trimEnd is not None
    )
    if startAndEndShareChunk:
        # Both boundaries fall in the same text chunk: trim it in one step.
        selected[1] = selected[1][trimStart:trimEnd]
        return selected
    if trimStart is not None:
        selected[1] = selected[1][trimStart:]
    if trimEnd is not None:
        selected[-1] = selected[-1][:trimEnd]
    return selected
def _getSelectionOffsets(self):
    """Return (start, end) offsets of the text drawn with a white foreground.

    A "formatChange" command whose 'color' equals RGB(255, 255, 255) opens a
    highlighted chunk; any other non-text item closes it. Text lengths are
    counted with ``textUtils.WideStringOffsetConverter`` when ``textUtils``
    can be imported, and with ``len`` otherwise.

    When no highlighted text is found, the caret offset is returned as both
    start and end.
    """
    selectionStart = None
    selectionEnd = None
    position = 0
    highlighted = False
    for entry in self._storyFieldsAndRects[0]:
        startsHighlight = (
            isinstance(entry, textInfos.FieldCommand)
            and entry.command == "formatChange"
            and entry.field.get('color', None) == RGB(red=255, green=255, blue=255)
        )
        if startsHighlight:
            highlighted = True
            if selectionStart is None:
                selectionStart = position
            continue
        if not isinstance(entry, six.string_types):
            highlighted = False
            continue
        try:
            import textUtils
            entryLength = textUtils.WideStringOffsetConverter(entry).wideStringLength
        except ImportError:
            # textUtils is unavailable: fall back to the plain string length.
            entryLength = len(entry)
        position += entryLength
        if highlighted:
            selectionEnd = position
    if selectionStart is not None and selectionEnd is not None:
        return (selectionStart, selectionEnd)
    caret = self._getCaretOffset()
    return caret, caret
def _get__storyFieldsAndRects(self):
    """Fetch the window text in this object's rectangle and parse it into fields.

    @return: a 3-tuple ``(commandList, rects, lineEndOffsets)``:
        the parsed list of field commands and text strings, the rects returned
        by ``getWindowTextInRect``, and the offsets (in wide string lengths)
        at which each detected line ends.
        Three empty lists are returned when the object has no location,
        when the app module has no binding handle, or when no text was found.
    """
    # All returned coordinates are logical coordinates.
    if self._location:
        left, top, right, bottom = self._location
    else:
        try:
            left, top, width, height = self.obj.location
        except TypeError:
            # No location; nothing we can do.
            return [],[],[]
        right = left + width
        bottom = top + height
    bindingHandle=self.obj.appModule.helperLocalBindingHandle
    if not bindingHandle:
        log.debugWarning("AppModule does not have a binding handle")
        return [],[],[]
    left,top=windowUtils.physicalToLogicalPoint(self.obj.windowHandle,left,top)
    right,bottom=windowUtils.physicalToLogicalPoint(self.obj.windowHandle,right,bottom)
    text,rects=getWindowTextInRect(bindingHandle, self.obj.windowHandle, left, top, right, bottom, self.minHorizontalWhitespace, self.minVerticalWhitespace,self.stripOuterWhitespace,self.includeDescendantWindows)
    if not text:
        return [],[],[]
    # Wrap the text in a single root element before handing it to the XML parser.
    text="<control>%s</control>"%text
    commandList=XMLFormatting.XMLTextParser().parse(text)
    curFormatField=None
    lastEndOffset=0
    lineStartOffset=0
    lineStartIndex=0
    lineBaseline=None
    lineEndOffsets=[]
    # An index based loop is used on purpose: commandList is modified in place
    # while iterating (by processWindowChunksInLine and by the line feed substitution below).
    for index in range(len(commandList)):
        item=commandList[index]
        if isinstance(item,str):
            # Offsets are tracked in wide string lengths, not Python str lengths.
            lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
        elif isinstance(item,textInfos.FieldCommand):
            if isinstance(item.field,textInfos.FormatField):
                curFormatField=item.field
                self._normalizeFormatField(curFormatField)
            else:
                curFormatField=None
            baseline=curFormatField['baseline'] if curFormatField else None
            # A change of baseline marks the boundary between two lines.
            if baseline!=lineBaseline:
                if lineBaseline is not None:
                    # The line that just ended spans lineStartIndex..index; process it as a whole.
                    processWindowChunksInLine(commandList,rects,lineStartIndex,lineStartOffset,index,lastEndOffset)
                    #Convert the whitespace at the end of the line into a line feed
                    item=commandList[index-1]
                    if (
                        isinstance(item,str)
                        # Since we're searching for white space, it is safe to
                        # do this operation on the length of the pythonic string
                        and len(item)==1
                        and item.isspace()
                    ):
                        commandList[index-1]=u'\n'
                    lineEndOffsets.append(lastEndOffset)
                if baseline is not None:
                    # A new line starts at this command.
                    lineStartIndex=index
                    lineStartOffset=lastEndOffset
                lineBaseline=baseline
    return commandList,rects,lineEndOffsets
def _getCurrentOffsetInThisLine(self, lineInfo):
    """
    Given a caret textInfo expanded to line, returns the index into the
    line where the caret is located.
    Uniscribe needs indices into the text in order to find word boundaries,
    whereas UIA only offers relative movement, hence this helper.
    The index is expressed as a wide string length.
    """
    # Build a range that runs from the start of the line up to this position.
    upToCaret = lineInfo.copy()
    upToCaret.setEndPoint(self, "endToStart")
    textBeforeCaret = upToCaret._rangeObj.getText(-1)
    return textUtils.WideStringOffsetConverter(textBeforeCaret).wideStringLength
def _getLineOffsets(self, offset):
    """Return the start and end offsets of the line that contains ``offset``.

    For the wide character encoding, ``offset`` is converted to a str offset,
    the line is located with ``findStartOfLine`` / ``findEndOfLine``, and the
    result is converted back to wide string offsets.

    Raises NotImplementedError when ``self.encoding`` is neither the wide
    character encoding, None, "utf_32_le" nor the encoding reported by
    ``locale.getlocale()``.
    """
    storyText = self._getStoryText()
    if self.encoding != textUtils.WCHAR_ENCODING:
        if self.encoding not in (None, "utf_32_le", locale.getlocale()[1]):
            raise NotImplementedError
        # Offsets map one to one onto str indices for these encodings.
        return [findStartOfLine(storyText, offset), findEndOfLine(storyText, offset)]
    converter = textUtils.WideStringOffsetConverter(storyText)
    strOffset = converter.wideToStrOffsets(offset, offset)[0]
    strLineStart = findStartOfLine(storyText, strOffset)
    strLineEnd = findEndOfLine(storyText, strOffset)
    return converter.strToWideOffsets(strLineStart, strLineEnd)
def _getLineOffsets(self, offset):
    """Return the start and end offsets of the line that contains ``offset``.

    For the wide character encoding, ``offset`` is converted to a str offset,
    the line is located with ``findStartOfLine`` / ``findEndOfLine``, and the
    result is converted back to wide string offsets.

    Raises NotImplementedError when ``self.encoding`` is neither the wide
    character encoding, None, "utf_32_le" nor ``textUtils.USER_ANSI_CODE_PAGE``.
    """
    storyText = self._getStoryText()
    if self.encoding == textUtils.WCHAR_ENCODING:
        converter = textUtils.WideStringOffsetConverter(storyText)
        strOffset = converter.wideToStrOffsets(offset, offset)[0]
        strLineStart = findStartOfLine(storyText, strOffset)
        strLineEnd = findEndOfLine(storyText, strOffset)
        return converter.strToWideOffsets(strLineStart, strLineEnd)
    isPlainEncoding = (
        self.encoding is None
        or self.encoding == "utf_32_le"
        or self.encoding == textUtils.USER_ANSI_CODE_PAGE
    )
    if not isPlainEncoding:
        raise NotImplementedError
    # Offsets map one to one onto str indices for these encodings.
    return [findStartOfLine(storyText, offset), findEndOfLine(storyText, offset)]
def _getStoryOffsetLocations(self):
    """Yield a ``(rect, baseline, direction)`` tuple for every story offset.

    The command list and the rects are taken from the first two elements of
    ``self._storyFieldsAndRects``. Each text item consumes as many rects as
    its wide string length; the baseline and direction come from the most
    recent format field seen before it (None and 0 until one is seen).
    """
    currentBaseline = None
    currentDirection = 0
    consumed = 0
    commands, rects = self._storyFieldsAndRects[:2]
    for entry in commands:
        if isinstance(entry, textInfos.FieldCommand) and isinstance(entry.field, textInfos.FormatField):
            currentBaseline = entry.field['baseline']
            currentDirection = entry.field['direction']
            continue
        if not isinstance(entry, str):
            continue
        nextConsumed = consumed + textUtils.WideStringOffsetConverter(entry).wideStringLength
        for rect in rects[consumed:nextConsumed]:
            yield rect, currentBaseline, currentDirection
        consumed = nextConsumed
def processWindowChunksInLine(commandList, rects, startIndex, startOffset, endIndex, endOffset):
    """Split one line into runs of items sharing a window and process each run.

    Items ``startIndex`` up to (not including) ``endIndex`` of ``commandList``
    are walked. Whenever the 'hwnd' of a field command differs from that of
    the previous one, and once more at the end of the line, the items gathered
    so far are passed to ``processFieldsAndRectsRangeReadingdirection``.
    Offsets are tracked in wide string lengths, beginning at ``startOffset``.
    ``endOffset`` is accepted but not used.
    """
    chunkStartIndex = startIndex
    chunkStartOffset = startOffset
    runningOffset = startOffset
    previousHwnd = None
    for index in range(startIndex, endIndex + 1):
        # One extra iteration with item None flushes the final chunk.
        if index < endIndex:
            item = commandList[index]
        else:
            item = None
        if isinstance(item, str):
            runningOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
            continue
        currentHwnd = item.field['hwnd'] if item else None
        if previousHwnd is not None and currentHwnd != previousHwnd:
            processFieldsAndRectsRangeReadingdirection(
                commandList, rects, chunkStartIndex, chunkStartOffset, index, runningOffset
            )
            chunkStartIndex = index
            chunkStartOffset = runningOffset
        previousHwnd = currentHwnd
def _getCharacterOffsets(self, offset):
    """Return the start and end offsets of the character at ``offset``.

    For the wide character encoding the offsets are converted to str offsets
    and back within the containing line, so the returned range is the wide
    string range of whatever str characters cover ``offset``.
    For None, "utf_32_le" and the encoding reported by ``locale.getlocale()``
    the result is simply ``(offset, offset + 1)``.

    Raises NotImplementedError for any other encoding.
    """
    if self.encoding != textUtils.WCHAR_ENCODING:
        if self.encoding not in (None, "utf_32_le", locale.getlocale()[1]):
            raise NotImplementedError
        return offset, offset + 1
    lineStart, lineEnd = self._getLineOffsets(offset)
    converter = textUtils.WideStringOffsetConverter(self._getTextRange(lineStart, lineEnd))
    relOffset = offset - lineStart
    strStart, strEnd = converter.wideToStrOffsets(relOffset, relOffset + 1)
    wideStart, wideEnd = converter.strToWideOffsets(strStart, strEnd)
    return (wideStart + lineStart, wideEnd + lineStart)
def _getTextRange(self, start, end):
    """Fetch the story text that lies between two offsets.
    For the wide character encoding, the offsets are first converted
    from wide string offsets to str offsets.
    @param start: The start offset.
    @type start: int
    @param end: The end offset (exclusive).
    @type end: int
    @return: The text contained in the requested range.
    @rtype: str
    @raise NotImplementedError: if the encoding is neither the wide character
        encoding, None, "utf_32_le" nor the encoding reported by locale.getlocale().
    """
    storyText = self._getStoryText()
    if self.encoding == textUtils.WCHAR_ENCODING:
        strStart, strEnd = textUtils.WideStringOffsetConverter(storyText).wideToStrOffsets(start, end)
        return storyText[strStart:strEnd]
    if self.encoding not in (None, "utf_32_le", locale.getlocale()[1]):
        raise NotImplementedError
    return storyText[start:end]
def _getWordOffsetsInThisLine(self, offset, lineInfo):
    """Return the (start, end) word offsets around ``offset`` within a line.

    The text of ``lineInfo`` is handed to the native ``calculateWordOffsets``
    helper. The end offset is capped at the wide string length of the line
    text without the two padding characters, but never below 1.
    The helper's return value is not checked.
    """
    # NULL and non-breaking space are turned into plain spaces,
    # which guarantees that words break on them.
    cleanedText = lineInfo._rangeObj.getText(-1).translate({0: u' ', 0xa0: u' '})
    # Uniscribe behaves strangely for strings with no more than two
    # alphanumeric characters in a row, so two such characters are appended.
    paddedText = cleanedText + "xx"
    paddedLength = textUtils.WideStringOffsetConverter(paddedText).wideStringLength
    wordStart = ctypes.c_int()
    wordEnd = ctypes.c_int()
    NVDAHelper.localLib.calculateWordOffsets(
        paddedText,
        paddedLength,
        offset,
        ctypes.byref(wordStart),
        ctypes.byref(wordEnd),
    )
    endLimit = max(1, paddedLength - 2)
    return (wordStart.value, min(wordEnd.value, endLimit))
def _calculateUniscribeOffsets(
        self,
        lineText: str,
        unit: str,
        relOffset: int
) -> Optional[Tuple[int, int]]:
    """
    Calculates the bounds of a unit at an offset within a given string of text
    using the Windows uniscribe library, also used in Notepad, for example.
    Units supported are character and word.
    @param lineText: the text string to analyze
    @param unit: the TextInfo unit (character or word)
    @param relOffset: the character offset within the text string at which to calculate the bounds.
    @return: the (start, end) offsets of the unit, expressed in the same kind of offsets
        as relOffset (wide string offsets for the wide character encoding, str offsets otherwise),
        or None when the uniscribe helper reports failure.
    @raise NotImplementedError: when unit is neither word nor character.
    """
    # Units are plain strings, so they are compared by value: an identity check would
    # reject an equal unit string that happens to be a different object.
    if unit == textInfos.UNIT_WORD:
        helperFunc = NVDAHelper.localLib.calculateWordOffsets
    elif unit == textInfos.UNIT_CHARACTER:
        helperFunc = NVDAHelper.localLib.calculateCharacterOffsets
    else:
        raise NotImplementedError(f"Unit: {unit}")
    relStart = ctypes.c_int()
    relEnd = ctypes.c_int()
    # uniscribe does some strange things
    # when you give it a string with not more than two alphanumeric chars in a row.
    # Inject two alphanumeric characters at the end to fix this
    uniscribeLineText = lineText + "xx"
    # We can't rely on len(lineText) to calculate the length of the line.
    offsetConverter = textUtils.WideStringOffsetConverter(lineText)
    lineLength = offsetConverter.wideStringLength
    if self.encoding != textUtils.WCHAR_ENCODING:
        # We need to convert the str based line offsets to wide string offsets.
        relOffset = offsetConverter.strToWideOffsets(relOffset, relOffset)[0]
    uniscribeLineLength = lineLength + 2
    if helperFunc(
        uniscribeLineText,
        uniscribeLineLength,
        relOffset,
        ctypes.byref(relStart),
        ctypes.byref(relEnd)
    ):
        relStart = relStart.value
        # Never report an end that lies inside the two injected characters.
        relEnd = min(lineLength, relEnd.value)
        if self.encoding != textUtils.WCHAR_ENCODING:
            # We need to convert the uniscribe based offsets to str offsets.
            relStart, relEnd = offsetConverter.wideToStrOffsets(relStart, relEnd)
        return (relStart, relEnd)
    log.debugWarning(
        f"Uniscribe failed to calculate {unit} offsets for text {lineText!r}"
    )
    return None
def _getCharacterOffsets(self, offset):
    """Return the start and end offsets of the character at ``offset``.

    ``self._calculateUniscribeOffsets`` is consulted first when
    ``self.useUniscribe`` is set. If it is not used or returns None, the wide
    character encoding converts the offsets to str offsets and back within
    the containing line, and the other supported encodings yield
    ``(offset, offset + 1)``.

    Raises NotImplementedError when ``self.encoding`` is not the wide
    character encoding, None, "utf_32_le" or ``textUtils.USER_ANSI_CODE_PAGE``.
    """
    encoding = self.encoding
    isSupported = (
        encoding == textUtils.WCHAR_ENCODING
        or encoding is None
        or encoding == "utf_32_le"
        or encoding == textUtils.USER_ANSI_CODE_PAGE
    )
    if not isSupported:
        raise NotImplementedError
    lineStart, lineEnd = self._getLineOffsets(offset)
    lineText = self._getTextRange(lineStart, lineEnd)
    relOffset = offset - lineStart
    if self.useUniscribe:
        uniscribeResult = self._calculateUniscribeOffsets(
            lineText, textInfos.UNIT_CHARACTER, relOffset
        )
        if uniscribeResult is not None:
            return (uniscribeResult[0] + lineStart, uniscribeResult[1] + lineStart)
    if self.encoding != textUtils.WCHAR_ENCODING:
        return (offset, offset + 1)
    converter = textUtils.WideStringOffsetConverter(lineText)
    strStart, strEnd = converter.wideToStrOffsets(relOffset, relOffset + 1)
    wideStart, wideEnd = converter.strToWideOffsets(strStart, strEnd)
    return (wideStart + lineStart, wideEnd + lineStart)
def processFieldsAndRectsRangeReadingdirection(commandList, rects, startIndex, startOffset, endIndex, endOffset):
    """Put a passage of fields and rects into logical reading order, in place.

    The passage is made of the items ``startIndex`` up to (not including)
    ``endIndex`` of ``commandList`` and of the rects ``startOffset`` up to
    (not including) ``endOffset``. Offsets are counted in wide string lengths.

    Three passes are made:
    1. Resolve fields with an unknown direction and compute an overall
       direction for the passage. If no rtl text is seen, nothing else is done.
    2. Give weak/neutral fields the direction of the field preceding them
       in reading order.
    3. Reverse each run of consecutive rtl fields (text, rects and field order)
       and, when the passage as a whole is rtl, reverse the order of the runs.

    ``commandList`` and ``rects`` are modified in place; nothing is returned.
    """
    containsRtl = False  # True if any rtl text is found at all
    curFormatField = None
    # The general reading direction calculated based on the amount of rtl vs ltr text there is
    overallDirection = 0
    # Detect the direction for fields with an unknown reading direction,
    # and calculate an over all direction for the entire passage
    for index in range(startIndex, endIndex):
        item = commandList[index]
        if isinstance(item, textInfos.FieldCommand) and isinstance(item.field, textInfos.FormatField):
            curFormatField = item.field
        elif isinstance(item, str):
            direction = curFormatField['direction']
            if direction == 0:
                curFormatField['direction'] = direction = detectStringDirection(item)
            elif direction == -2:  # numbers in an rtl context
                curFormatField['direction'] = direction = -1
                curFormatField['shouldReverseText'] = False
            if direction < 0:
                containsRtl = True
            overallDirection += direction
    if not containsRtl:
        # As no rtl text was ever seen, then there is nothing else to do
        return
    if overallDirection == 0:
        overallDirection = 1
    # following the calculated over all reading direction of the passage,
    # correct all weak/neutral fields to have the same reading direction as the field preceding them
    lastDirection = overallDirection
    # For an rtl passage the fields are visited from the last one to the first one.
    # The walk must stay inside startIndex..endIndex-1: mirroring the index around
    # endIndex alone would land on items before the passage whenever startIndex > 0.
    passageIndexes = range(startIndex, endIndex)
    if overallDirection < 0:
        passageIndexes = reversed(passageIndexes)
    for index in passageIndexes:
        item = commandList[index]
        if isinstance(item, textInfos.FieldCommand) and isinstance(item.field, textInfos.FormatField):
            direction = item.field['direction']
            if direction == 0:
                item.field['direction'] = lastDirection
            # NOTE(review): this records the direction as read before the correction above,
            # so a corrected neutral field hands 0 on to the next field — confirm this is intended.
            lastDirection = direction
    # For fields that are rtl, reverse their text, their rects, and the order of consecutive rtl fields
    lastEndOffset = startOffset
    runDirection = None
    runStartIndex = None
    runStartOffset = None
    if overallDirection < 0:
        reorderList = []
    # One extra iteration with item None closes the final run.
    for index in range(startIndex, endIndex + 1):
        item = commandList[index] if index < endIndex else None
        if isinstance(item, str):
            lastEndOffset += textUtils.WideStringOffsetConverter(item).wideStringLength
        elif not item or (isinstance(item, textInfos.FieldCommand) and isinstance(item.field, textInfos.FormatField)):
            direction = item.field['direction'] if item else None
            if direction is None or (direction != runDirection):
                if runDirection is not None:
                    # This is the end of a run of consecutive fields of the same direction
                    if runDirection < 0:
                        # This run is rtl, so reverse its rects, the text within the fields,
                        # and the order of fields themselves
                        # Reverse rects
                        rects[runStartOffset:lastEndOffset] = rects[
                            lastEndOffset - 1:runStartOffset - 1 if runStartOffset > 0 else None:-1
                        ]
                        rectsStart = runStartOffset
                        # The run is walked in (command, text) pairs. Each pair is swapped here,
                        # so that the reversal of the whole run below puts the command
                        # back in front of its text.
                        for i in range(runStartIndex, index, 2):
                            command = commandList[i]
                            text = commandList[i + 1]
                            rectsEnd = rectsStart + textUtils.WideStringOffsetConverter(text).wideStringLength
                            commandList[i + 1] = command
                            shouldReverseText = command.field.get('shouldReverseText', True)
                            commandList[i] = normalizeRtlString(text[::-1] if shouldReverseText else text)
                            if not shouldReverseText:
                                # Because all the rects in the run were already reversed,
                                # we need to undo that for this field
                                rects[rectsStart:rectsEnd] = rects[
                                    rectsEnd - 1:rectsStart - 1 if rectsStart > 0 else None:-1
                                ]
                            rectsStart = rectsEnd
                        # Reverse commandList
                        commandList[runStartIndex:index] = commandList[
                            index - 1:runStartIndex - 1 if runStartIndex > 0 else None:-1
                        ]
                    if overallDirection < 0:
                        # As the overall reading direction of the passage is rtl,
                        # record the location of this run so we can reverse the order of runs later
                        reorderList.append((runStartIndex, runStartOffset, index, lastEndOffset))
                if item:
                    runStartIndex = index
                    runStartOffset = lastEndOffset
                    runDirection = direction
    if overallDirection < 0:
        # As the overall reading direction of the passage is rtl,
        # build a new command list and rects list with the order of runs reversed
        # The content of each run is already in logical reading order itself
        newCommandList = []
        newRects = []
        for si, so, ei, eo in reversed(reorderList):
            newCommandList.extend(yieldListRange(commandList, si, ei))
            newRects.extend(yieldListRange(rects, so, eo))
        # Update the original command list and rect list
        # replacing the old content for this passage with the reordered runs
        commandList[startIndex:endIndex] = newCommandList
        rects[startOffset:endOffset] = newRects
def _getStoryLength(self):
    """Return the length of the story text, counted as a wide string length."""
    # UTF-16 is the internal encoding of this TextInfo, so the str length
    # of the story text that NVDAObjectTextInfo returns would be incorrect here.
    storyText = self._getStoryText()
    converter = textUtils.WideStringOffsetConverter(storyText)
    return converter.wideStringLength